Fix tests, fix hidden images, and remove the "Source:" label from the URL line under the title

This commit is contained in:
q
2025-01-12 18:57:33 +01:00
parent 403fdcc0f0
commit 32168718c9
5 changed files with 25 additions and 23 deletions

View File

@@ -32,7 +32,7 @@ func ToMarkdownString(c chapter) string {
// url
if c.config.PrintURL {
markdown += fmt.Sprintf("_Source: %s_\n\n", c.URL())
markdown += fmt.Sprintf("_%s_\n\n", c.URL())
}
// convert content to markdown
@@ -82,7 +82,7 @@ func ToHtmlString(c chapter) string {
// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
html += fmt.Sprintf("<p><i>%s</i></p>\n", c.URL())
}
// content
@@ -177,9 +177,9 @@ func AppendToEpub(e *epub.Epub, c chapter) {
if c.config.ImagesOnly {
imageTag, _ := goquery.OuterHtml(s)
content += strings.Replace(imageTag, src, imagePath, 1)
content += strings.ReplaceAll(imageTag, src, imagePath)
} else {
content = strings.Replace(content, src, imagePath, 1)
content = strings.ReplaceAll(content, src, imagePath)
}
})
@@ -191,7 +191,7 @@ func AppendToEpub(e *epub.Epub, c chapter) {
// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
html += fmt.Sprintf("<p><i>%s</i></p>\n", c.URL())
}
// content

View File

@@ -38,7 +38,7 @@ func TestToMarkdownPrintURL(t *testing.T) {
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := ToMarkdownString(c)
want := "Example Domain\n==============\n\n_Source: https://example.com/_\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"
want := "Example Domain\n==============\n\n_https://example.com/_\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"
if got != want {
t.Errorf("got %v, wanted %v", got, want)
@@ -99,7 +99,7 @@ func TestToHtmlPrintURL(t *testing.T) {
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := ToHtmlString(c)
want := "<h1>Example Domain</h1>\n<p><i>Source: https://example.com/</i></p>\n<div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
want := "<h1>Example Domain</h1>\n<p><i>https://example.com/</i></p>\n<div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
if got != want {
t.Errorf("got %q, wanted %q", got, want)

View File

@@ -242,7 +242,6 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
content = ""
doc.Find("img").Each(func(i int, s *goquery.Selection) {
imageTag, _ := goquery.OuterHtml(s)
// imageTag = strings.ReplaceAll(imageTag, "\n", "")
content += imageTag
})

View File

@@ -117,10 +117,10 @@ func TestSubChapters(t *testing.T) {
config0 := NewScrapeConfigQuiet()
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
got := len(c.SubChapters())
want := 14
want := 21
if got != want {
t.Errorf("got %v, wanted %v", got, want)
@@ -133,10 +133,10 @@ func TestSubChaptersRSS(t *testing.T) {
config0 := NewScrapeConfigQuiet()
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://www.nginx.com/feed/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://blog.nginx.org/feed", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
got := len(c.SubChapters())
want := 14
want := 10
if got != want {
t.Errorf("got %v, wanted %v", got, want)
@@ -147,14 +147,14 @@ func TestSubChaptersRSS(t *testing.T) {
func TestSubChaptersSelector(t *testing.T) {
config0 := NewScrapeConfigQuiet()
config0.Selector = "body > aside > p > a"
config0.Selector = "section.concrete>article>h2>a"
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
got := len(c.SubChapters())
want := 14
want := 12
if got != want {
t.Errorf("got %v, wanted %v", got, want)
@@ -165,11 +165,12 @@ func TestSubChaptersSelector(t *testing.T) {
func TestSubChaptersLimit(t *testing.T) {
config0 := NewScrapeConfigQuiet()
config0.Selector = "section.concrete>article>h2>a"
config0.Limit = 1
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
got := len(c.SubChapters())
want := 1
@@ -183,14 +184,15 @@ func TestSubChaptersLimit(t *testing.T) {
func TestSubChaptersLimitOver(t *testing.T) {
config0 := NewScrapeConfigQuiet()
config0.Selector = "section.concrete>article>h2>a"
config0.Limit = 15
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
got := len(c.SubChapters())
want := 14
want := 12
if got != want {
t.Errorf("got %v, wanted %v", got, want)
@@ -201,14 +203,15 @@ func TestSubChaptersLimitOver(t *testing.T) {
func TestReverse(t *testing.T) {
config0 := NewScrapeConfigQuiet()
config0.Selector = "section.concrete>article>h2>a"
config0.Reverse = true
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
got := c.SubChapters()[0].Name()
want := "The W3C Markup Validation Service"
got := c.SubChapters()[0].URL()
want := "https://12factor.net/admin-processes"
if got != want {
t.Errorf("got %v, wanted %v", got, want)
@@ -221,7 +224,7 @@ func TestNotInclude(t *testing.T) {
config := NewScrapeConfigQuiet()
config.Include = false
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Content()
want := ""

View File

@@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{
Use: "version",
Short: "Print the version number of papeer",
Run: func(cmd *cobra.Command, args []string) {
fmt.Println("papeer v0.8.2")
fmt.Println("papeer v0.8.3")
},
}