diff --git a/book/format.go b/book/format.go index 7cb6dd7..c02c840 100644 --- a/book/format.go +++ b/book/format.go @@ -32,7 +32,7 @@ func ToMarkdownString(c chapter) string { // url if c.config.PrintURL { - markdown += fmt.Sprintf("_Source: %s_\n\n", c.URL()) + markdown += fmt.Sprintf("_%s_\n\n", c.URL()) } // convert content to markdown @@ -82,7 +82,7 @@ func ToHtmlString(c chapter) string { // url if c.config.PrintURL { - html += fmt.Sprintf("

Source: %s

\n", c.URL()) + html += fmt.Sprintf("

%s

\n", c.URL()) } // content @@ -177,9 +177,9 @@ func AppendToEpub(e *epub.Epub, c chapter) { if c.config.ImagesOnly { imageTag, _ := goquery.OuterHtml(s) - content += strings.Replace(imageTag, src, imagePath, 1) + content += strings.ReplaceAll(imageTag, src, imagePath) } else { - content = strings.Replace(content, src, imagePath, 1) + content = strings.ReplaceAll(content, src, imagePath) } }) @@ -191,7 +191,7 @@ func AppendToEpub(e *epub.Epub, c chapter) { // url if c.config.PrintURL { - html += fmt.Sprintf("

Source: %s

\n", c.URL()) + html += fmt.Sprintf("

%s

\n", c.URL()) } // content diff --git a/book/format_test.go b/book/format_test.go index 847f2f4..077f9a3 100644 --- a/book/format_test.go +++ b/book/format_test.go @@ -38,7 +38,7 @@ func TestToMarkdownPrintURL(t *testing.T) { c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {}) got := ToMarkdownString(c) - want := "Example Domain\n==============\n\n_Source: https://example.com/_\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n" + want := "Example Domain\n==============\n\n_https://example.com/_\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n" if got != want { t.Errorf("got %v, wanted %v", got, want) @@ -99,7 +99,7 @@ func TestToHtmlPrintURL(t *testing.T) { c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {}) got := ToHtmlString(c) - want := "

Example Domain

\n

Source: https://example.com/

\n
\n \n

This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.

\n

More information...

\n
" + want := "

Example Domain

\n

https://example.com/

\n
\n \n

This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.

\n

More information...

\n
" if got != want { t.Errorf("got %q, wanted %q", got, want) diff --git a/book/scraper.go b/book/scraper.go index a75497f..2e2e8ea 100644 --- a/book/scraper.go +++ b/book/scraper.go @@ -242,7 +242,6 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int, content = "" doc.Find("img").Each(func(i int, s *goquery.Selection) { imageTag, _ := goquery.OuterHtml(s) - // imageTag = strings.ReplaceAll(imageTag, "\n", "") content += imageTag }) diff --git a/book/scraper_test.go b/book/scraper_test.go index 500948b..7e76ba9 100644 --- a/book/scraper_test.go +++ b/book/scraper_test.go @@ -117,10 +117,10 @@ func TestSubChapters(t *testing.T) { config0 := NewScrapeConfigQuiet() config1 := NewScrapeConfigQuiet() - c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) + c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) got := len(c.SubChapters()) - want := 14 + want := 21 if got != want { t.Errorf("got %v, wanted %v", got, want) @@ -133,10 +133,10 @@ func TestSubChaptersRSS(t *testing.T) { config0 := NewScrapeConfigQuiet() config1 := NewScrapeConfigQuiet() - c := NewChapterFromURL("https://www.nginx.com/feed/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) + c := NewChapterFromURL("https://blog.nginx.org/feed", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) got := len(c.SubChapters()) - want := 14 + want := 10 if got != want { t.Errorf("got %v, wanted %v", got, want) @@ -147,14 +147,14 @@ func TestSubChaptersRSS(t *testing.T) { func TestSubChaptersSelector(t *testing.T) { config0 := NewScrapeConfigQuiet() - config0.Selector = "body > aside > p > a" + config0.Selector = "section.concrete>article>h2>a" config1 := NewScrapeConfigQuiet() - c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) + c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) got := len(c.SubChapters()) - want := 14 + want := 12 if got != want { t.Errorf("got %v, wanted %v", got, want) @@ -165,11 +165,12 @@ func TestSubChaptersSelector(t *testing.T) { func TestSubChaptersLimit(t *testing.T) { config0 := NewScrapeConfigQuiet() + config0.Selector = "section.concrete>article>h2>a" config0.Limit = 1 config1 := NewScrapeConfigQuiet() - c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) + c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) got := len(c.SubChapters()) want := 1 @@ -183,14 +184,15 @@ func TestSubChaptersLimit(t *testing.T) { func TestSubChaptersLimitOver(t *testing.T) { config0 := NewScrapeConfigQuiet() + config0.Selector = "section.concrete>article>h2>a" config0.Limit = 15 config1 := NewScrapeConfigQuiet() - c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) + c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) got := len(c.SubChapters()) - want := 14 + want := 12 if got != want { t.Errorf("got %v, wanted %v", got, want) @@ -201,14 +203,15 @@ func TestSubChaptersLimitOver(t *testing.T) { func TestReverse(t *testing.T) { config0 := NewScrapeConfigQuiet() + config0.Selector = "section.concrete>article>h2>a" config0.Reverse = true config1 := NewScrapeConfigQuiet() - c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) + c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {}) - got := c.SubChapters()[0].Name() - want := "The W3C Markup Validation Service" + got := c.SubChapters()[0].URL() + want := "https://12factor.net/admin-processes" if got != want { t.Errorf("got %v, wanted %v", got, want) @@ -221,7 +224,7 @@ func TestNotInclude(t *testing.T) { config := NewScrapeConfigQuiet() config.Include = false - c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {}) + c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {}) got := c.Content() want := "" diff --git a/cmd/version.go b/cmd/version.go index cce9f1c..167663c 100644 --- a/cmd/version.go +++ b/cmd/version.go @@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{ Use: "version", Short: "Print the version number of papeer", Run: func(cmd *cobra.Command, args []string) { - fmt.Println("papeer v0.8.2") + fmt.Println("papeer v0.8.3") }, }