[get] print url option

This commit is contained in:
lapwat
2024-08-14 23:32:33 +02:00
parent 1b2be1c390
commit 403fdcc0f0
7 changed files with 115 additions and 54 deletions

View File

@@ -1,6 +1,7 @@
package book
type chapter struct {
url string
body string
name string
author string
@@ -10,11 +11,11 @@ type chapter struct {
}
func NewEmptyChapter() chapter {
return chapter{"", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
return chapter{"", "", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
}
func NewChapter(body, name, author, content string, subChapters []chapter, config *ScrapeConfig) chapter {
return chapter{body, name, author, content, subChapters, config}
// URL returns the value stored in the chapter's url field.
func (c chapter) URL() string {
return c.url
}
func (c chapter) Body() string {

View File

@@ -30,6 +30,11 @@ func ToMarkdownString(c chapter) string {
markdown += fmt.Sprintf("%s\n", c.Name())
markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name())))
// url
if c.config.PrintURL {
markdown += fmt.Sprintf("_Source: %s_\n\n", c.URL())
}
// convert content to markdown
content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
if err != nil {
@@ -72,7 +77,15 @@ func ToHtmlString(c chapter) string {
// chapter content
if c.config.Include {
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
// title
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
}
// content
html += c.Content()
}
@@ -114,19 +127,6 @@ func ToEpub(c chapter, filename string) string {
e := epub.NewEpub(c.Name())
e.SetAuthor(c.Author())
AppendToEpub(e, c)
err := e.Write(filename)
if err != nil {
log.Fatal(err)
}
return filename
}
func AppendToEpub(e *epub.Epub, c chapter) {
content := ""
// append table of content
if len(c.SubChapters()) > 1 {
html := "<h1>Table of Contents</h1>"
@@ -143,6 +143,19 @@ func AppendToEpub(e *epub.Epub, c chapter) {
}
}
AppendToEpub(e, c)
err := e.Write(filename)
if err != nil {
log.Fatal(err)
}
return filename
}
func AppendToEpub(e *epub.Epub, c chapter) {
content := ""
// chapter content
if c.config.Include {
@@ -173,8 +186,15 @@ func AppendToEpub(e *epub.Epub, c chapter) {
html := ""
// add title only if ImagesOnly = false
if c.config.ImagesOnly == false {
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
}
// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
}
// content
html += content
// write to epub file

View File

@@ -19,7 +19,7 @@ func TestFilename(t *testing.T) {
func TestToMarkdownString(t *testing.T) {
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
got := ToMarkdownString(c)
want := "Example Domain\n==============\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"
@@ -30,9 +30,25 @@ func TestToMarkdownString(t *testing.T) {
}
// TestToMarkdownPrintURL verifies that, with PrintURL switched on, the
// Markdown rendering places a "_Source: <url>_" line between the underlined
// title and the converted body.
//
// NOTE(review): the expected text mirrors the page served at
// https://example.com/, so this presumably needs network access — confirm.
func TestToMarkdownPrintURL(t *testing.T) {
	const expected = "Example Domain\n==============\n\n_Source: https://example.com/_\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"

	// Start from the quiet defaults and enable only the option under test.
	cfg := NewScrapeConfigQuiet()
	cfg.PrintURL = true

	ch := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{cfg}, 0, func(index int, name string) {})

	if actual := ToMarkdownString(ch); actual != expected {
		t.Errorf("got %v, wanted %v", actual, expected)
	}
}
func TestToMarkdown(t *testing.T) {
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMarkdown(c, "")
filename := "Example_Domain.md"
@@ -49,7 +65,7 @@ func TestToMarkdown(t *testing.T) {
func TestToMarkdownFilename(t *testing.T) {
filename := "ebook.md"
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMarkdown(c, filename)
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
@@ -64,10 +80,26 @@ func TestToMarkdownFilename(t *testing.T) {
func TestToHtmlString(t *testing.T) {
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
got := ToHtmlString(c)
want := "<h1>Example Domain</h1><div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
want := "<h1>Example Domain</h1>\n<div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
if got != want {
t.Errorf("got %q, wanted %q", got, want)
}
}
func TestToHtmlPrintURL(t *testing.T) {
config := NewScrapeConfigQuiet()
config.PrintURL = true
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := ToHtmlString(c)
want := "<h1>Example Domain</h1>\n<p><i>Source: https://example.com/</i></p>\n<div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
if got != want {
t.Errorf("got %q, wanted %q", got, want)
@@ -77,7 +109,7 @@ func TestToHtmlString(t *testing.T) {
func TestToHtml(t *testing.T) {
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToHtml(c, "")
filename := "Example_Domain.html"
@@ -94,7 +126,7 @@ func TestToHtml(t *testing.T) {
func TestToHtmlFilename(t *testing.T) {
filename := "ebook.html"
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToHtml(c, filename)
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
@@ -109,7 +141,7 @@ func TestToHtmlFilename(t *testing.T) {
func TestToEpub(t *testing.T) {
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToEpub(c, "")
filename := "Example_Domain.epub"
@@ -126,7 +158,7 @@ func TestToEpub(t *testing.T) {
func TestToEpubFilename(t *testing.T) {
filename := "ebook.epub"
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToEpub(c, filename)
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
@@ -141,7 +173,7 @@ func TestToEpubFilename(t *testing.T) {
func TestToMobi(t *testing.T) {
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMobi(c, "")
filename := "Example_Domain.mobi"
@@ -158,7 +190,7 @@ func TestToMobi(t *testing.T) {
func TestToMobiFilename(t *testing.T) {
filename := "ebook.mobi"
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMobi(c, filename)
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {

View File

@@ -30,14 +30,19 @@ type ScrapeConfig struct {
Include bool
ImagesOnly bool
UseLinkName bool
PrintURL bool
}
func NewScrapeConfig() *ScrapeConfig {
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false}
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false, false}
}
// NewScrapeConfigQuiet returns a pointer to a freshly built ScrapeConfig whose
// positional values match those used by NewScrapeConfig, except that the third
// field is true instead of false.
//
// NOTE(review): the third field is presumably the "quiet" flag, going by this
// constructor's name — confirm against the ScrapeConfig declaration.
func NewScrapeConfigQuiet() *ScrapeConfig {
	cfg := ScrapeConfig{0, "", true, -1, 0, false, -1, -1, true, false, false, false}
	return &cfg
}
func NewScrapeConfigNoInclude() *ScrapeConfig {
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false}
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false, false}
}
func NewScrapeConfigs(selectors []string) []*ScrapeConfig {
@@ -252,7 +257,7 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
}
return chapter{string(body), name, article.Byline, content, subchapters, config}
return chapter{url, string(body), name, article.Byline, content, subchapters, config}
}
func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, quiet bool) ([]chapter, chapter) {

View File

@@ -7,7 +7,7 @@ import (
func TestBody(t *testing.T) {
config := NewScrapeConfig()
config := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Body()
@@ -21,7 +21,7 @@ func TestBody(t *testing.T) {
func TestName(t *testing.T) {
config := NewScrapeConfig()
config := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Name()
@@ -35,7 +35,7 @@ func TestName(t *testing.T) {
func TestCustomName(t *testing.T) {
config := NewScrapeConfig()
config := NewScrapeConfigQuiet()
config.UseLinkName = true
c := NewChapterFromURL("https://example.com/", "Custom Name", []*ScrapeConfig{config}, 0, func(index int, name string) {})
@@ -50,7 +50,7 @@ func TestCustomName(t *testing.T) {
func TestAuthor(t *testing.T) {
config := NewScrapeConfig()
config := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Author()
@@ -64,7 +64,7 @@ func TestAuthor(t *testing.T) {
func TestContent(t *testing.T) {
config := NewScrapeConfig()
config := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Content()
@@ -78,10 +78,10 @@ func TestContent(t *testing.T) {
func TestDelay(t *testing.T) {
config0 := NewScrapeConfig()
config0 := NewScrapeConfigQuiet()
config0.Delay = 500
config1 := NewScrapeConfig()
config1 := NewScrapeConfigQuiet()
start := time.Now()
NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -98,7 +98,7 @@ func TestDelay(t *testing.T) {
func TestContentImagesOnly(t *testing.T) {
config := NewScrapeConfig()
config := NewScrapeConfigQuiet()
config.ImagesOnly = true
c := NewChapterFromURL("https://12factor.net/codebase", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
@@ -114,8 +114,8 @@ func TestContentImagesOnly(t *testing.T) {
func TestSubChapters(t *testing.T) {
config0 := NewScrapeConfig()
config1 := NewScrapeConfig()
config0 := NewScrapeConfigQuiet()
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -130,8 +130,8 @@ func TestSubChapters(t *testing.T) {
func TestSubChaptersRSS(t *testing.T) {
config0 := NewScrapeConfig()
config1 := NewScrapeConfig()
config0 := NewScrapeConfigQuiet()
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://www.nginx.com/feed/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -146,10 +146,10 @@ func TestSubChaptersRSS(t *testing.T) {
func TestSubChaptersSelector(t *testing.T) {
config0 := NewScrapeConfig()
config0 := NewScrapeConfigQuiet()
config0.Selector = "body > aside > p > a"
config1 := NewScrapeConfig()
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -164,10 +164,10 @@ func TestSubChaptersSelector(t *testing.T) {
func TestSubChaptersLimit(t *testing.T) {
config0 := NewScrapeConfig()
config0 := NewScrapeConfigQuiet()
config0.Limit = 1
config1 := NewScrapeConfig()
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -182,10 +182,10 @@ func TestSubChaptersLimit(t *testing.T) {
func TestSubChaptersLimitOver(t *testing.T) {
config0 := NewScrapeConfig()
config0 := NewScrapeConfigQuiet()
config0.Limit = 15
config1 := NewScrapeConfig()
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -200,10 +200,10 @@ func TestSubChaptersLimitOver(t *testing.T) {
func TestReverse(t *testing.T) {
config0 := NewScrapeConfig()
config0 := NewScrapeConfigQuiet()
config0.Reverse = true
config1 := NewScrapeConfig()
config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -218,7 +218,7 @@ func TestReverse(t *testing.T) {
func TestNotInclude(t *testing.T) {
config := NewScrapeConfig()
config := NewScrapeConfigQuiet()
config.Include = false
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

View File

@@ -33,6 +33,7 @@ type GetOptions struct {
threads int
include bool
useLinkName bool
printURL bool
}
var getOpts *GetOptions
@@ -46,6 +47,7 @@ func init() {
getCmd.Flags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)")
getCmd.Flags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output")
getCmd.Flags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only")
getCmd.Flags().BoolVarP(&getOpts.printURL, "print-url", "", false, "print url after chapter title")
getCmd.Flags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar")
// common with list command
@@ -147,6 +149,7 @@ var getCmd = &cobra.Command{
config.ImagesOnly = getOpts.images
config.Include = getOpts.include
config.UseLinkName = getOpts.useLinkName
config.PrintURL = getOpts.printURL
// do not use link name for root level as there is not parent link
if index == 0 {

View File

@@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{
Use: "version",
Short: "Print the version number of papeer",
Run: func(cmd *cobra.Command, args []string) {
fmt.Println("papeer v0.8.1")
fmt.Println("papeer v0.8.2")
},
}