mirror of
https://github.com/NohamR/papeer.git
synced 2026-05-24 20:00:45 +00:00
[get] print url option
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package book
|
||||
|
||||
type chapter struct {
|
||||
url string
|
||||
body string
|
||||
name string
|
||||
author string
|
||||
@@ -10,11 +11,11 @@ type chapter struct {
|
||||
}
|
||||
|
||||
func NewEmptyChapter() chapter {
|
||||
return chapter{"", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
|
||||
return chapter{"", "", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
|
||||
}
|
||||
|
||||
func NewChapter(body, name, author, content string, subChapters []chapter, config *ScrapeConfig) chapter {
|
||||
return chapter{body, name, author, content, subChapters, config}
|
||||
// URL returns the value stored in the chapter's url field.
func (c chapter) URL() string {
|
||||
return c.url
|
||||
}
|
||||
|
||||
func (c chapter) Body() string {
|
||||
|
||||
@@ -30,6 +30,11 @@ func ToMarkdownString(c chapter) string {
|
||||
markdown += fmt.Sprintf("%s\n", c.Name())
|
||||
markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name())))
|
||||
|
||||
// url
|
||||
if c.config.PrintURL {
|
||||
markdown += fmt.Sprintf("_Source: %s_\n\n", c.URL())
|
||||
}
|
||||
|
||||
// convert content to markdown
|
||||
content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
|
||||
if err != nil {
|
||||
@@ -72,7 +77,15 @@ func ToHtmlString(c chapter) string {
|
||||
|
||||
// chapter content
|
||||
if c.config.Include {
|
||||
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
|
||||
// title
|
||||
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
|
||||
|
||||
// url
|
||||
if c.config.PrintURL {
|
||||
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
|
||||
}
|
||||
|
||||
// content
|
||||
html += c.Content()
|
||||
}
|
||||
|
||||
@@ -114,19 +127,6 @@ func ToEpub(c chapter, filename string) string {
|
||||
e := epub.NewEpub(c.Name())
|
||||
e.SetAuthor(c.Author())
|
||||
|
||||
AppendToEpub(e, c)
|
||||
|
||||
err := e.Write(filename)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
return filename
|
||||
}
|
||||
|
||||
func AppendToEpub(e *epub.Epub, c chapter) {
|
||||
content := ""
|
||||
|
||||
// append table of content
|
||||
if len(c.SubChapters()) > 1 {
|
||||
html := "<h1>Table of Contents</h1>"
|
||||
@@ -143,6 +143,19 @@ func AppendToEpub(e *epub.Epub, c chapter) {
|
||||
}
|
||||
}
|
||||
|
||||
AppendToEpub(e, c)
|
||||
|
||||
err := e.Write(filename)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
return filename
|
||||
}
|
||||
|
||||
func AppendToEpub(e *epub.Epub, c chapter) {
|
||||
content := ""
|
||||
|
||||
// chapter content
|
||||
if c.config.Include {
|
||||
|
||||
@@ -173,8 +186,15 @@ func AppendToEpub(e *epub.Epub, c chapter) {
|
||||
html := ""
|
||||
// add title only if ImagesOnly = false
|
||||
if c.config.ImagesOnly == false {
|
||||
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
|
||||
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
|
||||
}
|
||||
|
||||
// url
|
||||
if c.config.PrintURL {
|
||||
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
|
||||
}
|
||||
|
||||
// content
|
||||
html += content
|
||||
|
||||
// write to epub file
|
||||
|
||||
@@ -19,7 +19,7 @@ func TestFilename(t *testing.T) {
|
||||
|
||||
func TestToMarkdownString(t *testing.T) {
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
|
||||
got := ToMarkdownString(c)
|
||||
want := "Example Domain\n==============\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"
|
||||
@@ -30,9 +30,25 @@ func TestToMarkdownString(t *testing.T) {
|
||||
|
||||
}
|
||||
|
||||
// TestToMarkdownPrintURL checks that ToMarkdownString emits a
// "_Source: <url>_" line after the title underline when the scrape
// config has PrintURL set to true.
// The chapter is built with NewChapterFromURL from https://example.com/.
// NOTE(review): presumably this needs network access — confirm.
func TestToMarkdownPrintURL(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfigQuiet()
|
||||
// Enable the option under test; NewScrapeConfigQuiet leaves it false.
config.PrintURL = true
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
|
||||
got := ToMarkdownString(c)
|
||||
// Expected output: title, "=" underline, the source line, then the body.
want := "Example Domain\n==============\n\n_Source: https://example.com/_\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"
|
||||
|
||||
if got != want {
|
||||
t.Errorf("got %v, wanted %v", got, want)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestToMarkdown(t *testing.T) {
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
ToMarkdown(c, "")
|
||||
|
||||
filename := "Example_Domain.md"
|
||||
@@ -49,7 +65,7 @@ func TestToMarkdown(t *testing.T) {
|
||||
func TestToMarkdownFilename(t *testing.T) {
|
||||
|
||||
filename := "ebook.md"
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
ToMarkdown(c, filename)
|
||||
|
||||
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
||||
@@ -64,10 +80,26 @@ func TestToMarkdownFilename(t *testing.T) {
|
||||
|
||||
func TestToHtmlString(t *testing.T) {
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
|
||||
got := ToHtmlString(c)
|
||||
want := "<h1>Example Domain</h1><div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
|
||||
want := "<h1>Example Domain</h1>\n<div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
|
||||
|
||||
if got != want {
|
||||
t.Errorf("got %q, wanted %q", got, want)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestToHtmlPrintURL(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfigQuiet()
|
||||
config.PrintURL = true
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
|
||||
got := ToHtmlString(c)
|
||||
want := "<h1>Example Domain</h1>\n<p><i>Source: https://example.com/</i></p>\n<div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
|
||||
|
||||
if got != want {
|
||||
t.Errorf("got %q, wanted %q", got, want)
|
||||
@@ -77,7 +109,7 @@ func TestToHtmlString(t *testing.T) {
|
||||
|
||||
func TestToHtml(t *testing.T) {
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
ToHtml(c, "")
|
||||
|
||||
filename := "Example_Domain.html"
|
||||
@@ -94,7 +126,7 @@ func TestToHtml(t *testing.T) {
|
||||
func TestToHtmlFilename(t *testing.T) {
|
||||
|
||||
filename := "ebook.html"
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
ToHtml(c, filename)
|
||||
|
||||
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
||||
@@ -109,7 +141,7 @@ func TestToHtmlFilename(t *testing.T) {
|
||||
|
||||
func TestToEpub(t *testing.T) {
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
ToEpub(c, "")
|
||||
|
||||
filename := "Example_Domain.epub"
|
||||
@@ -126,7 +158,7 @@ func TestToEpub(t *testing.T) {
|
||||
func TestToEpubFilename(t *testing.T) {
|
||||
|
||||
filename := "ebook.epub"
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
ToEpub(c, filename)
|
||||
|
||||
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
||||
@@ -141,7 +173,7 @@ func TestToEpubFilename(t *testing.T) {
|
||||
|
||||
func TestToMobi(t *testing.T) {
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
ToMobi(c, "")
|
||||
|
||||
filename := "Example_Domain.mobi"
|
||||
@@ -158,7 +190,7 @@ func TestToMobi(t *testing.T) {
|
||||
func TestToMobiFilename(t *testing.T) {
|
||||
|
||||
filename := "ebook.mobi"
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
|
||||
ToMobi(c, filename)
|
||||
|
||||
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
||||
|
||||
@@ -30,14 +30,19 @@ type ScrapeConfig struct {
|
||||
Include bool
|
||||
ImagesOnly bool
|
||||
UseLinkName bool
|
||||
PrintURL bool
|
||||
}
|
||||
|
||||
func NewScrapeConfig() *ScrapeConfig {
|
||||
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false}
|
||||
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false, false}
|
||||
}
|
||||
|
||||
// NewScrapeConfigQuiet returns a pointer to a new ScrapeConfig built from a
// positional struct literal. Going by the visible tail of the struct
// definition, the last four values set Include=true, ImagesOnly=false,
// UseLinkName=false and PrintURL=false.
// NOTE(review): the third value (true) is presumably the quiet flag; the
// field name is not visible in this view — confirm against the struct.
func NewScrapeConfigQuiet() *ScrapeConfig {
|
||||
return &ScrapeConfig{0, "", true, -1, 0, false, -1, -1, true, false, false, false}
|
||||
}
|
||||
|
||||
func NewScrapeConfigNoInclude() *ScrapeConfig {
|
||||
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false}
|
||||
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false, false}
|
||||
}
|
||||
|
||||
func NewScrapeConfigs(selectors []string) []*ScrapeConfig {
|
||||
@@ -252,7 +257,7 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
|
||||
|
||||
}
|
||||
|
||||
return chapter{string(body), name, article.Byline, content, subchapters, config}
|
||||
return chapter{url, string(body), name, article.Byline, content, subchapters, config}
|
||||
}
|
||||
|
||||
func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, quiet bool) ([]chapter, chapter) {
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
|
||||
func TestBody(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfig()
|
||||
config := NewScrapeConfigQuiet()
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
|
||||
got := c.Body()
|
||||
@@ -21,7 +21,7 @@ func TestBody(t *testing.T) {
|
||||
|
||||
func TestName(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfig()
|
||||
config := NewScrapeConfigQuiet()
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
|
||||
got := c.Name()
|
||||
@@ -35,7 +35,7 @@ func TestName(t *testing.T) {
|
||||
|
||||
func TestCustomName(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfig()
|
||||
config := NewScrapeConfigQuiet()
|
||||
config.UseLinkName = true
|
||||
c := NewChapterFromURL("https://example.com/", "Custom Name", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
|
||||
@@ -50,7 +50,7 @@ func TestCustomName(t *testing.T) {
|
||||
|
||||
func TestAuthor(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfig()
|
||||
config := NewScrapeConfigQuiet()
|
||||
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
|
||||
got := c.Author()
|
||||
@@ -64,7 +64,7 @@ func TestAuthor(t *testing.T) {
|
||||
|
||||
func TestContent(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfig()
|
||||
config := NewScrapeConfigQuiet()
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
|
||||
got := c.Content()
|
||||
@@ -78,10 +78,10 @@ func TestContent(t *testing.T) {
|
||||
|
||||
func TestDelay(t *testing.T) {
|
||||
|
||||
config0 := NewScrapeConfig()
|
||||
config0 := NewScrapeConfigQuiet()
|
||||
config0.Delay = 500
|
||||
|
||||
config1 := NewScrapeConfig()
|
||||
config1 := NewScrapeConfigQuiet()
|
||||
|
||||
start := time.Now()
|
||||
NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
|
||||
@@ -98,7 +98,7 @@ func TestDelay(t *testing.T) {
|
||||
|
||||
func TestContentImagesOnly(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfig()
|
||||
config := NewScrapeConfigQuiet()
|
||||
config.ImagesOnly = true
|
||||
|
||||
c := NewChapterFromURL("https://12factor.net/codebase", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
@@ -114,8 +114,8 @@ func TestContentImagesOnly(t *testing.T) {
|
||||
|
||||
func TestSubChapters(t *testing.T) {
|
||||
|
||||
config0 := NewScrapeConfig()
|
||||
config1 := NewScrapeConfig()
|
||||
config0 := NewScrapeConfigQuiet()
|
||||
config1 := NewScrapeConfigQuiet()
|
||||
|
||||
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
|
||||
|
||||
@@ -130,8 +130,8 @@ func TestSubChapters(t *testing.T) {
|
||||
|
||||
func TestSubChaptersRSS(t *testing.T) {
|
||||
|
||||
config0 := NewScrapeConfig()
|
||||
config1 := NewScrapeConfig()
|
||||
config0 := NewScrapeConfigQuiet()
|
||||
config1 := NewScrapeConfigQuiet()
|
||||
|
||||
c := NewChapterFromURL("https://www.nginx.com/feed/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
|
||||
|
||||
@@ -146,10 +146,10 @@ func TestSubChaptersRSS(t *testing.T) {
|
||||
|
||||
func TestSubChaptersSelector(t *testing.T) {
|
||||
|
||||
config0 := NewScrapeConfig()
|
||||
config0 := NewScrapeConfigQuiet()
|
||||
config0.Selector = "body > aside > p > a"
|
||||
|
||||
config1 := NewScrapeConfig()
|
||||
config1 := NewScrapeConfigQuiet()
|
||||
|
||||
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
|
||||
|
||||
@@ -164,10 +164,10 @@ func TestSubChaptersSelector(t *testing.T) {
|
||||
|
||||
func TestSubChaptersLimit(t *testing.T) {
|
||||
|
||||
config0 := NewScrapeConfig()
|
||||
config0 := NewScrapeConfigQuiet()
|
||||
config0.Limit = 1
|
||||
|
||||
config1 := NewScrapeConfig()
|
||||
config1 := NewScrapeConfigQuiet()
|
||||
|
||||
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
|
||||
|
||||
@@ -182,10 +182,10 @@ func TestSubChaptersLimit(t *testing.T) {
|
||||
|
||||
func TestSubChaptersLimitOver(t *testing.T) {
|
||||
|
||||
config0 := NewScrapeConfig()
|
||||
config0 := NewScrapeConfigQuiet()
|
||||
config0.Limit = 15
|
||||
|
||||
config1 := NewScrapeConfig()
|
||||
config1 := NewScrapeConfigQuiet()
|
||||
|
||||
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
|
||||
|
||||
@@ -200,10 +200,10 @@ func TestSubChaptersLimitOver(t *testing.T) {
|
||||
|
||||
func TestReverse(t *testing.T) {
|
||||
|
||||
config0 := NewScrapeConfig()
|
||||
config0 := NewScrapeConfigQuiet()
|
||||
config0.Reverse = true
|
||||
|
||||
config1 := NewScrapeConfig()
|
||||
config1 := NewScrapeConfigQuiet()
|
||||
|
||||
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
|
||||
|
||||
@@ -218,7 +218,7 @@ func TestReverse(t *testing.T) {
|
||||
|
||||
func TestNotInclude(t *testing.T) {
|
||||
|
||||
config := NewScrapeConfig()
|
||||
config := NewScrapeConfigQuiet()
|
||||
config.Include = false
|
||||
|
||||
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
|
||||
|
||||
@@ -33,6 +33,7 @@ type GetOptions struct {
|
||||
threads int
|
||||
include bool
|
||||
useLinkName bool
|
||||
printURL bool
|
||||
}
|
||||
|
||||
var getOpts *GetOptions
|
||||
@@ -46,6 +47,7 @@ func init() {
|
||||
getCmd.Flags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)")
|
||||
getCmd.Flags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output")
|
||||
getCmd.Flags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only")
|
||||
getCmd.Flags().BoolVarP(&getOpts.printURL, "print-url", "", false, "print url after chapter title")
|
||||
getCmd.Flags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar")
|
||||
|
||||
// common with list command
|
||||
@@ -147,6 +149,7 @@ var getCmd = &cobra.Command{
|
||||
config.ImagesOnly = getOpts.images
|
||||
config.Include = getOpts.include
|
||||
config.UseLinkName = getOpts.useLinkName
|
||||
config.PrintURL = getOpts.printURL
|
||||
|
||||
// do not use link name for root level as there is not parent link
|
||||
if index == 0 {
|
||||
|
||||
@@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{
|
||||
Use: "version",
|
||||
Short: "Print the version number of papeer",
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
fmt.Println("papeer v0.8.1")
|
||||
fmt.Println("papeer v0.8.2")
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user