4 Commits

Author SHA1 Message Date
q
99b7d16de7 stdout: remove file after use 2025-03-06 18:26:14 +01:00
q
32168718c9 fix tests, fix hidden images, remove source label under title 2025-01-12 18:57:33 +01:00
lapwat
403fdcc0f0 [get] print url option 2024-08-14 23:32:33 +02:00
lapwat
1b2be1c390 update tests 2024-08-14 14:32:00 +02:00
7 changed files with 149 additions and 83 deletions

View File

@@ -1,6 +1,7 @@
package book package book
type chapter struct { type chapter struct {
url string
body string body string
name string name string
author string author string
@@ -10,11 +11,11 @@ type chapter struct {
} }
func NewEmptyChapter() chapter { func NewEmptyChapter() chapter {
return chapter{"", "", "", "", []chapter{}, NewScrapeConfigNoInclude()} return chapter{"", "", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
} }
func NewChapter(body, name, author, content string, subChapters []chapter, config *ScrapeConfig) chapter { func (c chapter) URL() string {
return chapter{body, name, author, content, subChapters, config} return c.url
} }
func (c chapter) Body() string { func (c chapter) Body() string {

View File

@@ -30,6 +30,11 @@ func ToMarkdownString(c chapter) string {
markdown += fmt.Sprintf("%s\n", c.Name()) markdown += fmt.Sprintf("%s\n", c.Name())
markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name()))) markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name())))
// url
if c.config.PrintURL {
markdown += fmt.Sprintf("_%s_\n\n", c.URL())
}
// convert content to markdown // convert content to markdown
content, err := md.NewConverter("", true, nil).ConvertString(c.Content()) content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
if err != nil { if err != nil {
@@ -72,7 +77,15 @@ func ToHtmlString(c chapter) string {
// chapter content // chapter content
if c.config.Include { if c.config.Include {
html += fmt.Sprintf("<h1>%s</h1>", c.Name()) // title
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>%s</i></p>\n", c.URL())
}
// content
html += c.Content() html += c.Content()
} }
@@ -114,19 +127,6 @@ func ToEpub(c chapter, filename string) string {
e := epub.NewEpub(c.Name()) e := epub.NewEpub(c.Name())
e.SetAuthor(c.Author()) e.SetAuthor(c.Author())
AppendToEpub(e, c)
err := e.Write(filename)
if err != nil {
log.Fatal(err)
}
return filename
}
func AppendToEpub(e *epub.Epub, c chapter) {
content := ""
// append table of contents // append table of contents
if len(c.SubChapters()) > 1 { if len(c.SubChapters()) > 1 {
html := "<h1>Table of Contents</h1>" html := "<h1>Table of Contents</h1>"
@@ -143,6 +143,19 @@ func AppendToEpub(e *epub.Epub, c chapter) {
} }
} }
AppendToEpub(e, c)
err := e.Write(filename)
if err != nil {
log.Fatal(err)
}
return filename
}
func AppendToEpub(e *epub.Epub, c chapter) {
content := ""
// chapter content // chapter content
if c.config.Include { if c.config.Include {
@@ -164,17 +177,24 @@ func AppendToEpub(e *epub.Epub, c chapter) {
if c.config.ImagesOnly { if c.config.ImagesOnly {
imageTag, _ := goquery.OuterHtml(s) imageTag, _ := goquery.OuterHtml(s)
content += strings.Replace(imageTag, src, imagePath, 1) content += strings.ReplaceAll(imageTag, src, imagePath)
} else { } else {
content = strings.Replace(content, src, imagePath, 1) content = strings.ReplaceAll(content, src, imagePath)
} }
}) })
html := "" html := ""
// add title only if ImagesOnly = false // add title only if ImagesOnly = false
if c.config.ImagesOnly == false { if c.config.ImagesOnly == false {
html += fmt.Sprintf("<h1>%s</h1>", c.Name()) html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
} }
// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>%s</i></p>\n", c.URL())
}
// content
html += content html += content
// write to epub file // write to epub file

File diff suppressed because one or more lines are too long

View File

@@ -30,14 +30,19 @@ type ScrapeConfig struct {
Include bool Include bool
ImagesOnly bool ImagesOnly bool
UseLinkName bool UseLinkName bool
PrintURL bool
} }
func NewScrapeConfig() *ScrapeConfig { func NewScrapeConfig() *ScrapeConfig {
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false} return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false, false}
}
func NewScrapeConfigQuiet() *ScrapeConfig {
return &ScrapeConfig{0, "", true, -1, 0, false, -1, -1, true, false, false, false}
} }
func NewScrapeConfigNoInclude() *ScrapeConfig { func NewScrapeConfigNoInclude() *ScrapeConfig {
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false} return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false, false}
} }
func NewScrapeConfigs(selectors []string) []*ScrapeConfig { func NewScrapeConfigs(selectors []string) []*ScrapeConfig {
@@ -237,7 +242,6 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
content = "" content = ""
doc.Find("img").Each(func(i int, s *goquery.Selection) { doc.Find("img").Each(func(i int, s *goquery.Selection) {
imageTag, _ := goquery.OuterHtml(s) imageTag, _ := goquery.OuterHtml(s)
// imageTag = strings.ReplaceAll(imageTag, "\n", "")
content += imageTag content += imageTag
}) })
@@ -252,7 +256,7 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
} }
return chapter{string(body), name, article.Byline, content, subchapters, config} return chapter{url, string(body), name, article.Byline, content, subchapters, config}
} }
func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, quiet bool) ([]chapter, chapter) { func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, quiet bool) ([]chapter, chapter) {

File diff suppressed because one or more lines are too long

View File

@@ -6,6 +6,7 @@ import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log" "log"
"os"
"strings" "strings"
"github.com/spf13/cobra" "github.com/spf13/cobra"
@@ -33,6 +34,7 @@ type GetOptions struct {
threads int threads int
include bool include bool
useLinkName bool useLinkName bool
printURL bool
} }
var getOpts *GetOptions var getOpts *GetOptions
@@ -46,6 +48,7 @@ func init() {
getCmd.Flags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)") getCmd.Flags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)")
getCmd.Flags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output") getCmd.Flags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output")
getCmd.Flags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only") getCmd.Flags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only")
getCmd.Flags().BoolVarP(&getOpts.printURL, "print-url", "", false, "print url after chapter title")
getCmd.Flags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar") getCmd.Flags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar")
// common with list command // common with list command
@@ -147,6 +150,7 @@ var getCmd = &cobra.Command{
config.ImagesOnly = getOpts.images config.ImagesOnly = getOpts.images
config.Include = getOpts.include config.Include = getOpts.include
config.UseLinkName = getOpts.useLinkName config.UseLinkName = getOpts.useLinkName
config.PrintURL = getOpts.printURL
// do not use link name for root level as there is no parent link // do not use link name for root level as there is no parent link
if index == 0 { if index == 0 {
@@ -177,6 +181,7 @@ var getCmd = &cobra.Command{
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
os.Remove(filename)
fmt.Println(string(bytesRead)) fmt.Println(string(bytesRead))
} else { } else {
@@ -191,6 +196,7 @@ var getCmd = &cobra.Command{
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
os.Remove(filename)
book := make(map[string]interface{}) book := make(map[string]interface{})
book["name"] = c.Name() book["name"] = c.Name()
@@ -212,6 +218,7 @@ var getCmd = &cobra.Command{
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
os.Remove(filename)
fmt.Println(string(bytesRead)) fmt.Println(string(bytesRead))
} else { } else {
@@ -227,6 +234,7 @@ var getCmd = &cobra.Command{
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
os.Remove(filename)
fmt.Println(string(bytesRead)) fmt.Println(string(bytesRead))
} else { } else {
@@ -242,6 +250,7 @@ var getCmd = &cobra.Command{
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
os.Remove(filename)
fmt.Println(string(bytesRead)) fmt.Println(string(bytesRead))
} else { } else {

View File

@@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{
Use: "version", Use: "version",
Short: "Print the version number of papeer", Short: "Print the version number of papeer",
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
fmt.Println("papeer v0.8.1") fmt.Println("papeer v0.8.4")
}, },
} }