mirror of
https://github.com/NohamR/papeer.git
synced 2026-05-25 12:27:20 +00:00
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
99b7d16de7 | ||
|
|
32168718c9 | ||
|
|
403fdcc0f0 | ||
|
|
1b2be1c390 |
@@ -1,6 +1,7 @@
|
||||
package book
|
||||
|
||||
type chapter struct {
|
||||
url string
|
||||
body string
|
||||
name string
|
||||
author string
|
||||
@@ -10,11 +11,11 @@ type chapter struct {
|
||||
}
|
||||
|
||||
func NewEmptyChapter() chapter {
|
||||
return chapter{"", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
|
||||
return chapter{"", "", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
|
||||
}
|
||||
|
||||
func NewChapter(body, name, author, content string, subChapters []chapter, config *ScrapeConfig) chapter {
|
||||
return chapter{body, name, author, content, subChapters, config}
|
||||
func (c chapter) URL() string {
|
||||
return c.url
|
||||
}
|
||||
|
||||
func (c chapter) Body() string {
|
||||
|
||||
@@ -30,6 +30,11 @@ func ToMarkdownString(c chapter) string {
|
||||
markdown += fmt.Sprintf("%s\n", c.Name())
|
||||
markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name())))
|
||||
|
||||
// url
|
||||
if c.config.PrintURL {
|
||||
markdown += fmt.Sprintf("_%s_\n\n", c.URL())
|
||||
}
|
||||
|
||||
// convert content to markdown
|
||||
content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
|
||||
if err != nil {
|
||||
@@ -72,7 +77,15 @@ func ToHtmlString(c chapter) string {
|
||||
|
||||
// chapter content
|
||||
if c.config.Include {
|
||||
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
|
||||
// title
|
||||
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
|
||||
|
||||
// url
|
||||
if c.config.PrintURL {
|
||||
html += fmt.Sprintf("<p><i>%s</i></p>\n", c.URL())
|
||||
}
|
||||
|
||||
// content
|
||||
html += c.Content()
|
||||
}
|
||||
|
||||
@@ -114,19 +127,6 @@ func ToEpub(c chapter, filename string) string {
|
||||
e := epub.NewEpub(c.Name())
|
||||
e.SetAuthor(c.Author())
|
||||
|
||||
AppendToEpub(e, c)
|
||||
|
||||
err := e.Write(filename)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
return filename
|
||||
}
|
||||
|
||||
func AppendToEpub(e *epub.Epub, c chapter) {
|
||||
content := ""
|
||||
|
||||
// append table of content
|
||||
if len(c.SubChapters()) > 1 {
|
||||
html := "<h1>Table of Contents</h1>"
|
||||
@@ -143,6 +143,19 @@ func AppendToEpub(e *epub.Epub, c chapter) {
|
||||
}
|
||||
}
|
||||
|
||||
AppendToEpub(e, c)
|
||||
|
||||
err := e.Write(filename)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
return filename
|
||||
}
|
||||
|
||||
func AppendToEpub(e *epub.Epub, c chapter) {
|
||||
content := ""
|
||||
|
||||
// chapter content
|
||||
if c.config.Include {
|
||||
|
||||
@@ -164,17 +177,24 @@ func AppendToEpub(e *epub.Epub, c chapter) {
|
||||
|
||||
if c.config.ImagesOnly {
|
||||
imageTag, _ := goquery.OuterHtml(s)
|
||||
content += strings.Replace(imageTag, src, imagePath, 1)
|
||||
content += strings.ReplaceAll(imageTag, src, imagePath)
|
||||
} else {
|
||||
content = strings.Replace(content, src, imagePath, 1)
|
||||
content = strings.ReplaceAll(content, src, imagePath)
|
||||
}
|
||||
})
|
||||
|
||||
html := ""
|
||||
// add title only if ImagesOnly = false
|
||||
if c.config.ImagesOnly == false {
|
||||
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
|
||||
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
|
||||
}
|
||||
|
||||
// url
|
||||
if c.config.PrintURL {
|
||||
html += fmt.Sprintf("<p><i>%s</i></p>\n", c.URL())
|
||||
}
|
||||
|
||||
// content
|
||||
html += content
|
||||
|
||||
// write to epub file
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -30,14 +30,19 @@ type ScrapeConfig struct {
|
||||
Include bool
|
||||
ImagesOnly bool
|
||||
UseLinkName bool
|
||||
PrintURL bool
|
||||
}
|
||||
|
||||
func NewScrapeConfig() *ScrapeConfig {
|
||||
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false}
|
||||
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false, false}
|
||||
}
|
||||
|
||||
func NewScrapeConfigQuiet() *ScrapeConfig {
|
||||
return &ScrapeConfig{0, "", true, -1, 0, false, -1, -1, true, false, false, false}
|
||||
}
|
||||
|
||||
func NewScrapeConfigNoInclude() *ScrapeConfig {
|
||||
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false}
|
||||
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false, false}
|
||||
}
|
||||
|
||||
func NewScrapeConfigs(selectors []string) []*ScrapeConfig {
|
||||
@@ -237,7 +242,6 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
|
||||
content = ""
|
||||
doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
||||
imageTag, _ := goquery.OuterHtml(s)
|
||||
// imageTag = strings.ReplaceAll(imageTag, "\n", "")
|
||||
content += imageTag
|
||||
})
|
||||
|
||||
@@ -252,7 +256,7 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
|
||||
|
||||
}
|
||||
|
||||
return chapter{string(body), name, article.Byline, content, subchapters, config}
|
||||
return chapter{url, string(body), name, article.Byline, content, subchapters, config}
|
||||
}
|
||||
|
||||
func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, quiet bool) ([]chapter, chapter) {
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
@@ -33,6 +34,7 @@ type GetOptions struct {
|
||||
threads int
|
||||
include bool
|
||||
useLinkName bool
|
||||
printURL bool
|
||||
}
|
||||
|
||||
var getOpts *GetOptions
|
||||
@@ -46,6 +48,7 @@ func init() {
|
||||
getCmd.Flags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)")
|
||||
getCmd.Flags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output")
|
||||
getCmd.Flags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only")
|
||||
getCmd.Flags().BoolVarP(&getOpts.printURL, "print-url", "", false, "print url after chapter title")
|
||||
getCmd.Flags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar")
|
||||
|
||||
// common with list command
|
||||
@@ -147,6 +150,7 @@ var getCmd = &cobra.Command{
|
||||
config.ImagesOnly = getOpts.images
|
||||
config.Include = getOpts.include
|
||||
config.UseLinkName = getOpts.useLinkName
|
||||
config.PrintURL = getOpts.printURL
|
||||
|
||||
// do not use link name for root level as there is not parent link
|
||||
if index == 0 {
|
||||
@@ -177,6 +181,7 @@ var getCmd = &cobra.Command{
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
os.Remove(filename)
|
||||
|
||||
fmt.Println(string(bytesRead))
|
||||
} else {
|
||||
@@ -191,6 +196,7 @@ var getCmd = &cobra.Command{
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
os.Remove(filename)
|
||||
|
||||
book := make(map[string]interface{})
|
||||
book["name"] = c.Name()
|
||||
@@ -212,6 +218,7 @@ var getCmd = &cobra.Command{
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
os.Remove(filename)
|
||||
|
||||
fmt.Println(string(bytesRead))
|
||||
} else {
|
||||
@@ -227,6 +234,7 @@ var getCmd = &cobra.Command{
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
os.Remove(filename)
|
||||
|
||||
fmt.Println(string(bytesRead))
|
||||
} else {
|
||||
@@ -242,6 +250,7 @@ var getCmd = &cobra.Command{
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
os.Remove(filename)
|
||||
|
||||
fmt.Println(string(bytesRead))
|
||||
} else {
|
||||
|
||||
@@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{
|
||||
Use: "version",
|
||||
Short: "Print the version number of papeer",
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
fmt.Println("papeer v0.8.1")
|
||||
fmt.Println("papeer v0.8.4")
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user