mirror of
https://github.com/NohamR/papeer.git
synced 2026-05-25 04:17:19 +00:00
refacto get command, fix: images option
This commit is contained in:
157
cmd/get.go
157
cmd/get.go
@@ -5,18 +5,14 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
md "github.com/JohannesKaufmann/html-to-markdown"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
epub "github.com/bmaupin/go-epub"
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/lapwat/papeer/book"
|
||||
)
|
||||
|
||||
var recursive, include, images bool
|
||||
var recursive, include, images, quiet bool
|
||||
var format, output, selector, name, author string
|
||||
var limit, offset, delay, threads int
|
||||
|
||||
@@ -79,159 +75,46 @@ var getCmd = &cobra.Command{
|
||||
url := args[0]
|
||||
b := book.NewBookFromURL(url, selector, name, author, recursive, include, images, limit, offset, delay, threads)
|
||||
|
||||
if len(output) == 0 {
|
||||
// set default output
|
||||
output = strings.ReplaceAll(b.Name(), " ", "_")
|
||||
output = strings.ReplaceAll(output, "/", "")
|
||||
output = fmt.Sprintf("%s.%s", output, format)
|
||||
}
|
||||
fakeConfig := book.NewScrapeConfigFake()
|
||||
fakeChapter := book.NewChapter("", b.Name(), b.Author(), "", b.Chapters(), fakeConfig)
|
||||
|
||||
if format == "stdout" {
|
||||
|
||||
for _, c := range b.Chapters() {
|
||||
// convert to markdown
|
||||
content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
text := fmt.Sprintf("%s\n%s\n\n%s\n\n\n", c.Name(), strings.Repeat("=", len(c.Name())), content)
|
||||
|
||||
// write to stdout
|
||||
fmt.Println(text)
|
||||
}
|
||||
|
||||
// TODO: ToMarkdownString
|
||||
markdown := book.ToMarkdown(fakeChapter)
|
||||
fmt.Println(markdown)
|
||||
}
|
||||
|
||||
if format == "md" {
|
||||
// TODO: ToMarkdownFile
|
||||
markdown := book.ToMarkdown(fakeChapter)
|
||||
|
||||
// create markdown file
|
||||
if len(output) == 0 {
|
||||
filename := book.Filename(fakeChapter.Name())
|
||||
output = fmt.Sprintf("%s.md", filename)
|
||||
}
|
||||
|
||||
// write to file
|
||||
f, err := os.Create(output)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
for _, c := range b.Chapters() {
|
||||
// convert to markdown
|
||||
content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
text := fmt.Sprintf("%s\n%s\n\n%s\n\n\n", c.Name(), strings.Repeat("=", len(c.Name())), content)
|
||||
|
||||
// write to markdown file
|
||||
_, err = f.WriteString(text)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
_, err2 := f.WriteString(markdown)
|
||||
if err2 != nil {
|
||||
log.Fatal(err2)
|
||||
}
|
||||
f.Close()
|
||||
|
||||
fmt.Printf("Markdown saved to \"%s\"\n", output)
|
||||
}
|
||||
|
||||
if format == "epub" {
|
||||
e := epub.NewEpub(b.Name())
|
||||
e.SetAuthor(b.Author())
|
||||
|
||||
for _, c := range b.Chapters() {
|
||||
var content string
|
||||
|
||||
if images == false {
|
||||
content = c.Content()
|
||||
}
|
||||
|
||||
// parse content
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(c.Content()))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// retrieve images and download it
|
||||
doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
||||
src, _ := s.Attr("src")
|
||||
imagePath, _ := e.AddImage(src, "")
|
||||
|
||||
if images {
|
||||
imageTag, _ := goquery.OuterHtml(s)
|
||||
content += imageTag
|
||||
}
|
||||
|
||||
content = strings.ReplaceAll(content, src, imagePath)
|
||||
})
|
||||
|
||||
html := fmt.Sprintf("<h1>%s</h1>%s", c.Name(), content)
|
||||
_, err = e.AddSection(html, c.Name(), "", "")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
err := e.Write(output)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
output = book.ToEpub(fakeChapter, output)
|
||||
fmt.Printf("Ebook saved to \"%s\"\n", output)
|
||||
}
|
||||
|
||||
if format == "mobi" {
|
||||
e := epub.NewEpub(b.Name())
|
||||
e.SetAuthor(b.Author())
|
||||
|
||||
for _, c := range b.Chapters() {
|
||||
var content string
|
||||
|
||||
if images == false {
|
||||
content = c.Content()
|
||||
}
|
||||
|
||||
// parse content
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(c.Content()))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// retrieve images and download it
|
||||
doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
||||
src, _ := s.Attr("src")
|
||||
imagePath, _ := e.AddImage(src, "")
|
||||
|
||||
if images {
|
||||
imageTag, _ := goquery.OuterHtml(s)
|
||||
content += imageTag
|
||||
}
|
||||
|
||||
content = strings.ReplaceAll(content, src, imagePath)
|
||||
})
|
||||
|
||||
html := fmt.Sprintf("<h1>%s</h1>%s", c.Name(), content)
|
||||
_, err = e.AddSection(html, c.Name(), "", "")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
outputEPUB := strings.ReplaceAll(output, ".mobi", ".epub")
|
||||
|
||||
err := e.Write(outputEPUB)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
exec.Command("kindlegen", outputEPUB).Run()
|
||||
// exec command always return status 1 even if it succeed
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
|
||||
output = book.ToMobi(fakeChapter, output)
|
||||
fmt.Printf("Ebook saved to \"%s\"\n", output)
|
||||
|
||||
err = os.Remove(outputEPUB)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user