5 Commits

Author SHA1 Message Date
lapwat
403fdcc0f0 [get] print url option 2024-08-14 23:32:33 +02:00
lapwat
1b2be1c390 update tests 2024-08-14 14:32:00 +02:00
lapwat
4521497d12 add pub date to chapter skeleton 2023-10-01 22:07:38 +02:00
lapwat
2cbcf17cc2 [get] json format 2023-10-01 16:00:27 +02:00
Dei Layborer
29935be2c3 Add arm64 build actions (#14)
Add arm64 build actions for darwin/macOS and linux.
2023-09-02 11:49:35 +02:00
10 changed files with 186 additions and 97 deletions

View File

@@ -9,10 +9,14 @@ jobs:
strategy:
matrix:
goos: [linux, darwin, windows]
goarch: [amd64, arm64]
exclude:
- goarch: arm64
goos: windows
steps:
- uses: actions/checkout@v3
- uses: wangyoucao577/go-release-action@v1.30
- uses: wangyoucao577/go-release-action@v1.40
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
goos: ${{ matrix.goos }}
goarch: amd64
goarch: ${{ matrix.goarch }}

View File

@@ -40,7 +40,9 @@ Download [latest release](https://github.com/lapwat/papeer/releases/latest) for
## MOBI support
Install kindlegen to convert websites, Linux only.
> Kindle e-readers now support EPUB format
Install kindlegen to export websites to Kindle-compatible ebooks (Linux only).
```sh
TMPDIR=$(mktemp -d -t papeer-XXXXX)

View File

@@ -1,6 +1,7 @@
package book
type chapter struct {
url string
body string
name string
author string
@@ -10,11 +11,11 @@ type chapter struct {
}
func NewEmptyChapter() chapter {
return chapter{"", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
return chapter{"", "", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
}
func NewChapter(body, name, author, content string, subChapters []chapter, config *ScrapeConfig) chapter {
return chapter{body, name, author, content, subChapters, config}
// URL returns the value of the chapter's url field.
func (c chapter) URL() string {
return c.url
}
func (c chapter) Body() string {

View File

@@ -30,6 +30,11 @@ func ToMarkdownString(c chapter) string {
markdown += fmt.Sprintf("%s\n", c.Name())
markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name())))
// url
if c.config.PrintURL {
markdown += fmt.Sprintf("_Source: %s_\n\n", c.URL())
}
// convert content to markdown
content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
if err != nil {
@@ -72,7 +77,15 @@ func ToHtmlString(c chapter) string {
// chapter content
if c.config.Include {
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
// title
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
}
// content
html += c.Content()
}
@@ -114,19 +127,6 @@ func ToEpub(c chapter, filename string) string {
e := epub.NewEpub(c.Name())
e.SetAuthor(c.Author())
AppendToEpub(e, c)
err := e.Write(filename)
if err != nil {
log.Fatal(err)
}
return filename
}
func AppendToEpub(e *epub.Epub, c chapter) {
content := ""
// append table of content
if len(c.SubChapters()) > 1 {
html := "<h1>Table of Contents</h1>"
@@ -143,6 +143,19 @@ func AppendToEpub(e *epub.Epub, c chapter) {
}
}
AppendToEpub(e, c)
err := e.Write(filename)
if err != nil {
log.Fatal(err)
}
return filename
}
func AppendToEpub(e *epub.Epub, c chapter) {
content := ""
// chapter content
if c.config.Include {
@@ -173,8 +186,15 @@ func AppendToEpub(e *epub.Epub, c chapter) {
html := ""
// add title only if ImagesOnly = false
if c.config.ImagesOnly == false {
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
}
// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
}
// content
html += content
// write to epub file

File diff suppressed because one or more lines are too long

View File

@@ -1,10 +1,13 @@
package book
import "time"
type link struct {
Href string `json:"url"`
Text string `json:"name"`
Href string `json:"url"`
Text string `json:"name"`
Date *time.Time `json:"date"`
}
func NewLink(href, text string) link {
return link{href, text}
func NewLink(href, text string, date *time.Time) link {
return link{href, text, date}
}

View File

@@ -30,14 +30,19 @@ type ScrapeConfig struct {
Include bool
ImagesOnly bool
UseLinkName bool
PrintURL bool
}
func NewScrapeConfig() *ScrapeConfig {
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false}
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false, false}
}
// NewScrapeConfigQuiet returns a pointer to a ScrapeConfig built from a
// positional literal. The last four values populate Include (true),
// ImagesOnly, UseLinkName and PrintURL (all false).
// NOTE(review): the third value is true here, presumably the quiet flag
// given the function name; the leading fields of ScrapeConfig are not
// visible in this hunk, so confirm against the struct definition.
func NewScrapeConfigQuiet() *ScrapeConfig {
return &ScrapeConfig{0, "", true, -1, 0, false, -1, -1, true, false, false, false}
}
func NewScrapeConfigNoInclude() *ScrapeConfig {
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false}
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false, false}
}
func NewScrapeConfigs(selectors []string) []*ScrapeConfig {
@@ -252,7 +257,7 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
}
return chapter{string(body), name, article.Byline, content, subchapters, config}
return chapter{url, string(body), name, article.Byline, content, subchapters, config}
}
func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, quiet bool) ([]chapter, chapter) {
@@ -370,7 +375,7 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, reverse, incl
log.Fatal(err)
}
links = append(links, NewLink(u.String(), item.Title))
links = append(links, NewLink(u.String(), item.Title, item.PublishedParsed))
}
pathMax = "RSS"
@@ -405,7 +410,7 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, reverse, incl
// if selector is set, we use the selector specified by the user
key = selector
pathLinks[key] = append(pathLinks[key], NewLink(href, text))
pathLinks[key] = append(pathLinks[key], NewLink(href, text, &time.Time{}))
pathCount[key] += 1
pathMax = key
@@ -419,7 +424,7 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, reverse, incl
// we count this key if the link text is not empty
if text != "" {
pathLinks[key] = append(pathLinks[key], NewLink(href, text))
pathLinks[key] = append(pathLinks[key], NewLink(href, text, &time.Time{}))
pathCount[key] += len(text)
if pathCount[key] > pathCount[pathMax] {
@@ -449,7 +454,7 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, reverse, incl
// include home page
if include {
l := NewLink(url.String(), home.Name())
l := NewLink(url.String(), home.Name(), &time.Time{})
links = append([]link{l}, links...)
}

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,7 @@
package cmd
import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
@@ -32,6 +33,7 @@ type GetOptions struct {
threads int
include bool
useLinkName bool
printURL bool
}
var getOpts *GetOptions
@@ -41,10 +43,11 @@ func init() {
getCmd.Flags().StringVarP(&getOpts.name, "name", "n", "", "book name (default: page title)")
getCmd.Flags().StringVarP(&getOpts.author, "author", "a", "", "book author")
getCmd.Flags().StringVarP(&getOpts.Format, "format", "f", "md", "file format [md, html, epub, mobi]")
getCmd.Flags().StringVarP(&getOpts.Format, "format", "f", "md", "file format [md, json, html, epub, mobi]")
getCmd.Flags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)")
getCmd.Flags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output")
getCmd.Flags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only")
getCmd.Flags().BoolVarP(&getOpts.printURL, "print-url", "", false, "print url after chapter title")
getCmd.Flags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar")
// common with list command
@@ -73,6 +76,7 @@ var getCmd = &cobra.Command{
// check provided format is in list
formatEnum := map[string]bool{
"md": true,
"json": true,
"html": true,
"epub": true,
"mobi": true,
@@ -145,6 +149,7 @@ var getCmd = &cobra.Command{
config.ImagesOnly = getOpts.images
config.Include = getOpts.include
config.UseLinkName = getOpts.useLinkName
config.PrintURL = getOpts.printURL
// do not use link name for root level as there is no parent link
if index == 0 {
@@ -182,6 +187,26 @@ var getCmd = &cobra.Command{
}
}
// json format: render the chapter through book.ToMarkdown, read the result
// back, and print a JSON object with the chapter name and that text.
if getOpts.Format == "json" {
// the value returned by ToMarkdown is used as a file path below
// (presumably the Markdown file it wrote; confirm in package book)
filename := book.ToMarkdown(c, getOpts.output)
bytesRead, err := ioutil.ReadFile(filename)
if err != nil {
log.Fatal(err)
}
// NOTE(review): from here to the end of this block the local map named
// book shadows the book qualifier used in the ToMarkdown call above.
book := make(map[string]interface{})
book["name"] = c.Name()
book["content"] = string(bytesRead)
// json.Marshal emits map keys in sorted order: "content", then "name"
bookJson, err := json.Marshal(book)
if err != nil {
log.Fatal(err)
}
// the JSON document is written to standard output
fmt.Println(string(bookJson))
}
if getOpts.Format == "html" {
filename := book.ToHtml(c, getOpts.output)

View File

@@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{
Use: "version",
Short: "Print the version number of papeer",
Run: func(cmd *cobra.Command, args []string) {
fmt.Println("papeer v0.7.1")
fmt.Println("papeer v0.8.2")
},
}