1 Commits

Author SHA1 Message Date
√(noham)²
cbf385d5ac fix rendering issues 2025-05-29 21:55:33 +02:00
5 changed files with 59 additions and 19 deletions

13
.gitignore vendored Normal file
View File

@@ -0,0 +1,13 @@
.DS_Store
links.txt
links/2018.txt
links/2019.txt
links/2020.txt
links/2021.txt
links/2022.txt
links/2023.txt
links/2024.txt
links/2025.txt
papeer
all-years.py
all.py

View File

@@ -2,6 +2,7 @@ package book
import ( import (
"fmt" "fmt"
"html"
"log" "log"
"os" "os"
"os/exec" "os/exec"
@@ -10,6 +11,7 @@ import (
md "github.com/JohannesKaufmann/html-to-markdown" md "github.com/JohannesKaufmann/html-to-markdown"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
epub "github.com/bmaupin/go-epub" epub "github.com/bmaupin/go-epub"
"github.com/microcosm-cc/bluemonday"
) )
func Filename(name string) string { func Filename(name string) string {
@@ -61,7 +63,7 @@ func ToMarkdown(c chapter, filename string) string {
// write to file // write to file
f, err := os.Create(filename) f, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
_, err2 := f.WriteString(markdown) _, err2 := f.WriteString(markdown)
if err2 != nil { if err2 != nil {
@@ -73,28 +75,28 @@ func ToMarkdown(c chapter, filename string) string {
} }
func ToHtmlString(c chapter) string { func ToHtmlString(c chapter) string {
html := "" htmlContent := ""
// chapter content // chapter content
if c.config.Include { if c.config.Include {
// title // title
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name()) htmlContent += fmt.Sprintf("<h1>%s</h1>\n", html.EscapeString(c.Name()))
// url // url
if c.config.PrintURL { if c.config.PrintURL {
html += fmt.Sprintf("<p><i>%s</i></p>\n", c.URL()) htmlContent += fmt.Sprintf("<p><i>%s</i></p>\n", html.EscapeString(c.URL()))
} }
// content // content
html += c.Content() htmlContent += c.Content()
} }
// subchapters content // subchapters content
for _, sc := range c.SubChapters() { for _, sc := range c.SubChapters() {
html += ToHtmlString(sc) htmlContent += ToHtmlString(sc)
} }
return html return htmlContent
} }
func ToHtml(c chapter, filename string) string { func ToHtml(c chapter, filename string) string {
@@ -125,7 +127,11 @@ func ToEpub(c chapter, filename string) string {
// init ebook // init ebook
e := epub.NewEpub(c.Name()) e := epub.NewEpub(c.Name())
e.SetAuthor(c.Author()) author := c.Author()
if author == "" {
author = "Unknown Author"
}
e.SetAuthor(author)
// append table of content // append table of content
if len(c.SubChapters()) > 1 { if len(c.SubChapters()) > 1 {
@@ -155,16 +161,18 @@ func ToEpub(c chapter, filename string) string {
func AppendToEpub(e *epub.Epub, c chapter) { func AppendToEpub(e *epub.Epub, c chapter) {
content := "" content := ""
p := bluemonday.UGCPolicy()
safeHTML := p.Sanitize(c.Content())
// chapter content // chapter content
if c.config.Include { if c.config.Include {
if c.config.ImagesOnly == false { if c.config.ImagesOnly == false {
content = c.Content() content = safeHTML
} }
// parse content // parse content
doc, err := goquery.NewDocumentFromReader(strings.NewReader(c.Content())) doc, err := goquery.NewDocumentFromReader(strings.NewReader(safeHTML))
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
@@ -175,6 +183,10 @@ func AppendToEpub(e *epub.Epub, c chapter) {
src = strings.Split(src, "?")[0] // remove query part src = strings.Split(src, "?")[0] // remove query part
imagePath, _ := e.AddImage(src, "") imagePath, _ := e.AddImage(src, "")
// Remove or fix invalid width/height attributes
s.RemoveAttr("width")
s.RemoveAttr("height")
if c.config.ImagesOnly { if c.config.ImagesOnly {
imageTag, _ := goquery.OuterHtml(s) imageTag, _ := goquery.OuterHtml(s)
content += strings.ReplaceAll(imageTag, src, imagePath) content += strings.ReplaceAll(imageTag, src, imagePath)
@@ -183,22 +195,22 @@ func AppendToEpub(e *epub.Epub, c chapter) {
} }
}) })
html := "" htmlContent := ""
// add title only if ImagesOnly = false // add title only if ImagesOnly = false
if c.config.ImagesOnly == false { if c.config.ImagesOnly == false {
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name()) htmlContent += fmt.Sprintf("<h1>%s</h1>\n", html.EscapeString(c.Name()))
} }
// url // url
if c.config.PrintURL { if c.config.PrintURL {
html += fmt.Sprintf("<p><i>%s</i></p>\n", c.URL()) htmlContent += fmt.Sprintf("<p><i>%s</i></p>\n", html.EscapeString(c.URL()))
} }
// content // content
html += content htmlContent += content
// write to epub file // write to epub file
_, err = e.AddSection(html, c.Name(), "", "") _, err = e.AddSection(htmlContent, c.Name(), "", "")
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }

View File

@@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{
Use: "version", Use: "version",
Short: "Print the version number of papeer", Short: "Print the version number of papeer",
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
fmt.Println("papeer v0.8.4") fmt.Println("papeer v0.8.5")
}, },
} }

9
go.mod
View File

@@ -24,6 +24,7 @@ require (
github.com/antchfx/htmlquery v1.3.0 // indirect github.com/antchfx/htmlquery v1.3.0 // indirect
github.com/antchfx/xmlquery v1.3.15 // indirect github.com/antchfx/xmlquery v1.3.15 // indirect
github.com/antchfx/xpath v1.2.4 // indirect github.com/antchfx/xpath v1.2.4 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.1 // indirect github.com/gabriel-vasile/mimetype v1.4.1 // indirect
github.com/go-shiori/dom v0.0.0-20210627111528-4e4722cd0d65 // indirect github.com/go-shiori/dom v0.0.0-20210627111528-4e4722cd0d65 // indirect
github.com/gobwas/glob v0.2.3 // indirect github.com/gobwas/glob v0.2.3 // indirect
@@ -31,11 +32,13 @@ require (
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.2 // indirect github.com/golang/protobuf v1.5.2 // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect github.com/mattn/go-isatty v0.0.17 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
github.com/mmcdole/goxpp v1.1.0 // indirect github.com/mmcdole/goxpp v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
@@ -45,9 +48,9 @@ require (
github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/pflag v1.0.5 // indirect
github.com/temoto/robotstxt v1.1.2 // indirect github.com/temoto/robotstxt v1.1.2 // indirect
github.com/vincent-petithory/dataurl v1.0.0 // indirect github.com/vincent-petithory/dataurl v1.0.0 // indirect
golang.org/x/net v0.8.0 // indirect golang.org/x/net v0.26.0 // indirect
golang.org/x/sys v0.6.0 // indirect golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.8.0 // indirect golang.org/x/text v0.16.0 // indirect
google.golang.org/appengine v1.6.7 // indirect google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.1 // indirect google.golang.org/protobuf v1.28.1 // indirect
) )

12
go.sum
View File

@@ -25,6 +25,8 @@ github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwq
github.com/antchfx/xpath v1.2.4 h1:dW1HB/JxKvGtJ9WyVGJ0sIoEcqftV3SqIstujI+B9XY= github.com/antchfx/xpath v1.2.4 h1:dW1HB/JxKvGtJ9WyVGJ0sIoEcqftV3SqIstujI+B9XY=
github.com/antchfx/xpath v1.2.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= github.com/antchfx/xpath v1.2.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/bmaupin/go-epub v1.0.1 h1:LLbczYCXO/1sGpFd4/QRaDiEhevo4PYQxBQClZPRoco= github.com/bmaupin/go-epub v1.0.1 h1:LLbczYCXO/1sGpFd4/QRaDiEhevo4PYQxBQClZPRoco=
@@ -105,6 +107,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/gosuri/uilive v0.0.4 h1:hUEBpQDj8D8jXgtCdBu7sWsy5sbW/5GhuO8KBwJ2jyY= github.com/gosuri/uilive v0.0.4 h1:hUEBpQDj8D8jXgtCdBu7sWsy5sbW/5GhuO8KBwJ2jyY=
github.com/gosuri/uilive v0.0.4/go.mod h1:V/epo5LjjlDE5RJUcqx8dbw+zc93y5Ya3yg8tfZ74VI= github.com/gosuri/uilive v0.0.4/go.mod h1:V/epo5LjjlDE5RJUcqx8dbw+zc93y5Ya3yg8tfZ74VI=
@@ -141,6 +145,8 @@ github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mmcdole/gofeed v1.2.1 h1:tPbFN+mfOLcM1kDF1x2c/N68ChbdBatkppdzf/vDe1s= github.com/mmcdole/gofeed v1.2.1 h1:tPbFN+mfOLcM1kDF1x2c/N68ChbdBatkppdzf/vDe1s=
@@ -273,6 +279,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -303,6 +311,8 @@ golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
@@ -319,6 +329,8 @@ golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=