first commit

This commit is contained in:
lapwat
2021-09-14 22:21:44 +02:00
commit 14856a109a
12 changed files with 1326 additions and 0 deletions

66
cmd/completion.go Normal file
View File

@@ -0,0 +1,66 @@
package cmd
import (
"github.com/spf13/cobra"
"os"
)
// completionCmd prints a shell completion script for the root command to
// standard output. It accepts exactly one argument naming the target shell:
// bash, zsh, fish or powershell.
var completionCmd = &cobra.Command{
	Use:   "completion [bash|zsh|fish|powershell]",
	Short: "Generate completion script",
	Long: `To load completions:
Bash:
$ source <(papeer completion bash)
# To load completions for each session, execute once:
# Linux:
$ papeer completion bash > /etc/bash_completion.d/papeer
# macOS:
$ papeer completion bash > /usr/local/etc/bash_completion.d/papeer
Zsh:
# If shell completion is not already enabled in your environment,
# you will need to enable it. You can execute the following once:
$ echo "autoload -U compinit; compinit" >> ~/.zshrc
# To load completions for each session, execute once:
$ papeer completion zsh > "${fpath[1]}/_papeer"
# You will need to start a new shell for this setup to take effect.
fish:
$ papeer completion fish | source
# To load completions for each session, execute once:
$ papeer completion fish > ~/.config/fish/completions/papeer.fish
PowerShell:
PS> papeer completion powershell | Out-String | Invoke-Expression
# To load completions for every new session, run:
PS> papeer completion powershell > papeer.ps1
# and source this file from your PowerShell profile.
`,
	DisableFlagsInUseLine: true,
	ValidArgs:             []string{"bash", "zsh", "fish", "powershell"},
	Args:                  cobra.ExactValidArgs(1),
	// RunE (rather than Run) so that a failure while writing the script is
	// reported instead of being silently dropped.
	RunE: func(cmd *cobra.Command, args []string) error {
		// Past argument validation, an error is an output failure rather than
		// a usage mistake: skip the usage dump, and let Execute print the
		// returned error once instead of cobra printing it as well.
		cmd.SilenceUsage = true
		cmd.SilenceErrors = true

		root := cmd.Root()
		switch args[0] {
		case "bash":
			return root.GenBashCompletion(os.Stdout)
		case "zsh":
			return root.GenZshCompletion(os.Stdout)
		case "fish":
			// The boolean asks cobra to include completion descriptions.
			return root.GenFishCompletion(os.Stdout, true)
		case "powershell":
			return root.GenPowerShellCompletionWithDesc(os.Stdout)
		}
		// Unreachable in practice: Args only admits the four shells above.
		return nil
	},
}

155
cmd/get.go Normal file
View File

@@ -0,0 +1,155 @@
package cmd
import (
"errors"
"fmt"
"log"
"os"
"os/exec"
"strings"
md "github.com/JohannesKaufmann/html-to-markdown"
epub "github.com/bmaupin/go-epub"
cobra "github.com/spf13/cobra"
"papeer/book"
)
// Command-line option values shared by the commands of this package. They are
// bound to persistent flags on the root command.
var (
	// Boolean switches: --quiet, --stdout, --recursive and --include.
	quiet, stdout, recursive, include bool

	// String options: --format, --output and --selector.
	format, output, selector string

	// delay is the --delay value, expressed in milliseconds.
	delay int
)
// getCmd implements `papeer get URL`: it scrapes the given URL into a book
// (one chapter, or several in recursive mode) and exports it as Markdown,
// EPUB or MOBI according to the --format flag.
var getCmd = &cobra.Command{
	Use:   "get",
	Short: "Scrape URL content",
	// Args checks that a URL is present and that the flag combination is
	// usable. As a side effect it appends ".mobi" to a user-supplied output
	// name that lacks it when the MOBI format is selected.
	Args: func(cmd *cobra.Command, args []string) error {
		if len(args) < 1 {
			return errors.New("requires an URL argument")
		}

		switch format {
		case "md":
			// Markdown has no further restrictions.
		case "epub", "mobi":
			if stdout {
				return errors.New("cannot print EPUB/MOBI file to standard output")
			}
		default:
			return fmt.Errorf("invalid format specified: %s", format)
		}

		if format == "mobi" && len(output) > 0 && !strings.HasSuffix(output, ".mobi") {
			output += ".mobi"
		}

		if include && !recursive {
			return errors.New("cannot use include option if not in recursive mode")
		}
		return nil
	},
	// Run builds the book and writes it out. Any failure is fatal and ends
	// the process through log.Fatal.
	Run: func(cmd *cobra.Command, args []string) {
		url := args[0]

		var b book.Book
		if recursive {
			b = book.NewBookFromURL(url, selector, include, delay)
		} else {
			c := book.NewChapterFromURL(url)
			b = book.New(c.Name(), c.Author())
			b.AddChapter(c)
		}

		// TODO: the --quiet flag is not honoured yet; metadata printing is
		// currently disabled.

		if len(output) == 0 {
			// Default output name: the book name with spaces turned into
			// underscores and slashes dropped, plus the format as extension.
			name := strings.ReplaceAll(b.Name(), " ", "_")
			name = strings.ReplaceAll(name, "/", "")
			output = fmt.Sprintf("%s.%s", name, format)
		}

		// buildEpub assembles an EPUB holding every chapter of the book. It is
		// shared by the epub and mobi exports.
		buildEpub := func() *epub.Epub {
			e := epub.NewEpub(b.Name())
			e.SetAuthor(b.Author())
			for _, c := range b.Chapters() {
				if _, err := e.AddSection(c.Content(), c.Name(), "", ""); err != nil {
					log.Fatal(err)
				}
			}
			return e
		}

		switch format {
		case "md":
			// Only create the output file when it is actually the target, so
			// that --stdout does not leave an empty file behind.
			out := os.Stdout
			if !stdout {
				f, err := os.Create(output)
				if err != nil {
					log.Fatal(err)
				}
				out = f
			}

			converter := md.NewConverter("", true, nil)
			for _, c := range b.Chapters() {
				content, err := converter.ConvertString(c.Content())
				if err != nil {
					log.Fatal(err)
				}
				// Each chapter is newline-terminated on both targets, so a
				// chapter never runs into the next one.
				if _, err := fmt.Fprintln(out, content); err != nil {
					log.Fatal(err)
				}
			}

			if !stdout {
				if err := out.Close(); err != nil {
					log.Fatal(err)
				}
				fmt.Printf("Markdown saved to \"%s\"\n", output)
			}

		case "epub":
			if err := buildEpub().Write(output); err != nil {
				log.Fatal(err)
			}
			fmt.Printf("Ebook saved to \"%s\"\n", output)

		case "mobi":
			// kindlegen converts an EPUB, so write a temporary one next to the
			// requested output. Only the trailing extension is swapped; Args
			// and the default name above guarantee the ".mobi" suffix.
			outputEPUB := strings.TrimSuffix(output, ".mobi") + ".epub"
			if err := buildEpub().Write(outputEPUB); err != nil {
				log.Fatal(err)
			}

			// The exit status alone is not treated as a failure: the original
			// author noted that kindlegen reports a non-zero status even when
			// it works. Success is judged by the presence of the output file.
			runErr := exec.Command("kindlegen", outputEPUB).Run()

			if err := os.Remove(outputEPUB); err != nil {
				log.Fatal(err)
			}

			if _, statErr := os.Stat(output); statErr != nil {
				if runErr != nil {
					log.Fatal(runErr)
				}
				log.Fatal(statErr)
			}
			fmt.Printf("Ebook saved to \"%s\"\n", output)
		}
	},
}

55
cmd/list.go Normal file
View File

@@ -0,0 +1,55 @@
package cmd
import (
"errors"
"fmt"
"strings"
colly "github.com/gocolly/colly/v2"
cobra "github.com/spf13/cobra"
)
// listCmd implements `papeer ls URL`: it visits the URL, collects the
// elements matched by the --selector flag, groups them by their class
// attribute and prints the largest group as a numbered chapter list.
var listCmd = &cobra.Command{
	Use:   "ls",
	Short: "Print table of content",
	Args: func(cmd *cobra.Command, args []string) error {
		if len(args) < 1 {
			return errors.New("requires an URL argument")
		}
		return nil
	},
	// RunE (rather than Run) so that a failed page visit is reported instead
	// of silently producing an empty listing.
	RunE: func(cmd *cobra.Command, args []string) error {
		// Past argument validation, an error is a runtime failure rather than
		// a usage mistake: skip the usage dump, and let Execute print the
		// returned error once instead of cobra printing it as well.
		cmd.SilenceUsage = true
		cmd.SilenceErrors = true

		url := args[0]

		// link is one matched element: its href attribute and trimmed text.
		type link struct {
			href, text string
		}

		// Matched elements grouped by class attribute. Elements without a
		// class end up under the empty key.
		linksByClass := map[string][]link{}
		// classMax names the class with the most elements seen so far; on a
		// tie the class that reached the count first is kept.
		classMax := ""

		c := colly.NewCollector()
		// NOTE(review): selector defaults to the empty string when the flag is
		// not given; confirm how colly treats an empty selector.
		c.OnHTML(selector, func(e *colly.HTMLElement) {
			class := e.Attr("class")
			linksByClass[class] = append(linksByClass[class], link{
				href: e.Attr("href"),
				text: strings.TrimSpace(e.Text),
			})
			if len(linksByClass[class]) > len(linksByClass[classMax]) {
				classMax = class
			}
		})

		if err := c.Visit(url); err != nil {
			return fmt.Errorf("visiting %s: %w", url, err)
		}

		for index, l := range linksByClass[classMax] {
			fmt.Printf("Chapter %d: %s %s\n", index+1, l.text, l.href)
		}
		return nil
	},
}

37
cmd/root.go Normal file
View File

@@ -0,0 +1,37 @@
package cmd
import (
"fmt"
"os"
"github.com/spf13/cobra"
)
// rootCmd is the top-level `papeer` command. Its own handler deliberately
// does nothing; the useful work lives in the subcommands attached to it from
// the package's init functions.
var rootCmd = &cobra.Command{
	Use:   "papeer",
	Short: "Browse the web in the eink era",
	Run:   func(_ *cobra.Command, _ []string) {},
}
// Execute runs the root command. If the command returns an error, the error
// is written to standard error and the process exits with status 1.
func Execute() {
	err := rootCmd.Execute()
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
// init declares the persistent flags on the root command, so they are
// available to every subcommand, and attaches the get, ls and completion
// subcommands.
func init() {
	flags := rootCmd.PersistentFlags()
	flags.StringVarP(&format, "format", "f", "md", "file format [md, epub, mobi]")
	flags.StringVarP(&output, "output", "o", "", "output file")
	flags.StringVarP(&selector, "selector", "s", "", "table of content CSS selector")
	flags.BoolVarP(&recursive, "recursive", "r", false, "create one chapter per navigation item")
	flags.BoolVarP(&include, "include", "i", false, "include URL as first chapter, in recursive mode")
	flags.BoolVarP(&quiet, "quiet", "q", false, "do not show progress bars")
	// --stdout has no one-letter shorthand.
	flags.BoolVar(&stdout, "stdout", false, "print to standard output")
	flags.IntVarP(&delay, "delay", "d", -1, "wait before downloading next chapter, in milliseconds")

	rootCmd.AddCommand(getCmd)
	rootCmd.AddCommand(listCmd)
	// completionCmd is defined in this package but was never attached to the
	// root command, which left its handler unreachable.
	rootCmd.AddCommand(completionCmd)
}

5
cmd/utils.go Normal file
View File

@@ -0,0 +1,5 @@
package cmd
// getTableOfContent is an empty placeholder: it takes no arguments, returns
// nothing and currently performs no work.
func getTableOfContent() {}

19
cmd/version.go Normal file
View File

@@ -0,0 +1,19 @@
package cmd
import (
"fmt"
"github.com/spf13/cobra"
)
// init attaches the version subcommand to the root command.
func init() { rootCmd.AddCommand(versionCmd) }
// versionCmd implements `papeer version`, which prints the program name and
// its hard-coded version string on standard output.
var versionCmd = &cobra.Command{
	Use:   "version",
	Short: "Print the version number of papeer",
	Run: func(_ *cobra.Command, _ []string) {
		fmt.Println("papeer v0.0.1")
	},
}