limit option, docs

This commit is contained in:
lapwat
2021-09-15 20:25:23 +02:00
parent bb1df3b6a3
commit ac1fb3dd51
5 changed files with 99 additions and 57 deletions

View File

@@ -17,7 +17,7 @@ import (
var quiet, stdout, recursive, include bool
var format, output, selector string
var delay int
var limit, delay int
var getCmd = &cobra.Command{
Use: "get",
@@ -48,10 +48,22 @@ var getCmd = &cobra.Command{
}
}
if include && recursive == false {
if cmd.Flags().Changed("selector") && recursive == false {
return errors.New("cannot use selector option if not in recursive mode")
}
if cmd.Flags().Changed("include") && recursive == false {
return errors.New("cannot use include option if not in recursive mode")
}
if cmd.Flags().Changed("limit") && recursive == false {
return errors.New("cannot use limit option if not in recursive mode")
}
if cmd.Flags().Changed("delay") && recursive == false {
return errors.New("cannot use delay option if not in recursive mode")
}
return nil
},
Run: func(cmd *cobra.Command, args []string) {
@@ -59,7 +71,7 @@ var getCmd = &cobra.Command{
var b book.Book
if recursive {
b = book.NewBookFromURL(url, selector, include, delay)
b = book.NewBookFromURL(url, selector, include, limit, delay)
} else {
c := book.NewChapterFromURL(url)
b = book.New(c.Name(), c.Author())
@@ -104,7 +116,9 @@ var getCmd = &cobra.Command{
}
}
fmt.Printf("Markdown saved to \"%s\"\n", output)
if stdout == false {
fmt.Printf("Markdown saved to \"%s\"\n", output)
}
}
if format == "epub" {

View File

@@ -3,6 +3,8 @@ package cmd
import (
"errors"
"fmt"
"log"
urllib "net/url"
"strings"
colly "github.com/gocolly/colly/v2"
@@ -19,20 +21,27 @@ var listCmd = &cobra.Command{
return nil
},
Run: func(cmd *cobra.Command, args []string) {
url := args[0]
c := colly.NewCollector()
base, err := urllib.Parse(args[0])
if err != nil {
log.Fatal(err)
}
if selector == "" {
selector = "a"
}
// visit and count link classes
classesLinks := map[string][]map[string]string{}
classesCount := map[string]int{}
classMax := ""
c := colly.NewCollector()
c.OnHTML(selector, func(e *colly.HTMLElement) {
href := e.Attr("href")
text := strings.TrimSpace(e.Text)
class := e.Attr("class")
// if class != "" && text != "" {
if cmd.Flags().Changed("selector") || class != "" && text != "" {
classesLinks[class] = append(classesLinks[class], map[string]string{
"href": href,
"text": text,
@@ -43,12 +52,17 @@ var listCmd = &cobra.Command{
if classesCount[class] > classesCount[classMax] {
classMax = class
}
// }
}
})
c.Visit(url)
c.Visit(base.String())
for index, link := range classesLinks[classMax] {
fmt.Printf("Chapter %d: %s %s\n", index+1, link["text"], link["href"])
u, err := base.Parse(link["href"])
if err != nil {
log.Fatal(err)
}
fmt.Printf("Chapter %d: %s %s\n", index+1, link["text"], u.String())
}
},

View File

@@ -28,9 +28,10 @@ func init() {
rootCmd.PersistentFlags().StringVarP(&selector, "selector", "s", "", "table of content CSS selector")
rootCmd.PersistentFlags().BoolVarP(&recursive, "recursive", "r", false, "create one chapter per natigation item")
rootCmd.PersistentFlags().BoolVarP(&include, "include", "i", false, "include URL as first chapter, in resursive mode")
rootCmd.PersistentFlags().BoolVarP(&quiet, "quiet", "q", false, "do not show progress bars")
rootCmd.PersistentFlags().BoolVarP(&quiet, "quiet", "q", false, "do not show logs")
rootCmd.PersistentFlags().BoolVarP(&stdout, "stdout", "", false, "print to standard output")
rootCmd.PersistentFlags().IntVarP(&delay, "delay", "d", -1, "wait before downloading next chapter, in milliseconds")
rootCmd.PersistentFlags().IntVarP(&limit, "limit", "l", -1, "limit number of chapters, in recursive mode")
rootCmd.PersistentFlags().IntVarP(&delay, "delay", "d", -1, "time to wait before downloading next chapter, in milliseconds")
rootCmd.AddCommand(getCmd)
rootCmd.AddCommand(listCmd)