add threads option

This commit is contained in:
lapwat
2021-10-10 22:02:39 +02:00
parent 2d1d5a964a
commit d5971a2819
6 changed files with 28 additions and 12 deletions

View File

@@ -75,8 +75,9 @@ Flags:
-o, --offset int skip first chapters, in recursive mode -o, --offset int skip first chapters, in recursive mode
--output string output file --output string output file
-r, --recursive create one chapter per navigation item -r, --recursive create one chapter per navigation item
-s, --selector string table of content CSS selector -s, --selector string table of content CSS selector, in recursive mode
--stdout print to standard output --stdout print to standard output
-t, --threads int download concurrency, in recursive mode (default -1)
Use "papeer [command] --help" for more information about a command. Use "papeer [command] --help" for more information about a command.
``` ```

View File

@@ -19,8 +19,8 @@ func NewProgress(links []link) progress {
return fmt.Sprintf("Chapters %d / %d", b.Current(), len(links)) return fmt.Sprintf("Chapters %d / %d", b.Current(), len(links))
}) })
individuals := []*uiprogress.Bar{}
// hide individual bars if more than 50 chapters // hide individual bars if more than 50 chapters
individuals := []*uiprogress.Bar{}
if len(links) <= 50 { if len(links) <= 50 {
for index, link := range links { for index, link := range links {
bar := uiprogress.AddBar(1) bar := uiprogress.AddBar(1)

View File

@@ -14,9 +14,9 @@ import (
colly "github.com/gocolly/colly/v2" colly "github.com/gocolly/colly/v2"
) )
func NewBookFromURL(url, selector string, recursive, include, images bool, limit, offset, delay int) book { func NewBookFromURL(url, selector string, recursive, include, images bool, limit, offset, delay, threads int) book {
if recursive { if recursive {
chapters := tableOfContent(url, selector, limit, offset, delay, include, images) chapters := tableOfContent(url, selector, limit, offset, delay, threads, include, images)
b := New(chapters[0].Name(), chapters[0].Author()) b := New(chapters[0].Name(), chapters[0].Author())
for _, c := range chapters { for _, c := range chapters {
@@ -56,7 +56,7 @@ func NewChapterFromURL(url string, images bool) chapter {
return chapter{article.Title, article.Byline, content} return chapter{article.Title, article.Byline, content}
} }
func tableOfContent(url, selector string, limit, offset, delay int, include, images bool) []chapter { func tableOfContent(url, selector string, limit, offset, delay, threads int, include, images bool) []chapter {
base, err := urllib.Parse(url) base, err := urllib.Parse(url)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
@@ -71,6 +71,7 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
progress := NewProgress(links) progress := NewProgress(links)
if delay >= 0 { if delay >= 0 {
// synchronous mode
for index, link := range links { for index, link := range links {
// and then use it to parse relative URLs // and then use it to parse relative URLs
@@ -91,10 +92,19 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
} }
} else { } else {
// asynchronous mode
var wg sync.WaitGroup var wg sync.WaitGroup
if threads == -1 {
threads = len(links)
}
semaphore := make(chan bool, threads)
for index, l := range links { for index, l := range links {
wg.Add(1) wg.Add(1)
semaphore <- true
go func(index int, l link) { go func(index int, l link) {
defer wg.Done() defer wg.Done()
@@ -107,6 +117,7 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
chapters[index] = NewChapterFromURL(u.String(), images) chapters[index] = NewChapterFromURL(u.String(), images)
progress.Incr(index) progress.Incr(index)
<-semaphore
}(index, l) }(index, l)
} }
wg.Wait() wg.Wait()

View File

@@ -17,7 +17,7 @@ import (
var stdout, recursive, include, images bool var stdout, recursive, include, images bool
var format, output, selector string var format, output, selector string
var limit, offset, delay int var limit, offset, delay, threads int
var getCmd = &cobra.Command{ var getCmd = &cobra.Command{
Use: "get", Use: "get",
@@ -68,11 +68,19 @@ var getCmd = &cobra.Command{
return errors.New("cannot use delay option if not in recursive mode") return errors.New("cannot use delay option if not in recursive mode")
} }
if cmd.Flags().Changed("threads") && recursive == false {
return errors.New("cannot use threads option if not in recursive mode")
}
if cmd.Flags().Changed("delay") && cmd.Flags().Changed("threads") {
return errors.New("cannot use delay and threads options at the same time")
}
return nil return nil
}, },
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
url := args[0] url := args[0]
b := book.NewBookFromURL(url, selector, recursive, include, images, limit, offset, delay) b := book.NewBookFromURL(url, selector, recursive, include, images, limit, offset, delay, threads)
if len(output) == 0 { if len(output) == 0 {
// set default output // set default output

View File

@@ -33,6 +33,7 @@ func init() {
rootCmd.PersistentFlags().IntVarP(&limit, "limit", "l", -1, "limit number of chapters, in recursive mode") rootCmd.PersistentFlags().IntVarP(&limit, "limit", "l", -1, "limit number of chapters, in recursive mode")
rootCmd.PersistentFlags().IntVarP(&offset, "offset", "o", 0, "skip first chapters, in recursive mode") rootCmd.PersistentFlags().IntVarP(&offset, "offset", "o", 0, "skip first chapters, in recursive mode")
rootCmd.PersistentFlags().IntVarP(&delay, "delay", "d", -1, "time to wait before downloading next chapter, in milliseconds") rootCmd.PersistentFlags().IntVarP(&delay, "delay", "d", -1, "time to wait before downloading next chapter, in milliseconds")
rootCmd.PersistentFlags().IntVarP(&threads, "threads", "t", -1, "download concurrency, in recursive mode")
rootCmd.AddCommand(getCmd) rootCmd.AddCommand(getCmd)
rootCmd.AddCommand(listCmd) rootCmd.AddCommand(listCmd)

View File

@@ -1,5 +0,0 @@
package cmd
// getTableOfContent is an empty placeholder: it takes no arguments, returns
// nothing, and has no body.
func getTableOfContent() {
}