add threads option

This commit is contained in:
lapwat
2021-10-10 22:02:39 +02:00
parent 2d1d5a964a
commit d5971a2819
6 changed files with 28 additions and 12 deletions

View File

@@ -75,8 +75,9 @@ Flags:
-o, --offset int skip first chapters, in recursive mode
--output string output file
-r, --recursive create one chapter per navigation item
-s, --selector string table of content CSS selector
-s, --selector string table of content CSS selector, in recursive mode
--stdout print to standard output
-t, --threads int download concurrency, in recursive mode (default -1)
Use "papeer [command] --help" for more information about a command.
```

View File

@@ -19,8 +19,8 @@ func NewProgress(links []link) progress {
return fmt.Sprintf("Chapters %d / %d", b.Current(), len(links))
})
individuals := []*uiprogress.Bar{}
// hide individual bars if more than 50 chapters
individuals := []*uiprogress.Bar{}
if len(links) <= 50 {
for index, link := range links {
bar := uiprogress.AddBar(1)

View File

@@ -14,9 +14,9 @@ import (
colly "github.com/gocolly/colly/v2"
)
func NewBookFromURL(url, selector string, recursive, include, images bool, limit, offset, delay int) book {
func NewBookFromURL(url, selector string, recursive, include, images bool, limit, offset, delay, threads int) book {
if recursive {
chapters := tableOfContent(url, selector, limit, offset, delay, include, images)
chapters := tableOfContent(url, selector, limit, offset, delay, threads, include, images)
b := New(chapters[0].Name(), chapters[0].Author())
for _, c := range chapters {
@@ -56,7 +56,7 @@ func NewChapterFromURL(url string, images bool) chapter {
return chapter{article.Title, article.Byline, content}
}
func tableOfContent(url, selector string, limit, offset, delay int, include, images bool) []chapter {
func tableOfContent(url, selector string, limit, offset, delay, threads int, include, images bool) []chapter {
base, err := urllib.Parse(url)
if err != nil {
log.Fatal(err)
@@ -71,6 +71,7 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
progress := NewProgress(links)
if delay >= 0 {
// synchronous mode
for index, link := range links {
// and then use it to parse relative URLs
@@ -91,10 +92,19 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
}
} else {
// asynchronous mode
var wg sync.WaitGroup
if threads == -1 {
threads = len(links)
}
semaphore := make(chan bool, threads)
for index, l := range links {
wg.Add(1)
semaphore <- true
go func(index int, l link) {
defer wg.Done()
@@ -107,6 +117,7 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
chapters[index] = NewChapterFromURL(u.String(), images)
progress.Incr(index)
<-semaphore
}(index, l)
}
wg.Wait()

View File

@@ -17,7 +17,7 @@ import (
var stdout, recursive, include, images bool
var format, output, selector string
var limit, offset, delay int
var limit, offset, delay, threads int
var getCmd = &cobra.Command{
Use: "get",
@@ -68,11 +68,19 @@ var getCmd = &cobra.Command{
return errors.New("cannot use delay option if not in recursive mode")
}
if cmd.Flags().Changed("threads") && recursive == false {
return errors.New("cannot use threads option if not in recursive mode")
}
if cmd.Flags().Changed("delay") && cmd.Flags().Changed("threads") {
return errors.New("cannot use delay and threads options at the same time")
}
return nil
},
Run: func(cmd *cobra.Command, args []string) {
url := args[0]
b := book.NewBookFromURL(url, selector, recursive, include, images, limit, offset, delay)
b := book.NewBookFromURL(url, selector, recursive, include, images, limit, offset, delay, threads)
if len(output) == 0 {
// set default output

View File

@@ -33,6 +33,7 @@ func init() {
rootCmd.PersistentFlags().IntVarP(&limit, "limit", "l", -1, "limit number of chapters, in recursive mode")
rootCmd.PersistentFlags().IntVarP(&offset, "offset", "o", 0, "skip first chapters, in recursive mode")
rootCmd.PersistentFlags().IntVarP(&delay, "delay", "d", -1, "time to wait before downloading next chapter, in milliseconds")
rootCmd.PersistentFlags().IntVarP(&threads, "threads", "t", -1, "download concurrency, in recursive mode")
rootCmd.AddCommand(getCmd)
rootCmd.AddCommand(listCmd)

View File

@@ -1,5 +0,0 @@
package cmd
// getTableOfContent is an empty placeholder: it takes no arguments,
// returns nothing, and has no body.
func getTableOfContent() {
}