mirror of
https://github.com/NohamR/papeer.git
synced 2026-05-24 20:00:45 +00:00
add threads option
This commit is contained in:
@@ -75,8 +75,9 @@ Flags:
|
||||
-o, --offset int skip first chapters, in recursive mode
|
||||
--output string output file
|
||||
-r, --recursive create one chapter per navigation item
|
||||
-s, --selector string table of content CSS selector
|
||||
-s, --selector string table of content CSS selector, in recursive mode
|
||||
--stdout print to standard output
|
||||
-t, --threads int download concurrency, in recursive mode (default -1)
|
||||
|
||||
Use "papeer [command] --help" for more information about a command.
|
||||
```
|
||||
|
||||
@@ -19,8 +19,8 @@ func NewProgress(links []link) progress {
|
||||
return fmt.Sprintf("Chapters %d / %d", b.Current(), len(links))
|
||||
})
|
||||
|
||||
individuals := []*uiprogress.Bar{}
|
||||
// hide individual bars if more than 50 chapters
|
||||
individuals := []*uiprogress.Bar{}
|
||||
if len(links) <= 50 {
|
||||
for index, link := range links {
|
||||
bar := uiprogress.AddBar(1)
|
||||
|
||||
@@ -14,9 +14,9 @@ import (
|
||||
colly "github.com/gocolly/colly/v2"
|
||||
)
|
||||
|
||||
func NewBookFromURL(url, selector string, recursive, include, images bool, limit, offset, delay int) book {
|
||||
func NewBookFromURL(url, selector string, recursive, include, images bool, limit, offset, delay, threads int) book {
|
||||
if recursive {
|
||||
chapters := tableOfContent(url, selector, limit, offset, delay, include, images)
|
||||
chapters := tableOfContent(url, selector, limit, offset, delay, threads, include, images)
|
||||
|
||||
b := New(chapters[0].Name(), chapters[0].Author())
|
||||
for _, c := range chapters {
|
||||
@@ -56,7 +56,7 @@ func NewChapterFromURL(url string, images bool) chapter {
|
||||
return chapter{article.Title, article.Byline, content}
|
||||
}
|
||||
|
||||
func tableOfContent(url, selector string, limit, offset, delay int, include, images bool) []chapter {
|
||||
func tableOfContent(url, selector string, limit, offset, delay, threads int, include, images bool) []chapter {
|
||||
base, err := urllib.Parse(url)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
@@ -71,6 +71,7 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
|
||||
progress := NewProgress(links)
|
||||
|
||||
if delay >= 0 {
|
||||
// synchronous mode
|
||||
|
||||
for index, link := range links {
|
||||
// and then use it to parse relative URLs
|
||||
@@ -91,10 +92,19 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
|
||||
}
|
||||
|
||||
} else {
|
||||
// asynchronous mode
|
||||
var wg sync.WaitGroup
|
||||
|
||||
if threads == -1 {
|
||||
threads = len(links)
|
||||
}
|
||||
semaphore := make(chan bool, threads)
|
||||
|
||||
for index, l := range links {
|
||||
|
||||
wg.Add(1)
|
||||
semaphore <- true
|
||||
|
||||
go func(index int, l link) {
|
||||
defer wg.Done()
|
||||
|
||||
@@ -107,6 +117,7 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
|
||||
chapters[index] = NewChapterFromURL(u.String(), images)
|
||||
progress.Incr(index)
|
||||
|
||||
<-semaphore
|
||||
}(index, l)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
12
cmd/get.go
12
cmd/get.go
@@ -17,7 +17,7 @@ import (
|
||||
|
||||
var stdout, recursive, include, images bool
|
||||
var format, output, selector string
|
||||
var limit, offset, delay int
|
||||
var limit, offset, delay, threads int
|
||||
|
||||
var getCmd = &cobra.Command{
|
||||
Use: "get",
|
||||
@@ -68,11 +68,19 @@ var getCmd = &cobra.Command{
|
||||
return errors.New("cannot use delay option if not in recursive mode")
|
||||
}
|
||||
|
||||
if cmd.Flags().Changed("threads") && recursive == false {
|
||||
return errors.New("cannot use threads option if not in recursive mode")
|
||||
}
|
||||
|
||||
if cmd.Flags().Changed("delay") && cmd.Flags().Changed("threads") {
|
||||
return errors.New("cannot use delay and threads options at the same time")
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
url := args[0]
|
||||
b := book.NewBookFromURL(url, selector, recursive, include, images, limit, offset, delay)
|
||||
b := book.NewBookFromURL(url, selector, recursive, include, images, limit, offset, delay, threads)
|
||||
|
||||
if len(output) == 0 {
|
||||
// set default output
|
||||
|
||||
@@ -33,6 +33,7 @@ func init() {
|
||||
rootCmd.PersistentFlags().IntVarP(&limit, "limit", "l", -1, "limit number of chapters, in recursive mode")
|
||||
rootCmd.PersistentFlags().IntVarP(&offset, "offset", "o", 0, "skip first chapters, in recursive mode")
|
||||
rootCmd.PersistentFlags().IntVarP(&delay, "delay", "d", -1, "time to wait before downloading next chapter, in milliseconds")
|
||||
rootCmd.PersistentFlags().IntVarP(&threads, "threads", "t", -1, "download concurrency, in recursive mode")
|
||||
|
||||
rootCmd.AddCommand(getCmd)
|
||||
rootCmd.AddCommand(listCmd)
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
package cmd
|
||||
|
||||
func getTableOfContent() {
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user