mirror of
https://github.com/NohamR/papeer.git
synced 2026-05-25 04:17:19 +00:00
add threads option
This commit is contained in:
@@ -75,8 +75,9 @@ Flags:
|
|||||||
-o, --offset int skip first chapters, in recursive mode
|
-o, --offset int skip first chapters, in recursive mode
|
||||||
--output string output file
|
--output string output file
|
||||||
-r, --recursive create one chapter per navigation item
|
-r, --recursive create one chapter per navigation item
|
||||||
-s, --selector string table of content CSS selector
|
-s, --selector string table of content CSS selector, in recursive mode
|
||||||
--stdout print to standard output
|
--stdout print to standard output
|
||||||
|
-t, --threads int download concurrency, in recursive mode (default -1)
|
||||||
|
|
||||||
Use "papeer [command] --help" for more information about a command.
|
Use "papeer [command] --help" for more information about a command.
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ func NewProgress(links []link) progress {
|
|||||||
return fmt.Sprintf("Chapters %d / %d", b.Current(), len(links))
|
return fmt.Sprintf("Chapters %d / %d", b.Current(), len(links))
|
||||||
})
|
})
|
||||||
|
|
||||||
individuals := []*uiprogress.Bar{}
|
|
||||||
// hide individual bars if more than 50 chapters
|
// hide individual bars if more than 50 chapters
|
||||||
|
individuals := []*uiprogress.Bar{}
|
||||||
if len(links) <= 50 {
|
if len(links) <= 50 {
|
||||||
for index, link := range links {
|
for index, link := range links {
|
||||||
bar := uiprogress.AddBar(1)
|
bar := uiprogress.AddBar(1)
|
||||||
|
|||||||
@@ -14,9 +14,9 @@ import (
|
|||||||
colly "github.com/gocolly/colly/v2"
|
colly "github.com/gocolly/colly/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewBookFromURL(url, selector string, recursive, include, images bool, limit, offset, delay int) book {
|
func NewBookFromURL(url, selector string, recursive, include, images bool, limit, offset, delay, threads int) book {
|
||||||
if recursive {
|
if recursive {
|
||||||
chapters := tableOfContent(url, selector, limit, offset, delay, include, images)
|
chapters := tableOfContent(url, selector, limit, offset, delay, threads, include, images)
|
||||||
|
|
||||||
b := New(chapters[0].Name(), chapters[0].Author())
|
b := New(chapters[0].Name(), chapters[0].Author())
|
||||||
for _, c := range chapters {
|
for _, c := range chapters {
|
||||||
@@ -56,7 +56,7 @@ func NewChapterFromURL(url string, images bool) chapter {
|
|||||||
return chapter{article.Title, article.Byline, content}
|
return chapter{article.Title, article.Byline, content}
|
||||||
}
|
}
|
||||||
|
|
||||||
func tableOfContent(url, selector string, limit, offset, delay int, include, images bool) []chapter {
|
func tableOfContent(url, selector string, limit, offset, delay, threads int, include, images bool) []chapter {
|
||||||
base, err := urllib.Parse(url)
|
base, err := urllib.Parse(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
@@ -71,6 +71,7 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
|
|||||||
progress := NewProgress(links)
|
progress := NewProgress(links)
|
||||||
|
|
||||||
if delay >= 0 {
|
if delay >= 0 {
|
||||||
|
// synchronous mode
|
||||||
|
|
||||||
for index, link := range links {
|
for index, link := range links {
|
||||||
// and then use it to parse relative URLs
|
// and then use it to parse relative URLs
|
||||||
@@ -91,10 +92,19 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
|
|||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
// asynchronous mode
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
|
if threads == -1 {
|
||||||
|
threads = len(links)
|
||||||
|
}
|
||||||
|
semaphore := make(chan bool, threads)
|
||||||
|
|
||||||
for index, l := range links {
|
for index, l := range links {
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
|
semaphore <- true
|
||||||
|
|
||||||
go func(index int, l link) {
|
go func(index int, l link) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
@@ -107,6 +117,7 @@ func tableOfContent(url, selector string, limit, offset, delay int, include, ima
|
|||||||
chapters[index] = NewChapterFromURL(u.String(), images)
|
chapters[index] = NewChapterFromURL(u.String(), images)
|
||||||
progress.Incr(index)
|
progress.Incr(index)
|
||||||
|
|
||||||
|
<-semaphore
|
||||||
}(index, l)
|
}(index, l)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|||||||
12
cmd/get.go
12
cmd/get.go
@@ -17,7 +17,7 @@ import (
|
|||||||
|
|
||||||
var stdout, recursive, include, images bool
|
var stdout, recursive, include, images bool
|
||||||
var format, output, selector string
|
var format, output, selector string
|
||||||
var limit, offset, delay int
|
var limit, offset, delay, threads int
|
||||||
|
|
||||||
var getCmd = &cobra.Command{
|
var getCmd = &cobra.Command{
|
||||||
Use: "get",
|
Use: "get",
|
||||||
@@ -68,11 +68,19 @@ var getCmd = &cobra.Command{
|
|||||||
return errors.New("cannot use delay option if not in recursive mode")
|
return errors.New("cannot use delay option if not in recursive mode")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cmd.Flags().Changed("threads") && recursive == false {
|
||||||
|
return errors.New("cannot use threads option if not in recursive mode")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cmd.Flags().Changed("delay") && cmd.Flags().Changed("threads") {
|
||||||
|
return errors.New("cannot use delay and threads options at the same time")
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
Run: func(cmd *cobra.Command, args []string) {
|
Run: func(cmd *cobra.Command, args []string) {
|
||||||
url := args[0]
|
url := args[0]
|
||||||
b := book.NewBookFromURL(url, selector, recursive, include, images, limit, offset, delay)
|
b := book.NewBookFromURL(url, selector, recursive, include, images, limit, offset, delay, threads)
|
||||||
|
|
||||||
if len(output) == 0 {
|
if len(output) == 0 {
|
||||||
// set default output
|
// set default output
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ func init() {
|
|||||||
rootCmd.PersistentFlags().IntVarP(&limit, "limit", "l", -1, "limit number of chapters, in recursive mode")
|
rootCmd.PersistentFlags().IntVarP(&limit, "limit", "l", -1, "limit number of chapters, in recursive mode")
|
||||||
rootCmd.PersistentFlags().IntVarP(&offset, "offset", "o", 0, "skip first chapters, in recursive mode")
|
rootCmd.PersistentFlags().IntVarP(&offset, "offset", "o", 0, "skip first chapters, in recursive mode")
|
||||||
rootCmd.PersistentFlags().IntVarP(&delay, "delay", "d", -1, "time to wait before downloading next chapter, in milliseconds")
|
rootCmd.PersistentFlags().IntVarP(&delay, "delay", "d", -1, "time to wait before downloading next chapter, in milliseconds")
|
||||||
|
rootCmd.PersistentFlags().IntVarP(&threads, "threads", "t", -1, "download concurrency, in recursive mode")
|
||||||
|
|
||||||
rootCmd.AddCommand(getCmd)
|
rootCmd.AddCommand(getCmd)
|
||||||
rootCmd.AddCommand(listCmd)
|
rootCmd.AddCommand(listCmd)
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
package cmd
|
|
||||||
|
|
||||||
func getTableOfContent() {
|
|
||||||
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user