diff --git a/README.md b/README.md index 79c5af0..be53e9c 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,7 @@ go install github.com/lapwat/papeer@latest ```sh # use platform=darwin for MacOS platform=linux -release=0.6.0 +release=0.6.1 # download and extract curl -L https://github.com/lapwat/papeer/releases/download/v$release/papeer-v$release-$platform-amd64.tar.gz > papeer.tar.gz diff --git a/book/link.go b/book/link.go index 8b43be1..e776ce4 100644 --- a/book/link.go +++ b/book/link.go @@ -1,18 +1,10 @@ package book type link struct { - href string - text string + Href string `json:"href"` + Text string `json:"name"` } func NewLink(href, text string) link { return link{href, text} } - -func (c link) Href() string { - return c.href -} - -func (c link) Text() string { - return c.text -} diff --git a/book/progress.go b/book/progress.go index 9865308..9dfa6e1 100644 --- a/book/progress.go +++ b/book/progress.go @@ -27,7 +27,7 @@ func NewProgress(links []link, parent string, depth int) progress { if len(links) <= 50 { for index, link := range links { bar := uiprogress.AddBar(1) - barText := fmt.Sprintf("%v#%v %v", indent, index+1, link.Text()) + barText := fmt.Sprintf("%v#%v %v", indent, index+1, link.Text) bar.AppendFunc(func(b *uiprogress.Bar) string { return barText }) diff --git a/book/scraper.go b/book/scraper.go index af076d0..2b8004d 100644 --- a/book/scraper.go +++ b/book/scraper.go @@ -156,12 +156,12 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int, // synchronous mode for index, link := range links { // and then use it to parse relative URLs - u, err := base.Parse(link.href) + u, err := base.Parse(link.Href) if err != nil { log.Fatal(err) } - sc := NewChapterFromURL(u.String(), link.text, configs[1:], index, p.UpdateName) + sc := NewChapterFromURL(u.String(), link.Text, configs[1:], index, p.UpdateName) subchapters[index] = sc if config.Quiet == false { p.Increment(index) @@ -189,12 +189,12 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int, defer wg.Done() // and then use it to parse relative URLs - u, err := base.Parse(l.href) + u, err := base.Parse(l.Href) if err != nil { log.Fatal(err) } - sc := NewChapterFromURL(u.String(), l.text, configs[1:], index, p.UpdateName) + sc := NewChapterFromURL(u.String(), l.Text, configs[1:], index, p.UpdateName) subchapters[index] = sc if config.Quiet == false { @@ -279,12 +279,12 @@ func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, q for index, l := range links { // and then use it to parse relative URLs - u, err := base.Parse(l.href) + u, err := base.Parse(l.Href) if err != nil { log.Fatal(err) } - chapters[index] = NewChapterFromURL(u.String(), l.text, []*ScrapeConfig{subConfig}, 0, func(index int, name string) {}) + chapters[index] = NewChapterFromURL(u.String(), l.Text, []*ScrapeConfig{subConfig}, 0, func(index int, name string) {}) if quiet == false { p.Increment(index) @@ -317,12 +317,12 @@ func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, q defer wg.Done() // and then use it to parse relative URLs - u, err := base.Parse(l.href) + u, err := base.Parse(l.Href) if err != nil { log.Fatal(err) } - chapters[index] = NewChapterFromURL(u.String(), l.text, []*ScrapeConfig{subConfig}, 0, func(index int, name string) {}) + chapters[index] = NewChapterFromURL(u.String(), l.Text, []*ScrapeConfig{subConfig}, 0, func(index int, name string) {}) if quiet == false { p.Increment(index) @@ -361,8 +361,6 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, reverse, incl parser := gofeed.NewParser() feed, err := parser.ParseURL(url.String()) - fmt.Println(feed, url.String(), err) - if err == nil { // RSS feed diff --git a/cmd/get.go b/cmd/get.go index c46d2e8..b2a2693 100644 --- a/cmd/get.go +++ b/cmd/get.go @@ -21,17 +21,15 @@ type GetOptions struct { output string stdout bool images bool - // ImagesOnly bool - quiet bool + quiet bool - Selector []string - depth int - limit int - offset int - reverse bool - delay int - threads int - // includeUrl bool + Selector []string + depth int + limit int + offset int + reverse bool + delay int + threads int include bool useLinkName bool } @@ -41,13 +39,13 @@ var getOpts *GetOptions func init() { getOpts = &GetOptions{} - getCmd.PersistentFlags().StringVarP(&getOpts.name, "name", "n", "", "book name (default: page title)") - getCmd.PersistentFlags().StringVarP(&getOpts.author, "author", "a", "", "book author") - getCmd.PersistentFlags().StringVarP(&getOpts.Format, "format", "f", "md", "file format [md, html, epub, mobi]") - getCmd.PersistentFlags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)") - getCmd.PersistentFlags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output") - getCmd.PersistentFlags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only") - getCmd.PersistentFlags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar") + getCmd.Flags().StringVarP(&getOpts.name, "name", "n", "", "book name (default: page title)") + getCmd.Flags().StringVarP(&getOpts.author, "author", "a", "", "book author") + getCmd.Flags().StringVarP(&getOpts.Format, "format", "f", "md", "file format [md, html, epub, mobi]") + getCmd.Flags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)") + getCmd.Flags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output") + getCmd.Flags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only") + getCmd.Flags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar") // common with list command getCmd.Flags().StringSliceVarP(&getOpts.Selector, "selector", "s", []string{}, "table of contents CSS selector") @@ -72,13 +70,13 @@ var getCmd = &cobra.Command{ return errors.New("requires an URL argument") } + // check provided format is in list formatEnum := map[string]bool{ "md": true, "html": true, "epub": true, "mobi": true, } - if formatEnum[getOpts.Format] != true { return fmt.Errorf("invalid format specified: %s", getOpts.Format) } diff --git a/cmd/list.go b/cmd/list.go index cd10e85..b7e5401 100644 --- a/cmd/list.go +++ b/cmd/list.go @@ -1,6 +1,7 @@ package cmd import ( + "encoding/json" "errors" "fmt" "log" @@ -17,14 +18,15 @@ import ( type ListOptions struct { // url string - Selector []string - depth int - limit int - offset int - reverse bool - delay int - threads int - // includeUrl bool + output string + + Selector []string + depth int + limit int + offset int + reverse bool + delay int + threads int include bool useLinkName bool } @@ -34,11 +36,13 @@ var listOpts *ListOptions func init() { listOpts = &ListOptions{} + listCmd.Flags().StringVarP(&listOpts.output, "output", "o", "table", "file format [table, json]") + // common with get command listCmd.Flags().StringSliceVarP(&listOpts.Selector, "selector", "s", []string{}, "table of contents CSS selector") listCmd.Flags().IntVarP(&listOpts.depth, "depth", "d", 0, "scraping depth") listCmd.Flags().IntVarP(&listOpts.limit, "limit", "l", -1, "limit number of chapters, use with depth/selector") - listCmd.Flags().IntVarP(&listOpts.offset, "offset", "o", 0, "skip first chapters, use with depth/selector") + listCmd.Flags().IntVarP(&listOpts.offset, "offset", "", 0, "skip first chapters, use with depth/selector") listCmd.Flags().BoolVarP(&listOpts.reverse, "reverse", "r", false, "reverse chapter order") listCmd.Flags().IntVarP(&listOpts.delay, "delay", "", -1, "time in milliseconds to wait before downloading next chapter, use with depth/selector") listCmd.Flags().IntVarP(&listOpts.threads, "threads", "t", -1, "download concurrency, use with depth/selector") @@ -57,6 +61,16 @@ var listCmd = &cobra.Command{ if len(args) < 1 { return errors.New("requires an URL argument") } + + // check provided output is in list + outputEnum := map[string]bool{ + "table": true, + "json": true, + } + if outputEnum[listOpts.output] != true { + return fmt.Errorf("invalid output specified: %s", listOpts.output) + } + return nil }, Run: func(cmd *cobra.Command, args []string) { @@ -74,34 +88,52 @@ var listCmd = &cobra.Command{ log.Fatal(err) } - t := table.NewWriter() - t.SetOutputMirror(os.Stdout) - t.Style().Options.DrawBorder = false - t.Style().Options.SeparateColumns = false - t.Style().Options.SeparateHeader = false + switch listOpts.output { - t.SetTitle(home.Name()) + // render as table + case "table": + t := table.NewWriter() + t.SetOutputMirror(os.Stdout) + t.Style().Options.DrawBorder = false + t.Style().Options.SeparateColumns = false + t.Style().Options.SeparateHeader = false - // format selector path - pathArray := strings.Split(path, "<") - // reverse path - for i, j := 0, len(pathArray)-1; i < j; i, j = i+1, j-1 { - pathArray[i], pathArray[j] = pathArray[j], pathArray[i] - } - pathFormatted := strings.Join(pathArray, ">") + t.SetTitle(home.Name()) - t.AppendHeader(table.Row{"#", "Name", fmt.Sprintf("Url [%s]", pathFormatted)}) + // format selector path + pathArray := strings.Split(path, "<") + // reverse path + for i, j := 0, len(pathArray)-1; i < j; i, j = i+1, j-1 { + pathArray[i], pathArray[j] = pathArray[j], pathArray[i] + } + pathFormatted := strings.Join(pathArray, ">") - for index, link := range links { - u, err := base.Parse(link.Href()) + t.AppendHeader(table.Row{"#", "Name", fmt.Sprintf("Url [%s]", pathFormatted)}) + + for index, link := range links { + u, err := base.Parse(link.Href) + if err != nil { + log.Fatal(err) + } + + t.AppendRow([]interface{}{index + 1, link.Text, u.String()}) + } + + t.Render() + + // render as json + case "json": + book := make(map[string]interface{}) + book["name"] = home.Name() + book["chapters"] = links + + bookJson, err := json.Marshal(book) if err != nil { log.Fatal(err) } - t.AppendRow([]interface{}{index + 1, link.Text(), u.String()}) + fmt.Println(string(bookJson)) } - t.Render() - }, } diff --git a/cmd/version.go b/cmd/version.go index 34b15b3..4739fa0 100644 --- a/cmd/version.go +++ b/cmd/version.go @@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{ Use: "version", Short: "Print the version number of papeer", Run: func(cmd *cobra.Command, args []string) { - fmt.Println("papeer v0.6.0") + fmt.Println("papeer v0.6.1") }, }