chain selectors, depth & quiet options, split main commands

2026-05-25 20:00:47 +00:00 · 2022-02-04 19:42:27 +01:00
parent 26b144fb73
commit 4b760c9562
10 changed files with 528 additions and 279 deletions
--- a/book/format.go
+++ b/book/format.go
@@ -21,10 +21,10 @@ func Filename(name string) string {
 	return filename
 }

-func ToMarkdown(c chapter) string {
+func ToMarkdownString(c chapter) string {
 	markdown := ""

-	if c.config.include {
+	if c.config.Include {
 		// title
 		markdown += fmt.Sprintf("%s\n", c.Name())
 		markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name())))
@@ -39,12 +39,33 @@ func ToMarkdown(c chapter) string {

 	for _, sc := range c.SubChapters() {
 		// subchapters content
-		markdown += fmt.Sprintf("%s\n\n\n", ToMarkdown(sc))
+		markdown += fmt.Sprintf("%s\n\n\n", ToMarkdownString(sc))
 	}

 	return markdown
 }

+func ToMarkdown(c chapter, filename string) string {
+	if len(filename) == 0 {
+		filename = fmt.Sprintf("%s.md", Filename(c.Name()))
+	}
+
+	markdown := ToMarkdownString(c)
+
+	// write to file
+	f, err := os.Create(filename)
+	if err != nil {
+		log.Fatal(err)
+	}
+	_, err2 := f.WriteString(markdown)
+	if err2 != nil {
+		log.Fatal(err2)
+	}
+	f.Close()
+
+	return filename
+}
+
 func ToEpub(c chapter, filename string) string {
 	if len(filename) == 0 {
 		filename = fmt.Sprintf("%s.epub", Filename(c.Name()))
@@ -67,9 +88,9 @@ func ToEpub(c chapter, filename string) string {
 func AppendToEpub(e *epub.Epub, c chapter) {
 	content := ""

-	if c.config.include {
+	if c.config.Include {

-		if c.config.imagesOnly == false {
+		if c.config.ImagesOnly == false {
 			content = c.Content()
 		}

@@ -85,7 +106,7 @@ func AppendToEpub(e *epub.Epub, c chapter) {
 			src = strings.Split(src, "?")[0] // remove query part
 			imagePath, _ := e.AddImage(src, "")

-			if c.config.imagesOnly {
+			if c.config.ImagesOnly {
 				imageTag, _ := goquery.OuterHtml(s)
 				content += strings.Replace(imageTag, src, imagePath, 1)
 			} else {
@@ -94,8 +115,8 @@ func AppendToEpub(e *epub.Epub, c chapter) {
 		})

 		html := ""
-		// add title only if imagesOnly = false
-		if c.config.imagesOnly == false {
+		// add title only if ImagesOnly = false
+		if c.config.ImagesOnly == false {
 			html += fmt.Sprintf("<h1>%s</h1>", c.Name())
 		}
 		html += content
--- a/book/format_test.go
+++ b/book/format_test.go
@@ -17,11 +17,11 @@ func TestFilename(t *testing.T) {

 }

-func TestToMarkdown(t *testing.T) {
+func TestToMarkdownString(t *testing.T) {

-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})

-	got := ToMarkdown(c)
+	got := ToMarkdownString(c)
 	want := "Books\n=====\n\n- [Discours de la Méthode](https://books.lapw.at/posts/ren%C3%A9-descartes-discours-de-la-m%C3%A9thode/)clock 98 min read -\n1637\n\n- [The Twelve-Factor App](https://books.lapw.at/posts/adam-wiggins-the-twelve-factor-app/)clock 22 min read -\n2011\n\n\n"

 	if got != want {
@@ -30,12 +30,44 @@ func TestToMarkdown(t *testing.T) {

 }

+func TestToMarkdown(t *testing.T) {
+
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+	ToMarkdown(c, "")
+
+	filename := "Books.md"
+	if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
+		t.Errorf("%s does not exist: %v", filename, err)
+	} else {
+		if err := os.Remove(filename); err != nil {
+			t.Errorf("cannot remove %v: %v", filename, err)
+		}
+	}
+
+}
+
+func TestToMarkdownFilename(t *testing.T) {
+
+	filename := "ebook.md"
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+	ToMarkdown(c, filename)
+
+	if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
+		t.Errorf("%s does not exist: %v", filename, err)
+	} else {
+		if err := os.Remove(filename); err != nil {
+			t.Errorf("cannot remove %v: %v", filename, err)
+		}
+	}
+
+}
+
 func TestToEpub(t *testing.T) {

-	filename := "Books.epub"
-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
 	ToEpub(c, "")

+	filename := "Books.epub"
 	if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
 		t.Errorf("%s does not exist: %v", filename, err)
 	} else {
@@ -49,7 +81,7 @@ func TestToEpub(t *testing.T) {
 func TestToEpubFilename(t *testing.T) {

 	filename := "ebook.epub"
-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
 	ToEpub(c, filename)

 	if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
@@ -65,7 +97,23 @@ func TestToEpubFilename(t *testing.T) {
 func TestToMobi(t *testing.T) {

 	filename := "ebook.mobi"
-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+	ToMobi(c, filename)
+
+	if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
+		t.Errorf("%s does not exist: %v", filename, err)
+	} else {
+		if err := os.Remove(filename); err != nil {
+			t.Errorf("cannot remove %v: %v", filename, err)
+		}
+	}
+
+}
+
+func TestToMobiFilename(t *testing.T) {
+
+	filename := "ebook.mobi"
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
 	ToMobi(c, filename)

 	if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
--- a/book/scraper.go
+++ b/book/scraper.go
@@ -18,83 +18,100 @@ import (
 )

 type ScrapeConfig struct {
-	depth      int
-	selector   string
-	limit      int
-	offset     int
-	delay      int
-	threads    int
-	include    bool
-	imagesOnly bool
+	Depth       int
+	Selector    string
+	Quiet       bool
+	Limit       int
+	Offset      int
+	Delay       int
+	Threads     int
+	Include     bool
+	ImagesOnly  bool
+	UseLinkName bool
 }

 func NewScrapeConfig() *ScrapeConfig {
-	return &ScrapeConfig{0, "", -1, 0, -1, -1, true, false}
+	return &ScrapeConfig{0, "", false, -1, 0, -1, -1, true, false, false}
+}
+
+func NewScrapeConfigs(selectors []string) []*ScrapeConfig {
+	configs := []*ScrapeConfig{}
+
+	for _, s := range selectors {
+		config := NewScrapeConfig()
+		config.Selector = s
+
+		configs = append(configs, config)
+	}
+
+	return configs
 }

 func NewScrapeConfigsAjin() []*ScrapeConfig {
 	config0 := NewScrapeConfig()
-	config0.depth = 0
-	config0.selector = ".dt>a"
-	config0.limit = 3
-	config0.offset = 0
-	config0.delay = 5000
-	config0.include = false
+	config0.Depth = 0
+	config0.Selector = ".dt>a"
+	config0.Limit = 3
+	config0.Offset = 0
+	config0.Delay = 5000
+	config0.Include = false

 	config1 := NewScrapeConfig()
-	config1.depth = 1
-	config1.selector = ".nav_apb>a"
-	config1.limit = 3
-	config1.offset = 1
-	config1.delay = 5000
-	config1.include = false
+	config1.Depth = 1
+	config1.Selector = ".nav_apb>a"
+	config1.Limit = 3
+	config1.Offset = 1
+	config1.Delay = 5000
+	config1.Include = false

 	config2 := NewScrapeConfig()
-	config2.depth = 2
-	config2.imagesOnly = true
+	config2.Depth = 2
+	config2.ImagesOnly = true

 	return []*ScrapeConfig{config0, config1, config2}
 }

 func NewScrapeConfigsWikipedia() []*ScrapeConfig {
 	config0 := NewScrapeConfig()
-	config0.depth = 0
-	config0.threads = -1
-	config0.include = true
+	config0.Depth = 0
+	config0.Threads = -1
+	config0.Include = true

 	config1 := NewScrapeConfig()
-	config1.depth = 1
-	config1.include = true
+	config1.Depth = 1
+	config1.Include = true

 	return []*ScrapeConfig{config0, config1}
 }

 func NewScrapeConfigFake() *ScrapeConfig {
 	config := NewScrapeConfig()
-	config.include = false
+	config.Include = false

 	return config
 }

-func NewBookFromURL(url, selector, name, author string, recursive, include, imagesOnly, quiet bool, limit, offset, delay, threads int) book {
+func NewBookFromURL(url string, selector []string, name, author string, include, ImagesOnly, useLinkName, quiet bool, limit, offset, delay, threads int) book {
 	config1 := NewScrapeConfig()
-	config1.imagesOnly = imagesOnly
+	config1.ImagesOnly = ImagesOnly
+	config1.UseLinkName = useLinkName

 	var chapters []chapter
 	var home chapter

-	if recursive {
+	if len(selector) > 0 {
 		config2 := NewScrapeConfig()
-		config2.selector = selector
-		config2.limit = limit
-		config2.offset = offset
-		config2.delay = delay
-		config2.threads = threads
-		config2.include = include
-		config2.imagesOnly = imagesOnly
+		config2.Selector = selector[0]
+		config2.Limit = limit
+		config2.Offset = offset
+		config2.Delay = delay
+		config2.Threads = threads
+		config2.Include = include
+		config2.ImagesOnly = ImagesOnly
+		config2.UseLinkName = useLinkName
 		chapters, home = tableOfContent(url, config2, config1, quiet)
 	} else {
-		chapters = []chapter{NewChapterFromURL(url, []*ScrapeConfig{config1}, 0, func(index int, name string) {})}
+		chapters = []chapter{NewChapterFromURL(url, "", []*ScrapeConfig{config1}, 0, func(index int, name string) {})}
 		home = chapters[0]
 	}

@@ -114,7 +131,7 @@ func NewBookFromURL(url, selector, name, author string, recursive, include, imag
 	return b
 }

-func NewChapterFromURL(url string, configs []*ScrapeConfig, index int, updateProgressBarName func(index int, name string)) chapter {
+func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int, updateProgressBarName func(index int, name string)) chapter {
 	config := configs[0]

 	base, err := urllib.Parse(url)
@@ -141,24 +158,31 @@ func NewChapterFromURL(url string, configs []*ScrapeConfig, index int, updatePro
 	if err != nil {
 		log.Fatalf("failed to parse %s, %v\n", url, err)
 	}
-	name := article.Title

-	// notify progress bar with new name
-	updateProgressBarName(index, name)
+	name := linkName
+	if config.UseLinkName == false {
+		name = article.Title
+
+		// notify progressbar with new name
+		updateProgressBarName(index, name)
+	}

 	subchapters := []chapter{}
 	if len(configs) > 1 {
 		// add subchapters

-		links, _, err := GetLinks(base, config.selector, config.limit, config.offset, false)
+		links, _, _, err := GetLinks(base, config.Selector, config.Limit, config.Offset, false)
 		if err != nil {
 			log.Fatal(err)
 		}

 		subchapters = make([]chapter, len(links))
-		progress := NewProgress(links, name, config.depth)
+		var p progress
+		if config.Quiet == false {
+			p = NewProgress(links, name, config.Depth)
+		}

-		if config.delay >= 0 {
+		if config.Delay >= 0 {

 			// synchronous mode
 			for index, link := range links {
@@ -168,18 +192,20 @@ func NewChapterFromURL(url string, configs []*ScrapeConfig, index int, updatePro
 					log.Fatal(err)
 				}

-				sc := NewChapterFromURL(u.String(), configs[1:], index, progress.UpdateName)
+				sc := NewChapterFromURL(u.String(), link.text, configs[1:], index, p.UpdateName)
 				subchapters[index] = sc
-				progress.Increment(index)
+				if config.Quiet == false {
+					p.Increment(index)
+				}

-				time.Sleep(time.Duration(config.delay) * time.Millisecond)
+				time.Sleep(time.Duration(config.Delay) * time.Millisecond)
 			}

 		} else {
 			// asynchronous mode
 			var wg sync.WaitGroup

-			threads := config.threads
+			threads := config.Threads
 			if threads == -1 {
 				threads = len(links)
 			}
@@ -199,9 +225,12 @@ func NewChapterFromURL(url string, configs []*ScrapeConfig, index int, updatePro
 						log.Fatal(err)
 					}

-					sc := NewChapterFromURL(u.String(), configs[1:], index, progress.UpdateName)
+					sc := NewChapterFromURL(u.String(), l.text, configs[1:], index, p.UpdateName)
 					subchapters[index] = sc
-					progress.Increment(index)
+
+					if config.Quiet == false {
+						p.Increment(index)
+					}

 					<-semaphore
 				}(index, l)
@@ -211,13 +240,15 @@ func NewChapterFromURL(url string, configs []*ScrapeConfig, index int, updatePro
 	}

 	content := ""
-	if config.include {
+	if config.Include {

-		// we care about the content only if we include this level
+		// we care about the content only if:
+		// - we include this level
+		// - we use the page name
 		content = article.Content

 		// extract images
-		if config.imagesOnly {
+		if config.ImagesOnly {

 			// parse HTML
 			doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
@@ -246,13 +277,13 @@ func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, q
 		log.Fatal(err)
 	}

-	links, home, err := GetLinks(base, config.selector, config.limit, config.offset, config.include)
+	links, _, home, err := GetLinks(base, config.Selector, config.Limit, config.Offset, config.Include)
 	if err != nil {
 		log.Fatal(err)
 	}

 	chapters := make([]chapter, len(links))
-	delay := config.delay
+	delay := config.Delay

 	var p progress
 	if quiet == false {
@@ -262,15 +293,15 @@ func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, q
 	if delay >= 0 {
 		// synchronous mode

-		for index, link := range links {
+		for index, l := range links {
 			// and then use it to parse relative URLs
-			u, err := base.Parse(link.href)
+			u, err := base.Parse(l.href)
 			if err != nil {
 				log.Fatal(err)
 			}

-			chapters[index] = NewChapterFromURL(u.String(), []*ScrapeConfig{subConfig}, 0, func(index int, name string) {})
-			
+			chapters[index] = NewChapterFromURL(u.String(), l.text, []*ScrapeConfig{subConfig}, 0, func(index int, name string) {})
+
 			if quiet == false {
 				p.Increment(index)
 			}
@@ -287,7 +318,7 @@ func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, q
 		// asynchronous mode
 		var wg sync.WaitGroup

-		threads := config.threads
+		threads := config.Threads
 		if threads == -1 {
 			threads = len(links)
 		}
@@ -307,7 +338,7 @@ func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, q
 					log.Fatal(err)
 				}

-				chapters[index] = NewChapterFromURL(u.String(), []*ScrapeConfig{subConfig}, 0, func(index int, name string) {})
+				chapters[index] = NewChapterFromURL(u.String(), l.text, []*ScrapeConfig{subConfig}, 0, func(index int, name string) {})

 				if quiet == false {
 					p.Increment(index)
@@ -327,7 +358,7 @@ func GetPath(elm *goquery.Selection) string {

 	for {
 		selector := strings.ToLower(goquery.NodeName(elm))
-		if selector == "" {
+		if len(selector) == 0 {
 			break
 		}

@@ -339,18 +370,18 @@ func GetPath(elm *goquery.Selection) string {
 	return join
 }

-func GetLinks(url *urllib.URL, selector string, limit, offset int, include bool) ([]link, chapter, error) {
+func GetLinks(url *urllib.URL, selector string, limit, offset int, include bool) ([]link, string, chapter, error) {
 	selectorSet := true
-	if selector == "" {
+	if len(selector) == 0 {
 		selector = "a"
 		selectorSet = false
 	}

-	// visit and count link classes
 	pathLinks := map[string][]link{}
 	pathCount := map[string]int{}
 	pathMax := ""

+	// visit and count link classes
 	c := colly.NewCollector()
 	c.OnHTML(selector, func(e *colly.HTMLElement) {
 		href := e.Attr("href")
@@ -358,26 +389,40 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, include bool)
 		path := GetPath(e.DOM)
 		key := path

-		// include element class in key if selector is set
-		if !selectorSet {
-			class := e.Attr("class")
-			key = fmt.Sprintf("%s.%s", path, class)
-		}
+		if selectorSet {

-		if selectorSet || text != "" {
+			// if selector is set, we use the selector specified by the user
+
+			key = selector
 			pathLinks[key] = append(pathLinks[key], NewLink(href, text))
-			pathCount[key] += len(text)
+			pathCount[key] += 1
+			pathMax = key

-			if pathCount[key] > pathCount[pathMax] {
-				pathMax = key
+		} else {
+
+			// if selector is not set, we compute the selector ourselves
+
+			class := e.Attr("class")
+			// include the element class to make sure we have the same exact path for every link in the table of content
+			key = fmt.Sprintf("%s.%s", path, class)
+
+			// we count this key if the link text is not empty
+			if text != "" {
+				pathLinks[key] = append(pathLinks[key], NewLink(href, text))
+				pathCount[key] += len(text)
+
+				if pathCount[key] > pathCount[pathMax] {
+					pathMax = key
+				}
 			}
+
 		}
 	})
 	c.Visit(url.String())

 	links := pathLinks[pathMax]
 	if len(links) == 0 {
-		return []link{}, chapter{}, fmt.Errorf("no link found for selector: %s", selector)
+		return []link{}, pathMax, chapter{}, fmt.Errorf("no link found for selector: %s", selector)
 	}

 	end := len(links)
@@ -387,12 +432,12 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, include bool)

 	links = links[offset:end]

-	home := NewChapterFromURL(url.String(), []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+	home := NewChapterFromURL(url.String(), "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})

 	if include {
 		l := NewLink(url.String(), home.Name())
 		links = append([]link{l}, links...)
 	}

-	return links, home, nil
+	return links, pathMax, home, nil
 }
--- a/book/scraper_test.go
+++ b/book/scraper_test.go
@@ -8,7 +8,7 @@ import (
 func TestBody(t *testing.T) {

 	config := NewScrapeConfig()
-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

 	got := c.Body()
 	want := "<!doctype html>\n<html lang=\"en-us\">\n  <head>\n    <title>Books</title>\n    <link rel=\"shortcut icon\" href=\"/favicon.ico\" />\n    <meta charset=\"utf-8\" />\n    <meta name=\"generator\" content=\"Hugo 0.59.1\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />\n    <meta name=\"author\" content=\"John Doe\" />\n    <meta name=\"description\" content=\" \" />\n    <link rel=\"stylesheet\" href=\"https://books.lapw.at/css/main.min.88e7083eff65effb7485b6e6f38d10afbec25093a6fac42d734ce9024d3defbd.css\" />\n\n    \n    <meta name=\"twitter:card\" content=\"summary\"/>\n<meta name=\"twitter:title\" content=\"Books\"/>\n<meta name=\"twitter:description\" content=\" \"/>\n\n    <meta property=\"og:title\" content=\"Books\" />\n<meta property=\"og:description\" content=\" \" />\n<meta property=\"og:type\" content=\"website\" />\n<meta property=\"og:url\" content=\"https://books.lapw.at/\" />\n\n\n\n  </head>\n  <body>\n    <header class=\"app-header\">\n      <a href=\"https://books.lapw.at/\"><img class=\"app-header-avatar\" src=\"/book.svg\" alt=\"John Doe\" /></a>\n      <h1>Books</h1>\n      <p> </p>\n      <div class=\"app-header-social\">\n        \n      </div>\n    </header>\n    <main class=\"app-container\">\n      \n  <article>\n    <h1>Books</h1>\n    <ul class=\"posts-list\">\n      \n        <li class=\"posts-list-item\">\n          <a class=\"posts-list-item-title\" href=\"https://books.lapw.at/posts/ren%C3%A9-descartes-discours-de-la-m%C3%A9thode/\">Discours de la Méthode</a>\n          <span class=\"posts-list-item-description\">\n            <svg xmlns=\"http://www.w3.org/2000/svg\" width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"icon icon-clock\">\n  <title>clock</title>\n  <circle cx=\"12\" cy=\"12\" r=\"10\"></circle><polyline points=\"12 6 12 12 16 14\"></polyline>\n</svg> 98 min read 
-\n            1637\n          </span>\n        </li>\n      \n        <li class=\"posts-list-item\">\n          <a class=\"posts-list-item-title\" href=\"https://books.lapw.at/posts/adam-wiggins-the-twelve-factor-app/\">The Twelve-Factor App</a>\n          <span class=\"posts-list-item-description\">\n            <svg xmlns=\"http://www.w3.org/2000/svg\" width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"icon icon-clock\">\n  <title>clock</title>\n  <circle cx=\"12\" cy=\"12\" r=\"10\"></circle><polyline points=\"12 6 12 12 16 14\"></polyline>\n</svg> 22 min read -\n            2011\n          </span>\n        </li>\n      \n    </ul>\n    \n\n\n\n  </article>\n\n    </main>\n  </body>\n</html>\n"
@@ -22,7 +22,7 @@ func TestBody(t *testing.T) {
 func TestName(t *testing.T) {

 	config := NewScrapeConfig()
-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

 	got := c.Name()
 	want := "Books"
@@ -33,10 +33,25 @@ func TestName(t *testing.T) {

 }

+func TestCustomName(t *testing.T) {
+
+	config := NewScrapeConfig()
+	config.UseLinkName = true
+	c := NewChapterFromURL("https://books.lapw.at/", "Custom Name", []*ScrapeConfig{config}, 0, func(index int, name string) {})
+
+	got := c.Name()
+	want := "Custom Name"
+
+	if got != want {
+		t.Errorf("got %v, wanted %v", got, want)
+	}
+
+}
+
 func TestAuthor(t *testing.T) {

 	config := NewScrapeConfig()
-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

 	got := c.Author()
 	want := "John Doe"
@@ -50,7 +65,7 @@ func TestAuthor(t *testing.T) {
 func TestContent(t *testing.T) {

 	config := NewScrapeConfig()
-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

 	got := c.Content()
 	want := "<div id=\"readability-page-1\" class=\"page\">\n    \n    <main>\n      \n  <article>\n    \n    <ul>\n      \n        <li>\n          <a href=\"https://books.lapw.at/posts/ren%C3%A9-descartes-discours-de-la-m%C3%A9thode/\">Discours de la Méthode</a>\n          <span>\n            <svg xmlns=\"http://www.w3.org/2000/svg\" width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\">\n  <title>clock</title>\n  <circle cx=\"12\" cy=\"12\" r=\"10\"></circle><polyline points=\"12 6 12 12 16 14\"></polyline>\n</svg> 98 min read -\n            1637\n          </span>\n        </li>\n      \n        <li>\n          <a href=\"https://books.lapw.at/posts/adam-wiggins-the-twelve-factor-app/\">The Twelve-Factor App</a>\n          <span>\n            <svg xmlns=\"http://www.w3.org/2000/svg\" width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\">\n  <title>clock</title>\n  <circle cx=\"12\" cy=\"12\" r=\"10\"></circle><polyline points=\"12 6 12 12 16 14\"></polyline>\n</svg> 22 min read -\n            2011\n          </span>\n        </li>\n      \n    </ul>\n    \n\n\n\n  </article>\n\n    </main>\n  \n\n</div>"
@@ -64,12 +79,12 @@ func TestContent(t *testing.T) {
 func TestDelay(t *testing.T) {

 	config0 := NewScrapeConfig()
-	config0.delay = 500
+	config0.Delay = 500

 	config1 := NewScrapeConfig()

 	start := time.Now()
-	NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
+	NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
 	elapsed := time.Since(start)

 	got := elapsed
@@ -84,9 +99,9 @@ func TestDelay(t *testing.T) {
 func TestContentImagesOnly(t *testing.T) {

 	config := NewScrapeConfig()
-	config.imagesOnly = true
+	config.ImagesOnly = true

-	c := NewChapterFromURL("https://books.lapw.at/posts/adam-wiggins-the-twelve-factor-app/", []*ScrapeConfig{config}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/posts/adam-wiggins-the-twelve-factor-app/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

 	got := c.Content()
 	want := "<img src=\"https://books.lapw.at/images/codebase-deploys.png\" alt=\"One codebase maps to many deploys\"/><img src=\"https://books.lapw.at/images/attached-resources.png\" alt=\"A production deploy attached to four backing services.\"/><img src=\"https://books.lapw.at/images/release.png\" alt=\"Code becomes a build, which is combined with config to create a release.\"/><img src=\"https://books.lapw.at/images/process-types.png\" alt=\"Scale is expressed as running processes, workload diversity is expressed as process types.\"/>"
@@ -102,7 +117,7 @@ func TestSubChapters(t *testing.T) {
 	config0 := NewScrapeConfig()
 	config1 := NewScrapeConfig()

-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})

 	got := len(c.SubChapters())
 	want := 2
@@ -116,11 +131,11 @@ func TestSubChapters(t *testing.T) {
 func TestSubChaptersSelector(t *testing.T) {

 	config0 := NewScrapeConfig()
-	config0.selector = "section.concrete > article > h2 > a"
+	config0.Selector = "section.concrete > article > h2 > a"

 	config1 := NewScrapeConfig()

-	c := NewChapterFromURL("https://12factor.net/", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})

 	got := len(c.SubChapters())
 	want := 12
@@ -134,11 +149,11 @@ func TestSubChaptersSelector(t *testing.T) {
 func TestSubChaptersLimit(t *testing.T) {

 	config0 := NewScrapeConfig()
-	config0.limit = 1
+	config0.Limit = 1

 	config1 := NewScrapeConfig()

-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})

 	got := len(c.SubChapters())
 	want := 1
@@ -152,11 +167,11 @@ func TestSubChaptersLimit(t *testing.T) {
 func TestSubChaptersLimitOver(t *testing.T) {

 	config0 := NewScrapeConfig()
-	config0.limit = 3
+	config0.Limit = 3

 	config1 := NewScrapeConfig()

-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})

 	got := len(c.SubChapters())
 	want := 2
@@ -170,9 +185,9 @@ func TestSubChaptersLimitOver(t *testing.T) {
 func TestNotInclude(t *testing.T) {

 	config := NewScrapeConfig()
-	config.include = false
+	config.Include = false

-	c := NewChapterFromURL("https://books.lapw.at/", []*ScrapeConfig{config}, 0, func(index int, name string) {})
+	c := NewChapterFromURL("https://books.lapw.at/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

 	got := c.Content()
 	want := ""