diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 0000000..bdf3dc6 --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,15 @@ +name: Format + +on: + push: + paths: + - '**.go' + +jobs: + format: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Check formatting + run: if [[ -n "$(gofmt -l .)" ]]; then exit 1; fi diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..3c4c8fb --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,25 @@ +name: Test + +on: + push: + paths: + - '**.go' + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: 1.17 + - name: Install kindlegen + run: | + curl -L https://github.com/lapwat/papeer/releases/download/kindlegen/kindlegen_linux_2.6_i386_v2_9.tar.gz > kindlegen.tar.gz + tar xzvf kindlegen.tar.gz + chmod +x kindlegen + mv kindlegen /usr/local/bin + - name: Checkout + uses: actions/checkout@v2 + - name: Test + run: make test diff --git a/book/scraper.go b/book/scraper.go index c4b2d2f..693ab60 100644 --- a/book/scraper.go +++ b/book/scraper.go @@ -398,11 +398,11 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, reverse, incl selector = "a" selectorSet = false } - + pathLinks := map[string][]link{} pathCount := map[string]int{} pathMax = "" - + // visit and count link classes c := colly.NewCollector() c.OnHTML(selector, func(e *colly.HTMLElement) { @@ -410,34 +410,34 @@ func GetLinks(url *urllib.URL, selector string, limit, offset int, reverse, incl text := strings.TrimSpace(e.Text) path := GetPath(e.DOM) key := path - + if selectorSet { - + // if selector is set, we use the selector specified by the user - + key = selector pathLinks[key] = append(pathLinks[key], NewLink(href, text)) pathCount[key] += 1 pathMax = key - + } else { - + // if selector is not set, we compute the selector ourselves - + class := e.Attr("class") // include the element class to make sure we have the same exact path for every link in the table of content key = fmt.Sprintf("%s.%s", path, class) - + // we count this key if the link text is not empty if text != "" { pathLinks[key] = append(pathLinks[key], NewLink(href, text)) pathCount[key] += len(text) - + if pathCount[key] > pathCount[pathMax] { pathMax = key } } - + } }) c.Visit(url.String())