Merge pull request #2 from ScottMnemonic/master

https://github.com/Dascienz/phpBB-forum-scraper/issues/1
This commit is contained in:
Dave Ascienzo
2019-09-24 11:26:46 -04:00
committed by GitHub

View File

@@ -7,10 +7,12 @@ from scrapy.http import Request
class PhpbbSpider(scrapy.Spider):
name = 'phpBB'
# Domain only, not full URLs
allowed_domains = ['']
start_urls = ['']
username = ''
password = ''
# False if you don't need to log in, True if you do.
form_login = False
def parse(self, response):
@@ -22,7 +24,8 @@ class PhpbbSpider(scrapy.Spider):
formdata=formdata,
callback=self.after_login,
dont_click=True)]
return form_request
yield form_request
return
else:
# REQUEST SUB-FORUM TITLE LINKS
links = response.xpath('//a[@class="forumtitle"]/@href').extract()
@@ -88,4 +91,4 @@ class PhpbbSpider(scrapy.Spider):
# CLICK THROUGH NEXT PAGE
next_link = response.xpath('//li[@class="next"]//a[@rel="next"]/@href').extract_first()
if next_link:
yield scrapy.Request(response.urljoin(next_link), callback=self.parse_posts)
yield scrapy.Request(response.urljoin(next_link), callback=self.parse_posts)