Merge pull request #2 from ScottMnemonic/master

https://github.com/Dascienz/phpBB-forum-scraper/issues/1
This commit is contained in:
Dave Ascienzo
2019-09-24 11:26:46 -04:00
committed by GitHub

View File

@@ -7,10 +7,12 @@ from scrapy.http import Request
class PhpbbSpider(scrapy.Spider): class PhpbbSpider(scrapy.Spider):
name = 'phpBB' name = 'phpBB'
# Domain only, no URLs
allowed_domains = [''] allowed_domains = ['']
start_urls = [''] start_urls = ['']
username = '' username = ''
password = '' password = ''
# False if you don't need to log in, True if you do.
form_login = False form_login = False
def parse(self, response): def parse(self, response):
@@ -22,7 +24,8 @@ class PhpbbSpider(scrapy.Spider):
formdata=formdata, formdata=formdata,
callback=self.after_login, callback=self.after_login,
dont_click=True)] dont_click=True)]
return form_request yield form_request
return
else: else:
# REQUEST SUB-FORUM TITLE LINKS # REQUEST SUB-FORUM TITLE LINKS
links = response.xpath('//a[@class="forumtitle"]/@href').extract() links = response.xpath('//a[@class="forumtitle"]/@href').extract()