mirror of
https://github.com/NohamR/phpBB-forum-scraper.git
synced 2026-02-22 02:25:43 +00:00
Merge pull request #2 from ScottMnemonic/master
https://github.com/Dascienz/phpBB-forum-scraper/issues/1
This commit is contained in:
@@ -7,10 +7,12 @@ from scrapy.http import Request
|
|||||||
class PhpbbSpider(scrapy.Spider):
|
class PhpbbSpider(scrapy.Spider):
|
||||||
|
|
||||||
name = 'phpBB'
|
name = 'phpBB'
|
||||||
|
# Domain only, no URLs
|
||||||
allowed_domains = ['']
|
allowed_domains = ['']
|
||||||
start_urls = ['']
|
start_urls = ['']
|
||||||
username = ''
|
username = ''
|
||||||
password = ''
|
password = ''
|
||||||
|
# False if you don't need to log in, True if you do.
|
||||||
form_login = False
|
form_login = False
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
@@ -22,7 +24,8 @@ class PhpbbSpider(scrapy.Spider):
|
|||||||
formdata=formdata,
|
formdata=formdata,
|
||||||
callback=self.after_login,
|
callback=self.after_login,
|
||||||
dont_click=True)]
|
dont_click=True)]
|
||||||
return form_request
|
yield form_request
|
||||||
|
return
|
||||||
else:
|
else:
|
||||||
# REQUEST SUB-FORUM TITLE LINKS
|
# REQUEST SUB-FORUM TITLE LINKS
|
||||||
links = response.xpath('//a[@class="forumtitle"]/@href').extract()
|
links = response.xpath('//a[@class="forumtitle"]/@href').extract()
|
||||||
@@ -88,4 +91,4 @@ class PhpbbSpider(scrapy.Spider):
|
|||||||
# CLICK THROUGH NEXT PAGE
|
# CLICK THROUGH NEXT PAGE
|
||||||
next_link = response.xpath('//li[@class="next"]//a[@rel="next"]/@href').extract_first()
|
next_link = response.xpath('//li[@class="next"]//a[@rel="next"]/@href').extract_first()
|
||||||
if next_link:
|
if next_link:
|
||||||
yield scrapy.Request(response.urljoin(next_link), callback=self.parse_posts)
|
yield scrapy.Request(response.urljoin(next_link), callback=self.parse_posts)
|
||||||
|
|||||||
Reference in New Issue
Block a user