From c579babe4fbc355e9a93431be7841222834f27d1 Mon Sep 17 00:00:00 2001 From: Mnemonic Date: Tue, 24 Sep 2019 02:23:01 -0700 Subject: [PATCH] Added clarifiers and fixed the return --- phpBB_scraper/phpBB_scraper/spiders/phpBB.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/phpBB_scraper/phpBB_scraper/spiders/phpBB.py b/phpBB_scraper/phpBB_scraper/spiders/phpBB.py index 2f65f4a..cdb713e 100644 --- a/phpBB_scraper/phpBB_scraper/spiders/phpBB.py +++ b/phpBB_scraper/phpBB_scraper/spiders/phpBB.py @@ -7,10 +7,12 @@ from scrapy.http import Request class PhpbbSpider(scrapy.Spider): name = 'phpBB' + # Domain only, no URLs allowed_domains = [''] start_urls = [''] username = '' password = '' + # False if you don't need to log in, True if you do. form_login = False def parse(self, response): @@ -22,7 +24,8 @@ class PhpbbSpider(scrapy.Spider): formdata=formdata, callback=self.after_login, dont_click=True)] - return form_request + yield form_request + return else: # REQUEST SUB-FORUM TITLE LINKS links = response.xpath('//a[@class="forumtitle"]/@href').extract() @@ -88,4 +91,4 @@ class PhpbbSpider(scrapy.Spider): # CLICK THROUGH NEXT PAGE next_link = response.xpath('//li[@class="next"]//a[@rel="next"]/@href').extract_first() if next_link: - yield scrapy.Request(response.urljoin(next_link), callback=self.parse_posts) \ No newline at end of file + yield scrapy.Request(response.urljoin(next_link), callback=self.parse_posts)