diff --git a/phpBB_scraper/phpBB_scraper/__pycache__/__init__.cpython-36.pyc b/phpBB_scraper/phpBB_scraper/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 22e9ac3..0000000 Binary files a/phpBB_scraper/phpBB_scraper/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/phpBB_scraper/phpBB_scraper/__pycache__/settings.cpython-36.pyc b/phpBB_scraper/phpBB_scraper/__pycache__/settings.cpython-36.pyc deleted file mode 100644 index ef9c43f..0000000 Binary files a/phpBB_scraper/phpBB_scraper/__pycache__/settings.cpython-36.pyc and /dev/null differ diff --git a/phpBB_scraper/phpBB_scraper/spiders/__pycache__/__init__.cpython-36.pyc b/phpBB_scraper/phpBB_scraper/spiders/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 35e1f9f..0000000 Binary files a/phpBB_scraper/phpBB_scraper/spiders/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/phpBB_scraper/phpBB_scraper/spiders/__pycache__/phpBB.cpython-36.pyc b/phpBB_scraper/phpBB_scraper/spiders/__pycache__/phpBB.cpython-36.pyc deleted file mode 100644 index cdeb4be..0000000 Binary files a/phpBB_scraper/phpBB_scraper/spiders/__pycache__/phpBB.cpython-36.pyc and /dev/null differ diff --git a/phpBB_scraper/phpBB_scraper/spiders/phpBB.py b/phpBB_scraper/phpBB_scraper/spiders/phpBB.py index f7bebad..2f65f4a 100644 --- a/phpBB_scraper/phpBB_scraper/spiders/phpBB.py +++ b/phpBB_scraper/phpBB_scraper/spiders/phpBB.py @@ -56,7 +56,7 @@ class PhpbbSpider(scrapy.Spider): soup = BeautifulSoup(string, 'lxml') block_quotes = soup.find_all('blockquote') for i, quote in enumerate(block_quotes): - block_quotes[i] = '=' + str(i) + quote.get_text() + block_quotes[i] = '='%str(i+1) + quote.get_text() return ''.join(block_quotes) def clean_text(self, string): @@ -65,7 +65,7 @@ class PhpbbSpider(scrapy.Spider): soup = BeautifulSoup(string, 'lxml') for tag in tags: for i, item in enumerate(soup.find_all(tag)): - item.replaceWith('=' + str(i)) + item.replaceWith('='%str(i+1)) return re.sub(r' +', r' ', soup.get_text()) def parse_posts(self, response):