This commit is contained in:
√(noham)²
2025-04-14 00:46:41 +02:00
parent 1135b4ddac
commit 85b53eda9d
57 changed files with 138 additions and 23 deletions

68
grab.py
View File

@@ -29,6 +29,10 @@ class Part:
class Chapitre:
    """A chapter: a normalised title, a list of parts and an optional URL."""

    def __init__(self, title, url=None):
        """Store the normalised title, an empty parts list and the URL.

        When the raw title contains a colon, only the segment between the
        first and second colon is kept (whitespace-trimmed). The result is
        lower-cased and its first character is then upper-cased.

        Args:
            title: Raw chapter title.
            url: Optional address associated with the chapter.
        """
        if ':' in title:
            segments = title.split(':')
            title = segments[1].strip()
        lowered = title.lower()
        # Slicing keeps the empty-title case safe: "" stays "".
        self.title = lowered[:1].upper() + lowered[1:]
        self.parts = []
        self.url = url
@@ -124,7 +128,26 @@ class LatexFile:
self.add_footer()
self.save()
def fetch_chapitre(quoi):
def parse(content):
    """Turn an HTML fragment into plain text with numbered list items.

    The fragment is stringified and re-parsed with the ``lxml`` parser, every
    ``span``, ``a`` and ``img`` tag is removed, and each
    ``<ol class="enumeratechiffre">`` list is replaced by its items rendered
    as ``"1. ..."``, ``"2. ..."`` paragraphs preceded by a ``\\par`` marker.

    Args:
        content: The fragment to convert; anything whose ``str()`` is HTML.
            ``None`` yields an empty string instead of the literal "None".

    Returns:
        The whitespace-stripped text of the transformed fragment.
    """
    if content is None:
        return ""

    soup2 = BeautifulSoup(str(content), 'lxml')
    for tag in soup2.find_all(['span', 'a', 'img']):
        tag.extract()

    # Convert every enumerated list, not only the first one. Searching again
    # after each replacement means a list nested inside an already-converted
    # one is never visited on its own.
    while True:
        ol_list = soup2.find('ol', class_='enumeratechiffre')
        if ol_list is None:
            break
        # Direct children only: a nested <li> is already part of its parent
        # item's text and must not be numbered (and emitted) a second time.
        items = ol_list.find_all('li', recursive=False)
        questions = [
            f"{i}. {li.get_text().strip()}" for i, li in enumerate(items, 1)
        ]
        if questions:
            # Add a line break before the first question
            questions.insert(0, "\\par")
        ol_list.replace_with("\n\n".join(questions))

    return soup2.get_text().strip()
def fetch_chapitre(page):
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
'accept-language': 'fr-FR,fr;q=0.7',
@@ -142,12 +165,7 @@ def fetch_chapitre(quoi):
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}
params = {
'action': 'affiche',
'quoi': quoi,
'type': 'fexo',
}
response = requests.get('https://bibmath.net/ressources/index.php', headers=headers, params=params)
response = requests.get(page, headers=headers)
if response.status_code == 200:
soup = BeautifulSoup(response.text, 'lxml')
article = soup.find('article', id='contenugauche')
@@ -175,20 +193,16 @@ def fetch_chapitre(quoi):
stars_count = len(exo_title_div.find_all('img'))
enonce = element.find('div', class_='enonce').find('div', class_='inner').get_text(strip=True)
if ".Existe-t" in enonce:
print(element)
enonce = element.find('div', class_='enonce').find('div', class_='inner')
enonce = parse(enonce)
indication_div = element.find('div', class_='indication')
indication = (
indication_div.find('div', class_='inner').get_text(strip=True)
if indication_div else "No indication"
)
indication = indication_div.find('div', class_='inner')
indication = parse(indication)
answer_div = element.find('div', class_='corrige')
answer = (
answer_div.find('div', class_='inner').get_text(strip=True)
if answer_div else "No answer"
)
answer = answer_div.find('div', class_='inner')
answer = parse(answer)
exercise = Exercise(
number=exercise_number,
@@ -207,9 +221,17 @@ def fetch_chapitre(quoi):
else:
print(f"Request failed with status code: {response.status_code}")
quoi = 'mpsi/feuillesexo/matrices'
chapitre = fetch_chapitre(quoi)
def get_page(page):
    """Fetch one page and write its chapter as a LaTeX file under ``dump/``.

    Args:
        page: Page address, passed unchanged to ``fetch_chapitre``.

    The output file is named after the chapter title. When
    ``fetch_chapitre`` yields no chapter, nothing is written and the page is
    reported as skipped.
    """
    chapitre = fetch_chapitre(page)
    if chapitre is None:
        # The failure branch of fetch_chapitre only prints a message and
        # falls through, so a failed request reaches us as None. Skip the
        # page rather than crash a whole batch on `.title`.
        print(f"Skipping {page}: no chapter could be fetched")
        return

    output_path = f'dump/{chapitre.title}.tex'
    latex_file = LatexFile(output_path)
    latex_file.generate_latex(chapitre)
    print(f"LaTeX file generated: {output_path}")
latex_file = LatexFile('file.tex')
latex_file.generate_latex(chapitre)
print("LaTeX file generated: file.tex")
if __name__ == "__main__":
    # pages.txt holds one page address per line, for example:
    # https://bibmath.net/ressources/index.php?action=affiche&quoi=mpsi/feuillesexo/prehilbert&type=fexo
    with open('pages.txt', 'r') as f:
        for line in f:
            page = line.strip()
            # A blank line (typically a trailing one) is not an address;
            # requesting an empty URL would raise instead of fetching.
            if not page:
                continue
            get_page(page)