diff --git a/Makefile b/Makefile index 9c97ef3..b25e06d 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,10 @@ TEX=pdflatex -SRC=essaie.tex +SRC=file.tex OUTDIR=output -OUTPDF=$(OUTDIR)/essaie.pdf all: @mkdir -p $(OUTDIR) - $(TEX) -output-directory=$(OUTDIR) $(SRC) + $(TEX) -output-directory=$(OUTDIR) $(SRC) -interaction=batchmode clean: rm -rf $(OUTDIR)/*.aux $(OUTDIR)/*.log $(OUTDIR)/*.pdf diff --git a/grab.py b/grab.py index 1c80273..54616bc 100644 --- a/grab.py +++ b/grab.py @@ -1,81 +1,213 @@ +from pprint import pprint from bs4 import BeautifulSoup import requests import urllib.parse as urlparse -headers = { - 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', - 'accept-language': 'fr-FR,fr;q=0.7', - 'cache-control': 'no-cache', - 'pragma': 'no-cache', - 'priority': 'u=0, i', - 'sec-ch-ua': '"Brave";v="131", "Chromium";v="131", "Not_A Brand";v="24"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"macOS"', - 'sec-fetch-dest': 'document', - 'sec-fetch-mode': 'navigate', - 'sec-fetch-site': 'same-origin', - 'sec-fetch-user': '?1', - 'sec-gpc': '1', - 'upgrade-insecure-requests': '1', - 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', -} +class Exercise: + def __init__(self, number, title, id, stars, enonce, indication, answer): + self.number = number + self.title = title + self.id = id + self.stars = stars + self.enonce = enonce + self.indication = indication + self.answer = answer -params = { - 'action': 'affiche', - 'quoi': 'mpsi/feuillesexo/matrices', - 'type': 'fexo', -} + def __repr__(self): + return f"Exercise({self.number}, {self.title}, {self.id}, {self.stars})" + +class Part: + def __init__(self, title): + self.title = title + self.exercises = [] -response = requests.get('https://bibmath.net/ressources/index.php', headers=headers, params=params) + def add_exercise(self, exercise): + self.exercises.append(exercise) -if response.status_code == 200: - soup = BeautifulSoup(response.text, 'html.parser') - article = soup.find('article', id='contenugauche') - if article: - title = article.find('h1').get_text(strip=True) - print(f"Title: {title}") + def __repr__(self): + return f"Part({self.title}, {len(self.exercises)} exercises)" + +class Chapitre: + def __init__(self, title, url=None): + self.title = title + self.parts = [] + self.url = url + + def add_part(self, part): + self.parts.append(part) + + def __repr__(self): + return f"Chapitre({self.title}, {len(self.parts)} parts)" + + def show(self): + print(f"Chapitre: {self.title}") + for part in self.parts: + print(f" Part: {part.title}") + for exercise in part.exercises: + print(f" Exercise: {exercise.number}, {exercise.title}, {exercise.id}, {exercise.stars} stars") + print(f" Enonce: {exercise.enonce}") + print(f" Indication: {exercise.indication}") + print(f" Answer: {exercise.answer}") + +class LatexFile: + def __init__(self, filename): + self.filename = filename + self.content = "" + + def add_header(self): + with open('parts/header.tex', 'r') as f: + self.content += f.read() + + def add_footer(self): + with open('parts/footer.tex', 'r') as f: + self.content += f.read() + + def add_content(self, content): + self.content += content + "\n" + + def add_pagebreak(self): + self.content += "\\newpage\n" + + def add_source(self, chapitre): + self.content += f"\\noindent\\textbf{{Chapitre:}} \\href{{{chapitre.url}}}{{{chapitre.title}}}\n" + + def add_exercise(self, exercise): + self.content += f"""\\exercice{{{exercise.number}, name, date, {exercise.stars}, {exercise.title}}}\n""" + self.content += f"""\\enonce{{{exercise.number}}}{{}}\n""" + self.content += f"{exercise.enonce}\n" + self.content += f"""\\finenonce{{{exercise.number}}}\n""" + self.content += f"""\\finexercice\n""" + self.content += "\n" + + def add_indication(self, exercise): + self.content += f"""\\indication{{{exercise.number}}}\n""" + self.content += f"{exercise.indication}\n" + self.content += f"""\\finindication\n""" + self.content += "\n" + + def add_answer(self, exercise): + self.content += f"""\\correction{{{exercise.number}}}\n""" + self.content += f"{exercise.answer}\n" + self.content += f"""\\fincorrection\n""" + self.content += "\n" + + def sanitize(self): + # return ''.join(c for c in self.content if ord(c) < 128) + return self.content.encode('utf-8', 'ignore').decode('utf-8') + + def save(self): + with open(self.filename, 'w') as f: + f.write(self.sanitize()) + + def generate_latex(self, chapitre): + self.add_header() + + self.add_source(chapitre) + + self.add_content(f"\\title{{{chapitre.title}}}") + for part in chapitre.parts: + self.add_content(f"\\section{{{part.title}}}") + for ex in part.exercises: + self.add_exercise(ex) - # Find all part titles - part_titles = article.find_all('div', class_='titrepartie') - for part in part_titles: - part_text = part.get_text(strip=True) - print(f"Part of the File: {part_text}") + self.add_pagebreak() + + for part in chapitre.parts: + for ex in part.exercises: + self.add_indication(ex) + self.add_pagebreak() + + for part in chapitre.parts: + for ex in part.exercises: + self.add_answer(ex) + + self.add_footer() + self.save() + +def fetch_chapitre(quoi): + headers = { + 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', + 'accept-language': 'fr-FR,fr;q=0.7', + 'cache-control': 'no-cache', + 'pragma': 'no-cache', + 'priority': 'u=0, i', + 'sec-ch-ua': '"Brave";v="131", "Chromium";v="131", "Not_A Brand";v="24"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"macOS"', + 'sec-fetch-dest': 'document', + 'sec-fetch-mode': 'navigate', + 'sec-fetch-site': 'same-origin', + 'sec-fetch-user': '?1', + 'sec-gpc': '1', + 'upgrade-insecure-requests': '1', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + } + params = { + 'action': 'affiche', + 'quoi': quoi, + 'type': 'fexo', + } + response = requests.get('https://bibmath.net/ressources/index.php', headers=headers, params=params) + if response.status_code == 200: + soup = BeautifulSoup(response.text, 'html.parser') + article = soup.find('article', id='contenugauche') - exercises = article.find_all('div', class_='exo') - for exo in exercises: - exo_title_div = exo.find('div', class_='titreexo') - - exercise_number = exo_title_div.find_all('span')[1].text.strip() - title_start = exo_title_div.text.find(' - ') + 3 - title_end = exo_title_div.text.find('[Signaler une erreur]') - exercise_title = exo_title_div.text[title_start:title_end].strip() + if article: + title = article.find('h1').get_text(strip=True) + chapitre = Chapitre(title, url=response.url) + current_part = None + for element in article.find_all(recursive=False): + if 'titrepartie' in element.get('class', []): + part_title = element.get_text(strip=True) + current_part = Part(part_title) + chapitre.add_part(current_part) + elif 'exo' in element.get('class', []) and current_part is not None: + exo_title_div = element.find('div', class_='titreexo') - error_link = exo_title_div.find('a', href=lambda href: href and 'signalerreur.php' in href)['href'] - parsed_url = urlparse.urlparse(error_link) - exercise_id = urlparse.parse_qs(parsed_url.query)['numero'][0] + exercise_number = exo_title_div.find_all('span')[1].text.strip() + title_start = exo_title_div.text.find(' - ') + 3 + title_end = exo_title_div.text.find('[Signaler une erreur]') + exercise_title = exo_title_div.text[title_start:title_end].strip() - stars_count = len(exo_title_div.find_all('img')) + error_link = exo_title_div.find('a', href=lambda href: href and 'signalerreur.php' in href)['href'] + parsed_url = urlparse.urlparse(error_link) + exercise_id = urlparse.parse_qs(parsed_url.query)['numero'][0] - enonce = exo.find('div', class_='enonce').find('div', class_='inner').get_text(strip=True) - indication_div = exo.find('div', class_='indication') - indication = ( - indication_div.find('div', class_='inner').get_text(strip=True) - if indication_div else "No indication" - ) + stars_count = len(exo_title_div.find_all('img')) - answer_div = exo.find('div', class_='corrige') - answer = ( - answer_div.find('div', class_='inner').get_text(strip=True) - if answer_div else "No answer" - ) - # print(f"Exercise Number: {exercise_number}") - # print(f"Exercise Title: {exercise_title}") - # print(f"Exercise ID: {exercise_id}") - # print(f"Stars: {stars_count} stars") - # print(f"Enonce: {enonce}") - # print(f"Indication: {indication}") - # print(f"Answer: {answer}") + enonce = element.find('div', class_='enonce').find('div', class_='inner').get_text(strip=True) + indication_div = element.find('div', class_='indication') + indication = ( + indication_div.find('div', class_='inner').get_text(strip=True) + if indication_div else "No indication" + ) + + answer_div = element.find('div', class_='corrige') + answer = ( + answer_div.find('div', class_='inner').get_text(strip=True) + if answer_div else "No answer" + ) + + exercise = Exercise( + number=exercise_number, + title=exercise_title, + id=exercise_id, + stars=stars_count, + enonce=enonce, + indication=indication, + answer=answer + ) + + current_part.add_exercise(exercise) + return chapitre + else: + print("Article with id 'contenugauche' not found.") else: - print("Article with id 'contenugauche' not found.") -else: - print(f"Request failed with status code: {response.status_code}") \ No newline at end of file + print(f"Request failed with status code: {response.status_code}") + +quoi = 'mpsi/feuillesexo/matrices' +chapitre = fetch_chapitre(quoi) + +latex_file = LatexFile('file.tex') +latex_file.generate_latex(chapitre) +print("LaTeX file generated: file.tex") \ No newline at end of file diff --git a/parts/header.tex b/parts/header.tex index 5bb4d42..95f8991 100644 --- a/parts/header.tex +++ b/parts/header.tex @@ -106,5 +106,5 @@ \usepackage{graphics} \usepackage[all]{xy} - -\begin{document} \ No newline at end of file +\pagestyle{empty} +\begin{document}