mirror of
https://github.com/NohamR/Bibmath2Tex.git
synced 2026-05-24 19:58:43 +00:00
pushh
This commit is contained in:
68
grab.py
68
grab.py
@@ -29,6 +29,10 @@ class Part:
|
||||
|
||||
class Chapitre:
|
||||
def __init__(self, title, url=None):
|
||||
if ':' in title:
|
||||
title = title.split(':')[1].strip()
|
||||
title = title.lower()
|
||||
title = title[0].upper() + title[1:] if title else ""
|
||||
self.title = title
|
||||
self.parts = []
|
||||
self.url = url
|
||||
@@ -124,7 +128,26 @@ class LatexFile:
|
||||
self.add_footer()
|
||||
self.save()
|
||||
|
||||
def fetch_chapitre(quoi):
|
||||
def parse(content):
    """Flatten an HTML fragment into plain text.

    The fragment is re-parsed with lxml, every ``span``, ``a`` and ``img``
    tag is removed together with its contents, and the first
    ``<ol class="enumeratechiffre">`` (if any) is replaced by its items
    rendered as ``"1. ..."``, ``"2. ..."`` paragraphs preceded by a
    ``\\par`` marker.

    Args:
        content: A BeautifulSoup tag (or anything whose ``str()`` is HTML),
            or ``None`` when the caller's lookup found nothing.

    Returns:
        The stripped text of the fragment, or ``""`` when ``content`` is
        ``None``.
    """
    if content is None:
        # str(None) would be parsed as the literal text "None" and leak
        # into the generated document, so treat a missing fragment as empty.
        return ""

    soup2 = BeautifulSoup(str(content), 'lxml')

    # Drop these tags entirely (markup and inner text).
    for tag in soup2.find_all(['span', 'a', 'img']):
        tag.extract()

    ol_list = soup2.find('ol', class_='enumeratechiffre')
    if ol_list is not None:
        # NOTE(review): find_all('li') is recursive, so items of a nested
        # list would also be numbered here - confirm against real pages.
        questions = [
            f"{i}. {li.get_text().strip()}"
            for i, li in enumerate(ol_list.find_all('li'), 1)
        ]
        if questions:
            # Paragraph break before the first question.
            questions.insert(0, "\\par")
        ol_list.replace_with("\n\n".join(questions))

    return soup2.get_text().strip()
|
||||
|
||||
def fetch_chapitre(page):
|
||||
headers = {
|
||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
||||
'accept-language': 'fr-FR,fr;q=0.7',
|
||||
@@ -142,12 +165,7 @@ def fetch_chapitre(quoi):
|
||||
'upgrade-insecure-requests': '1',
|
||||
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||
}
|
||||
params = {
|
||||
'action': 'affiche',
|
||||
'quoi': quoi,
|
||||
'type': 'fexo',
|
||||
}
|
||||
response = requests.get('https://bibmath.net/ressources/index.php', headers=headers, params=params)
|
||||
response = requests.get(page, headers=headers)
|
||||
if response.status_code == 200:
|
||||
soup = BeautifulSoup(response.text, 'lxml')
|
||||
article = soup.find('article', id='contenugauche')
|
||||
@@ -175,20 +193,16 @@ def fetch_chapitre(quoi):
|
||||
|
||||
stars_count = len(exo_title_div.find_all('img'))
|
||||
|
||||
enonce = element.find('div', class_='enonce').find('div', class_='inner').get_text(strip=True)
|
||||
if ".Existe-t" in enonce:
|
||||
print(element)
|
||||
enonce = element.find('div', class_='enonce').find('div', class_='inner')
|
||||
enonce = parse(enonce)
|
||||
|
||||
indication_div = element.find('div', class_='indication')
|
||||
indication = (
|
||||
indication_div.find('div', class_='inner').get_text(strip=True)
|
||||
if indication_div else "No indication"
|
||||
)
|
||||
indication = indication_div.find('div', class_='inner')
|
||||
indication = parse(indication)
|
||||
|
||||
answer_div = element.find('div', class_='corrige')
|
||||
answer = (
|
||||
answer_div.find('div', class_='inner').get_text(strip=True)
|
||||
if answer_div else "No answer"
|
||||
)
|
||||
answer = answer_div.find('div', class_='inner')
|
||||
answer = parse(answer)
|
||||
|
||||
exercise = Exercise(
|
||||
number=exercise_number,
|
||||
@@ -207,9 +221,17 @@ def fetch_chapitre(quoi):
|
||||
else:
|
||||
print(f"Request failed with status code: {response.status_code}")
|
||||
|
||||
quoi = 'mpsi/feuillesexo/matrices'
|
||||
chapitre = fetch_chapitre(quoi)
|
||||
def get_page(page):
    """Fetch one page and write it out as ``dump/<title>.tex``.

    Args:
        page: URL of the page, passed unchanged to ``fetch_chapitre``.

    Returns:
        None. When ``fetch_chapitre`` yields no chapter (it only prints a
        message on a non-200 response), the page is skipped instead of
        failing on ``chapitre.title``.
    """
    import os

    chapitre = fetch_chapitre(page)
    if chapitre is None:
        print(f"Skipping {page}: no chapter could be fetched")
        return

    # Presumably LatexFile writes to the given path on save; make sure the
    # target directory exists so a fresh checkout does not fail there.
    os.makedirs('dump', exist_ok=True)

    title = chapitre.title
    output_path = f'dump/{title}.tex'
    latex_file = LatexFile(output_path)
    latex_file.generate_latex(chapitre)
    print(f"LaTeX file generated: {output_path}")
|
||||
|
||||
latex_file = LatexFile('file.tex')
|
||||
latex_file.generate_latex(chapitre)
|
||||
print("LaTeX file generated: file.tex")
|
||||
if __name__ == "__main__":
    # pages.txt holds one page URL per line, e.g.
    # https://bibmath.net/ressources/index.php?action=affiche&quoi=mpsi/feuillesexo/prehilbert&type=fexo
    with open('pages.txt', 'r') as f:
        for line in f:
            page = line.strip()
            if not page:
                # A blank line would otherwise be requested as an empty URL.
                continue
            get_page(page)
|
||||
Reference in New Issue
Block a user