# Mirror of https://github.com/NohamR/Bibmath2Tex.git
# (synced 2026-05-25 12:17:12 +00:00; 81 lines, 3.3 KiB, Python)
import urllib.parse as urlparse

import requests
from bs4 import BeautifulSoup

# HTTP request headers passed to requests.get() for the Bibmath page.
# The values imitate a desktop Brave/Chromium 131 browser on macOS
# (see 'sec-ch-ua', 'sec-ch-ua-platform' and 'user-agent').
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'accept-language': 'fr-FR,fr;q=0.7',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=0, i',
    'sec-ch-ua': '"Brave";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'sec-gpc': '1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}

# Query-string parameters sent with the page request; 'quoi' holds the path
# of the sheet being requested (here 'mpsi/feuillesexo/matrices').
params = {
    'action': 'affiche',
    'quoi': 'mpsi/feuillesexo/matrices',
    'type': 'fexo',
}

# Address of the page that is fetched with the module-level `headers`/`params`.
PAGE_URL = 'https://bibmath.net/ressources/index.php'

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# Text of the "report an error" link that follows the title inside a title div.
ERROR_LINK_LABEL = '[Signaler une erreur]'

# Set to True to print every parsed exercise in addition to the page summary.
VERBOSE = False


def parse_exercise_title(title_text):
    """Extract the exercise title from the full text of a title div.

    The title is the text after the first ' - ' separator and before the
    '[Signaler une erreur]' label.

    Args:
        title_text: Complete text content of the 'titreexo' div.

    Returns:
        The stripped title. An empty string when the ' - ' separator is
        absent; the whole remainder when the error-link label is absent.
    """
    _, separator, remainder = title_text.partition(' - ')
    if not separator:
        # No separator means no title part at all; slicing from a computed
        # offset here would return an arbitrary fragment of the text.
        return ''
    # partition() keeps the full remainder when the label is missing, instead
    # of chopping the last character as slicing with find() == -1 would.
    title, _, _ = remainder.partition(ERROR_LINK_LABEL)
    return title.strip()


def extract_exercise_id(error_href):
    """Return the 'numero' query parameter of an error-report link.

    Args:
        error_href: The href of the 'signalerreur.php' link.

    Returns:
        The first 'numero' value as a string, or None when the link has no
        such (non-empty) parameter.
    """
    query = urlparse.urlparse(error_href).query
    values = urlparse.parse_qs(query).get('numero')
    return values[0] if values else None


def inner_text(container, default):
    """Return the stripped text of the 'inner' div nested in *container*.

    Args:
        container: A BeautifulSoup tag, or None when the section is missing.
        default: Value returned when the container or its 'inner' div is
            missing.
    """
    if container is None:
        return default
    inner = container.find('div', class_='inner')
    if inner is None:
        return default
    return inner.get_text(strip=True)


def parse_exercise(exo):
    """Turn one 'exo' div into a dict describing the exercise.

    Args:
        exo: BeautifulSoup tag of a <div class="exo"> element.

    Returns:
        A dict with the keys 'number', 'title', 'id', 'stars', 'enonce',
        'indication' and 'answer', or None when the div has no 'titreexo'
        title div. 'number' and 'id' are None when they cannot be found.
    """
    title_div = exo.find('div', class_='titreexo')
    if title_div is None:
        return None

    # The exercise number is read from the second <span> of the title div.
    spans = title_div.find_all('span')
    number = spans[1].text.strip() if len(spans) > 1 else None

    # The exercise id is carried by the query string of the error-report link.
    error_anchor = title_div.find(
        'a', href=lambda href: href and 'signalerreur.php' in href
    )
    exercise_id = (
        extract_exercise_id(error_anchor['href'])
        if error_anchor is not None else None
    )

    return {
        'number': number,
        'title': parse_exercise_title(title_div.text),
        'id': exercise_id,
        # Every <img> inside the title div is counted as one star.
        'stars': len(title_div.find_all('img')),
        'enonce': inner_text(exo.find('div', class_='enonce'), "No statement"),
        'indication': inner_text(exo.find('div', class_='indication'), "No indication"),
        'answer': inner_text(exo.find('div', class_='corrige'), "No answer"),
    }


def format_exercise(exercise):
    """Render a dict produced by parse_exercise() as a multi-line report."""
    return "\n".join([
        f"Exercise Number: {exercise['number']}",
        f"Exercise Title: {exercise['title']}",
        f"Exercise ID: {exercise['id']}",
        f"Stars: {exercise['stars']} stars",
        f"Enonce: {exercise['enonce']}",
        f"Indication: {exercise['indication']}",
        f"Answer: {exercise['answer']}",
    ])


def main():
    """Fetch the sheet, print its title and part titles, parse its exercises.

    Returns:
        The list of parsed exercise dicts, or None when the request fails or
        the page does not contain the expected article element.
    """
    try:
        response = requests.get(
            PAGE_URL, headers=headers, params=params, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported like a bad status code
        # instead of ending the script with a traceback.
        print(f"Request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    article = soup.find('article', id='contenugauche')
    if article is None:
        print("Article with id 'contenugauche' not found.")
        return None

    heading = article.find('h1')
    title = heading.get_text(strip=True) if heading is not None else ''
    print(f"Title: {title}")

    # Find all part titles
    for part in article.find_all('div', class_='titrepartie'):
        part_text = part.get_text(strip=True)
        print(f"Part of the File: {part_text}")

    exercises = []
    for exo in article.find_all('div', class_='exo'):
        exercise = parse_exercise(exo)
        if exercise is None:
            # Skip blocks without a title div rather than aborting the run.
            continue
        exercises.append(exercise)
        if VERBOSE:
            print(format_exercise(exercise))
    return exercises


if __name__ == "__main__":
    main()