"""Scrape one exercise sheet from bibmath.net.

Fetches the "matrices" exercise sheet, prints its title and the heading of
each part, and parses every exercise block (number, title, id, star count,
statement, indication, answer).
"""

from typing import Optional
import urllib.parse as urlparse

from bs4 import BeautifulSoup
import requests

URL = 'https://bibmath.net/ressources/index.php'

# requests has no default timeout; without one a stalled server blocks forever.
TIMEOUT_SECONDS = 10

# Text that separates the exercise number from its title in the title div.
TITLE_SEPARATOR = ' - '

# Link text that follows the exercise title in the title div.
REPORT_MARKER = '[Signaler une erreur]'

headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'accept-language': 'fr-FR,fr;q=0.7',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=0, i',
    'sec-ch-ua': '"Brave";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'sec-gpc': '1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}

params = {
    'action': 'affiche',
    'quoi': 'mpsi/feuillesexo/matrices',
    'type': 'fexo',
}


def _exercise_title(title_text: str) -> str:
    """Return the text between the first ' - ' and '[Signaler une erreur]'.

    A missing separator or marker leaves that side of the text untouched,
    instead of slicing with the -1 that ``str.find`` returns on a miss.
    """
    _, separator, after = title_text.partition(TITLE_SEPARATOR)
    if not separator:
        after = title_text
    return after.partition(REPORT_MARKER)[0].strip()


def _exercise_id(title_div) -> Optional[str]:
    """Return the ``numero`` query parameter of the error-report link.

    Returns None when the title div has no link to ``signalerreur.php`` or
    the link carries no ``numero`` parameter.
    """
    link = title_div.find('a', href=lambda href: href and 'signalerreur.php' in href)
    if link is None:
        return None
    query = urlparse.urlparse(link['href']).query
    values = urlparse.parse_qs(query).get('numero')
    return values[0] if values else None


def _inner_text(exo, css_class: str, default: str) -> str:
    """Return the stripped text of ``div.<css_class> > div.inner`` inside *exo*.

    Returns *default* when either the outer or the inner div is missing.
    """
    outer = exo.find('div', class_=css_class)
    inner = outer.find('div', class_='inner') if outer is not None else None
    return inner.get_text(strip=True) if inner is not None else default


def parse_exercise(exo, title_div) -> dict:
    """Extract the fields of one exercise block.

    Args:
        exo: the ``div.exo`` element of the exercise.
        title_div: the ``div.titreexo`` element found inside *exo*.

    Returns:
        A dict with keys ``number``, ``title``, ``id``, ``stars``,
        ``enonce``, ``indication`` and ``answer``.
    """
    spans = title_div.find_all('span')
    return {
        # The number is read from the second <span> of the title div.
        'number': spans[1].text.strip() if len(spans) > 1 else "",
        'title': _exercise_title(title_div.text),
        'id': _exercise_id(title_div),
        # Each <img> in the title div is counted as one star.
        'stars': len(title_div.find_all('img')),
        'enonce': _inner_text(exo, 'enonce', "No statement"),
        'indication': _inner_text(exo, 'indication', "No indication"),
        'answer': _inner_text(exo, 'corrige', "No answer"),
    }


def main() -> list:
    """Fetch the sheet, print its title and part headings, parse exercises.

    Returns:
        The list of parsed exercise dicts, empty when the request fails or
        the expected article element is absent. The exercise fields are
        parsed but not printed.
    """
    try:
        response = requests.get(
            URL, headers=headers, params=params, timeout=TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return []

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    article = soup.find('article', id='contenugauche')
    if article is None:
        print("Article with id 'contenugauche' not found.")
        return []

    heading = article.find('h1')
    title = heading.get_text(strip=True) if heading is not None else ""
    print(f"Title: {title}")

    for part in article.find_all('div', class_='titrepartie'):
        part_text = part.get_text(strip=True)
        print(f"Part of the File: {part_text}")

    exercises = []
    for exo in article.find_all('div', class_='exo'):
        title_div = exo.find('div', class_='titreexo')
        if title_div is None:
            # Without a title div there is nothing to identify the exercise.
            continue
        exercises.append(parse_exercise(exo, title_div))
    return exercises


if __name__ == "__main__":
    main()