mirror of
https://github.com/NohamR/Manuelcontent.git
synced 2025-07-08 13:17:22 +00:00
Créé avec Colaboratory
This commit is contained in:
parent
501dd12b2a
commit
94a27ced0c
120
LLS_scrap.ipynb
Normal file
120
LLS_scrap.ipynb
Normal file
@ -0,0 +1,120 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"authorship_tag": "ABX9TyO+gk/tdKqfk6oA5OoSzNwI",
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "view-in-github",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"<a href=\"https://colab.research.google.com/github/NohamR/Manuelcontent/blob/main/LLS_scrap.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "j3GU2ZHVpbja"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import urllib.request\n",
|
||||
"import csv\n",
|
||||
"import requests\n",
|
||||
"import os\n",
|
||||
"import threading\n",
|
||||
"\n",
|
||||
"def download_images(code):\n",
|
||||
" for page in range(1, 1000):\n",
|
||||
" url = \"https://assets.lls.fr/books/\" + str(code[5]) + \"/print/\" + str(page) + \".jpg\"\n",
|
||||
" print(url)\n",
|
||||
" r = requests.get(url, allow_redirects=True)\n",
|
||||
" if r.status_code == 200:\n",
|
||||
" path = 'gen/' + str(code[5]) + '/page' + str(page) + '.jpg'\n",
|
||||
" urllib.request.urlretrieve(url, path)\n",
|
||||
" else:\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
"code = []\n",
|
||||
"with open('liste.csv', newline='', encoding=\"utf8\") as csvfile:\n",
|
||||
" spamreader = csv.reader(csvfile, delimiter=',')\n",
|
||||
" for row in spamreader:\n",
|
||||
" code.append(row)\n",
|
||||
"\n",
|
||||
"threads = []\n",
|
||||
"for numcode in range(len(code)):\n",
|
||||
" os.mkdir('gen/' + str(code[numcode][5]))\n",
|
||||
" t = threading.Thread(target=download_images, args=(code[numcode],))\n",
|
||||
" threads.append(t)\n",
|
||||
" t.start()\n",
|
||||
"\n",
|
||||
"for t in threads:\n",
|
||||
" t.join()\n",
|
||||
"\n",
|
||||
"print(\"Toutes les images ont été téléchargées avec succès\") \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from google.colab import drive\n",
|
||||
"drive.mount('/content/drive')"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "glMJLK-btM1X",
|
||||
"outputId": "4d31ec89-f7b7-4b21-ff88-5ab30a9b4707"
|
||||
},
|
||||
"execution_count": 12,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"cp -r /content/gen.zip /content/gen2.zip\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "mb_7KJLIxPo-"
|
||||
},
|
||||
"execution_count": 15,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"!zip -r gen.zip /content/gen/\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "LP8SCsPr2pLm"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user