From 94a27ced0c5939ac9e62f7421a7c8eeac4ee5449 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=88=9A=28noham=29=C2=B2?= <100566912+NohamR@users.noreply.github.com>
Date: Tue, 2 May 2023 16:49:04 +0200
Subject: [PATCH] =?UTF-8?q?Cr=C3=A9=C3=A9=20avec=20Colaboratory?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 LLS_scrap.ipynb | 140 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 LLS_scrap.ipynb

diff --git a/LLS_scrap.ipynb b/LLS_scrap.ipynb
new file mode 100644
index 00000000..d488b8fc
--- /dev/null
+++ b/LLS_scrap.ipynb
@@ -0,0 +1,140 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyO+gk/tdKqfk6oA5OoSzNwI",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "\"Open"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "j3GU2ZHVpbja"
+      },
+      "outputs": [],
+      "source": [
+        "import urllib.request\n",
+        "import csv\n",
+        "import requests\n",
+        "import os\n",
+        "import threading\n",
+        "\n",
+        "BOOK_ID_COLUMN = 5  # index of the book id column in each row of liste.csv\n",
+        "MAX_PAGE = 1000  # page numbers are probed from 1 up to MAX_PAGE - 1\n",
+        "TIMEOUT = 30  # seconds to wait on one HTTP request before giving up\n",
+        "\n",
+        "\n",
+        "def download_images(code):\n",
+        "    \"\"\"Download the page images of one book into gen/BOOK_ID/.\n",
+        "\n",
+        "    code is one row of liste.csv. Pages are requested in increasing order\n",
+        "    and the loop stops at the first page that is not served with HTTP 200.\n",
+        "    \"\"\"\n",
+        "    book_id = str(code[BOOK_ID_COLUMN])\n",
+        "    for page in range(1, MAX_PAGE):\n",
+        "        url = f\"https://assets.lls.fr/books/{book_id}/print/{page}.jpg\"\n",
+        "        print(url)\n",
+        "        try:\n",
+        "            r = requests.get(url, allow_redirects=True, timeout=TIMEOUT)\n",
+        "        except requests.RequestException as exc:\n",
+        "            # A network failure ends this book with a short message\n",
+        "            # instead of an unhandled traceback in the worker thread.\n",
+        "            print(f\"Erreur lors du téléchargement de {url} : {exc}\")\n",
+        "            break\n",
+        "        if r.status_code != 200:\n",
+        "            break\n",
+        "        # Write the bytes already received instead of fetching the URL twice.\n",
+        "        with open(f\"gen/{book_id}/page{page}.jpg\", \"wb\") as image_file:\n",
+        "            image_file.write(r.content)\n",
+        "\n",
+        "\n",
+        "code = []\n",
+        "with open('liste.csv', newline='', encoding=\"utf8\") as csvfile:\n",
+        "    spamreader = csv.reader(csvfile, delimiter=',')\n",
+        "    for row in spamreader:\n",
+        "        # Skip blank or short rows, which have no book id column.\n",
+        "        if len(row) > BOOK_ID_COLUMN:\n",
+        "            code.append(row)\n",
+        "\n",
+        "threads = []\n",
+        "for row in code:\n",
+        "    # makedirs + exist_ok: gen/ may be missing and reruns must not crash.\n",
+        "    os.makedirs('gen/' + str(row[BOOK_ID_COLUMN]), exist_ok=True)\n",
+        "    t = threading.Thread(target=download_images, args=(row,))\n",
+        "    threads.append(t)\n",
+        "    t.start()\n",
+        "\n",
+        "for t in threads:\n",
+        "    t.join()\n",
+        "\n",
+        "print(\"Toutes les images ont été téléchargées avec succès\")\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "glMJLK-btM1X",
+        "outputId": "4d31ec89-f7b7-4b21-ff88-5ab30a9b4707"
+      },
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "cp -r /content/gen.zip /content/gen2.zip\n"
+      ],
+      "metadata": {
+        "id": "mb_7KJLIxPo-"
+      },
+      "execution_count": 15,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!zip -r gen.zip /content/gen/\n"
+      ],
+      "metadata": {
+        "id": "LP8SCsPr2pLm"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file