{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyO+gk/tdKqfk6oA5OoSzNwI",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/NohamR/Manuelcontent/blob/main/LLS_scrap.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "j3GU2ZHVpbja"
      },
      "outputs": [],
      "source": [
        "import urllib.request\n",
        "import csv\n",
        "import requests\n",
        "import os\n",
        "import threading\n",
        "\n",
        "def download_images(code, max_pages=999, timeout=30):\n",
        "    \"\"\"Download consecutive page images for one CSV row into ``gen/<code[5]>/``.\n",
        "\n",
        "    Pages are requested in order starting at 1; the loop stops at the first\n",
        "    page that does not answer with HTTP 200 or that fails at the network level.\n",
        "\n",
        "    Args:\n",
        "        code: One row read from the CSV. Only ``code[5]`` is used: it is the\n",
        "            path segment after ``/books/`` in the asset URL and the name of\n",
        "            the output folder under ``gen/``.\n",
        "        max_pages: Highest page number that will be tried.\n",
        "        timeout: Per-request timeout in seconds, so a stalled connection\n",
        "            cannot block the calling thread indefinitely.\n",
        "\n",
        "    The folder ``gen/<code[5]>/`` must already exist.\n",
        "    \"\"\"\n",
        "    book_id = str(code[5])\n",
        "    for page in range(1, max_pages + 1):\n",
        "        url = \"https://assets.lls.fr/books/\" + book_id + \"/print/\" + str(page) + \".jpg\"\n",
        "        print(url)\n",
        "        try:\n",
        "            r = requests.get(url, allow_redirects=True, timeout=timeout)\n",
        "        except requests.RequestException as exc:\n",
        "            # Report the failure and stop this book instead of dying with a bare traceback.\n",
        "            print('Erreur réseau pour ' + url + ' : ' + str(exc))\n",
        "            break\n",
        "        if r.status_code != 200:\n",
        "            # First missing page marks the end of the book.\n",
        "            break\n",
        "        path = 'gen/' + book_id + '/page' + str(page) + '.jpg'\n",
        "        # Save the body that was just fetched rather than downloading the same URL a second time.\n",
        "        with open(path, 'wb') as image_file:\n",
        "            image_file.write(r.content)\n",
        "\n",
        "# Load every CSV row; column index 5 is the value used to build the asset URLs.\n",
        "with open('liste.csv', newline='', encoding=\"utf8\") as csvfile:\n",
        "    code = list(csv.reader(csvfile, delimiter=','))\n",
        "\n",
        "# Start one download thread per usable row.\n",
        "threads = []\n",
        "for row in code:\n",
        "    if len(row) <= 5:\n",
        "        # Blank or truncated CSV line: there is no column 5 to build a URL from.\n",
        "        continue\n",
        "    # makedirs(exist_ok=True) also creates the parent 'gen/' folder and lets this\n",
        "    # cell be re-run without failing on folders left over from a previous run.\n",
        "    os.makedirs('gen/' + str(row[5]), exist_ok=True)\n",
        "    t = threading.Thread(target=download_images, args=(row,))\n",
        "    threads.append(t)\n",
        "    t.start()\n",
        "\n",
        "# Wait for every worker to finish before reporting completion.\n",
        "for t in threads:\n",
        "    t.join()\n",
        "\n",
        "print(\"Toutes les images ont été téléchargées avec succès\")\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Mount Google Drive inside the Colab runtime at /content/drive.\n",
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "glMJLK-btM1X",
        "outputId": "4d31ec89-f7b7-4b21-ff88-5ab30a9b4707"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Keep a second copy of the archive.\n",
        "# NOTE(review): gen.zip is produced by the `!zip` cell, so that cell has to run before this one.\n",
        "!cp /content/gen.zip /content/gen2.zip\n"
      ],
      "metadata": {
        "id": "mb_7KJLIxPo-"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Recursively pack /content/gen/ into gen.zip in the current working directory.\n",
        "!zip -r gen.zip /content/gen/\n"
      ],
      "metadata": {
        "id": "LP8SCsPr2pLm"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}