From 55931d1343b4701e15b8acd18523e0a4bdbef113 Mon Sep 17 00:00:00 2001
From: kendrick <herzbergkendrick@gmail.com>
Date: Wed, 22 Jun 2022 10:47:18 +0200
Subject: [PATCH] remove file

---
 notebook_gouv/prix_carburant_gouv.ipynb | 396 ------------------------
 1 file changed, 396 deletions(-)
 delete mode 100644 notebook_gouv/prix_carburant_gouv.ipynb

diff --git a/notebook_gouv/prix_carburant_gouv.ipynb b/notebook_gouv/prix_carburant_gouv.ipynb
deleted file mode 100644
index 841f872..0000000
--- a/notebook_gouv/prix_carburant_gouv.ipynb
+++ /dev/null
@@ -1,396 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "d60999c6-2ae5-430b-934c-a95d309a496c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import zipfile\n",
-    "import os\n",
-    "import xml.etree.ElementTree as ET\n",
-    "import csv\n",
-    "import time\n",
-    "from urllib.request import urlretrieve\n",
-    "from datetime import date\n",
-    "from calendar import monthrange\n",
-    "\n",
-    "import pandas as pd\n",
-    "import requests"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "bbf067e2-95d6-4375-93f2-41ef842893b0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#recupération des bases de donnée sur le site du gouvernement.\n",
-    "def recuperation_xml(date_debut,date_fin):\n",
-    "    for date in range(date_debut, date_fin +1, 1):\n",
-    "        directory_to_extract_to = os.path.join(\"unzip_file\")\n",
-    "        path_to_zip_file  = os.path.join(\"zip_file\",f\"PrixCarburants_annuel_{date}.zip\")\n",
-    "        urlretrieve(f\"https://donnees.roulez-eco.fr/opendata/annee/{date}\", path_to_zip_file)\n",
-    "        with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:\n",
-    "            zip_ref.extractall(directory_to_extract_to)\n",
-    "#recuperation_xml(2007,2021)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "id": "6c27528f-fbbd-4c34-86fe-a904c8181f77",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# utilisation de l'API de adress.data.gouv.fr pour passer de la latitude et longitude, au citycode\n",
-    "def citycode_from_lat_long(longitude,latitude):\n",
-    "    url = f\"https://api-adresse.data.gouv.fr/reverse/?lon={longitude}&lat={latitude}\"\n",
-    "    response = requests.get(url)\n",
-    "    contenu = response.json() \n",
-    "    features = contenu['features']\n",
-    "    if len(features) == 0:\n",
-    "        return None\n",
-    "    else:\n",
-    "        citycode = contenu['features'][0]['properties']['citycode']\n",
-    "        return citycode"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "id": "d67ca228-3db6-446b-bcac-f1efafd129f6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# passage du citycode au code du departement\n",
-    "def code_departement_from_citycode(citycode):\n",
-    "    if citycode[ : 2] >= '97':\n",
-    "        code_departement = citycode[ : 3]\n",
-    "    else:\n",
-    "        code_departement = citycode[ : 2]\n",
-    "    return code_departement"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "id": "e8b5e2f4-2095-4c8f-a11d-d11de4cff76c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# passage du code postal au code du departement\n",
-    "def code_departement_from_code_postal(code_postal):\n",
-    "    if code_postal == '99999':\n",
-    "        return None\n",
-    "    elif code_postal[ : 2] >= '97':\n",
-    "        code_departement = code_postal[ : 3]\n",
-    "    elif code_postal[ : 3] in [\"200\",\"201\"] :\n",
-    "        code_departement = \"2A\"\n",
-    "    elif code_postal[ : 3] in [\"202\",\"206\"]:\n",
-    "        code_departement = \"2B\"\n",
-    "    else:\n",
-    "        code_departement = code_postal[ : 2]     \n",
-    "    return code_departement"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "64a0d8fc-649a-4710-839e-416706a5f712",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# passage du code du departement au code région en utilisant l'API Métadonnées - V1 de l'INSEE\n",
-    "# documentation à API nomenclatures géographiques Insee\n",
-    "# attention, la clé doit être réactualisé tous les 7 jours...\n",
-    "# l'API est limité à 30 requêtes par minute\n",
-    "def code_region_from_code_departement(code_departement,date):\n",
-    "    headers = {\n",
-    "        'Accept': 'application/json',\n",
-    "        'Authorization': 'Bearer 82590123-79ba-3b05-ad0c-fdfe657eaf7a', #Le changement est ici\n",
-    "    }\n",
-    "    params = {\n",
-    "        'date': date,\n",
-    "    }\n",
-    "    response = requests.get(f'https://api.insee.fr/metadonnees/V1/geo/departement/{code_departement}/ascendants', params=params, headers=headers)\n",
-    "    contenu = response.json()\n",
-    "    time.sleep(2.1)\n",
-    "    if isinstance(contenu,dict):\n",
-    "        print(contenu)\n",
-    "    return contenu[0]['code']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "id": "c5f67bd6-5cf9-4e09-a587-f4b2454f4618",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Les APIs sont relativement fragile, il arrive qu'il y ai des erreurs 500 ou 502. \n",
-    "#Dans le cas là if faut supprimer l'année qui était en train de boucler de \"prix_by_region\".\n",
-    "#Il faut ensuite recommencer la boucle à partir de cette date.\n",
-    "def debug_if_error_500(date_debut,date_fin):\n",
-    "    for region, prix_by_carburant in prix_by_region.items():\n",
-    "        for carburant,prix_by_annee in prix_by_carburant.items():\n",
-    "            for annee in range(date_debut,date_fin+1):\n",
-    "                if annee in prix_by_annee:\n",
-    "                    del prix_by_annee[annee]\n",
-    "debug_if_error_500(2013,2013)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "id": "14979ff2-770a-4a6c-8780-13a76a98512a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tree = ET.parse('unzip_file/PrixCarburants_annuel_2021.xml')\n",
-    "pdv_liste = tree.getroot()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "id": "bb42e6c2-f9e8-49da-a372-88b9b869993b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "citycode_lat_long = {} \n",
-    "prix_by_region = {}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "id": "b8a5473c-902a-4d1f-9318-ad52e425cf3e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#prix_by_region"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2cd9550a-5c9b-4787-a372-d4f8309eaf9d",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2007\n"
-     ]
-    }
-   ],
-   "source": [
-    "#boucle principale, qui récupére les données des fichiers XML,\n",
-    "#trouve le code région de chaque station, \n",
-    "#récupère les données importantes, dont le prix par jour, par carburant, par station,\n",
-    "#fait la moyenne par jour \n",
-    "for annee in range(2007,2022):\n",
-    "    print(annee)\n",
-    "    tree = ET.parse(f'unzip_file/PrixCarburants_annuel_{annee}.xml')\n",
-    "    pdv_liste = tree.getroot()\n",
-    "    date = f'{annee}-01-01'\n",
-    "    region = {}    \n",
-    "    for pdv in pdv_liste:\n",
-    "        longitude = pdv.attrib.get('longitude')\n",
-    "        latitude = pdv.attrib.get('latitude')\n",
-    "        citycode = None\n",
-    "        if latitude and longitude:\n",
-    "            lat_long = f\"{latitude},{longitude}\"\n",
-    "            citycode = citycode_lat_long.get(lat_long)\n",
-    "            if citycode is None:\n",
-    "                citycode = citycode_from_lat_long(float(longitude)/100000,float(latitude)/100000)\n",
-    "                if citycode is not None:\n",
-    "                    citycode_lat_long[lat_long] = citycode\n",
-    "        code_departement = (\n",
-    "            code_departement_from_code_postal(pdv.attrib['cp'])\n",
-    "            if citycode is None\n",
-    "            else code_departement_from_citycode(citycode)\n",
-    "        )\n",
-    "        if code_departement is None:\n",
-    "            print('code_departement is None')\n",
-    "            continue\n",
-    "        code_region = region.get(code_departement) \n",
-    "        if code_region is None:\n",
-    "            code_region = code_region_from_code_departement(code_departement,date)\n",
-    "            region[code_departement]= code_region\n",
-    "        for prix_element in pdv:\n",
-    "            if prix_element.tag != 'prix':\n",
-    "                continue\n",
-    "            if prix_element.attrib.get('maj') is None:\n",
-    "                continue\n",
-    "            if prix_element.attrib.get('nom') is None:\n",
-    "                continue\n",
-    "            if prix_element.attrib.get('valeur') is None:\n",
-    "                continue\n",
-    "            prix_by_carburant = prix_by_region.setdefault(code_region,{})\n",
-    "#            prix_by_carburant = prix_by_region.get(code_region)\n",
-    "#            if prix_by_carburant is None:\n",
-    "#                prix_by_carburant = prix_by_region[code_region] = {}\n",
-    "            if 'T' in prix_element.attrib['maj']:\n",
-    "                date_prix = prix_element.attrib['maj'].split('T')[0]\n",
-    "            else:\n",
-    "                date_prix = prix_element.attrib['maj'].split(' ')[0]\n",
-    "            annee_prix, mois_prix, jour_prix = date_prix.split('-')\n",
-    "            annee_prix, mois_prix, jour_prix = int(annee_prix), int(mois_prix), int(jour_prix)\n",
-    "            prix_by_annee = prix_by_carburant.setdefault(prix_element.attrib['nom'],{})\n",
-    "            prix_by_mois = prix_by_annee.setdefault(annee_prix,{})\n",
-    "            prix_by_jour = prix_by_mois.setdefault(mois_prix,{})\n",
-    "            prix_by_station = prix_by_jour.setdefault(jour_prix,{})\n",
-    "            prix_by_station[pdv.attrib['id']] = prix_element.attrib['valeur']\n",
-    "            \n",
-    "    for region, prix_by_carburant in prix_by_region.items():\n",
-    "        stations = set()\n",
-    "        prix_by_carburant = prix_by_region[region] \n",
-    "        for carburant,prix_by_annee in prix_by_carburant.items():\n",
-    "            dernier_prix_par_station = {}\n",
-    "            prix_by_mois = prix_by_annee[annee]\n",
-    "            for mois in range(1,13):\n",
-    "                prix_by_jour = prix_by_mois.setdefault(mois,{})\n",
-    "                dernier_jour = monthrange(annee, mois)[1]\n",
-    "                for jour in range(1,dernier_jour+1):\n",
-    "                    prix_by_station = prix_by_jour.get(jour)\n",
-    "                    stations = stations.union(prix_by_station.keys())\n",
-    "                    for station in stations:\n",
-    "                        prix = prix_by_station.get(station)\n",
-    "                        if prix is None:\n",
-    "                            prix_by_station[station] = dernier_prix_par_station.get(station)\n",
-    "                        else:\n",
-    "                            dernier_prix_par_station[station] = prix\n",
-    "\n",
-    "    for region, prix_by_carburant in prix_by_region.items():\n",
-    "        for carburant, prix_by_annee in prix_by_carburant.items():\n",
-    "            prix_by_mois = prix_by_annee[annee]\n",
-    "            for annee, prix_by_mois in prix_by_annee.items():\n",
-    "                for mois, prix_by_jour in prix_by_mois.items(): \n",
-    "                    for jour, prix_by_station in prix_by_jour.items():\n",
-    "                        count = 0\n",
-    "                        total = 0\n",
-    "                        for station, prix in prix_by_station.items():\n",
-    "                            if prix is not None:\n",
-    "                                total += float(prix)\n",
-    "                                count += 1 \n",
-    "                        prix_by_jour[jour] = round(total / count, 2) if count > 0 else None\n",
-    "\n",
-    "    for region, prix_by_carburant in prix_by_region.items():\n",
-    "        for carburant,prix_by_annee in prix_by_carburant.items():\n",
-    "            prix_by_mois = prix_by_annee[annee]\n",
-    "            count_annee = 0\n",
-    "            total_annee = 0\n",
-    "            for mois,prix_by_jour in prix_by_mois.items():\n",
-    "                count_mois = 0\n",
-    "                total_mois = 0\n",
-    "                for jour, prix in prix_by_jour.items():\n",
-    "                    if prix is not None:\n",
-    "                        count_mois += 1\n",
-    "                        total_mois += prix\n",
-    "                        count_annee += 1\n",
-    "                        total_annee += prix\n",
-    "                if count_mois == 0:\n",
-    "                    prix_by_mois[mois] = None\n",
-    "                else:\n",
-    "                    prix_by_mois[mois] = round(total_mois / count_mois,2)\n",
-    "            if count_annee == 0:\n",
-    "                prix_by_mois['moyenne'] = None\n",
-    "            else:\n",
-    "                prix_by_mois['moyenne'] = round(total_annee / count_annee,2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "id": "0f26bf8a-397d-4522-8409-f9f4681ce870",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Lisse le dictionnaire \"prix_by_region\".\n",
-    "liste_prix_mensuel=[] \n",
-    "liste_prix_annuel=[]\n",
-    "for region, prix_by_carburant in prix_by_region.items():\n",
-    "    for carburant,prix_by_annee in prix_by_carburant.items():\n",
-    "        for annee,prix_by_mois in prix_by_annee.items():\n",
-    "            for mois,prix in prix_by_mois.items():\n",
-    "                if prix_by_mois.values == 'moyenne':\n",
-    "                    pass\n",
-    "                prix_region_mensuel = {\n",
-    "                            \"region\": region,\n",
-    "                            \"carburant\": carburant,\n",
-    "                            \"annee\": annee,\n",
-    "                            \"mois\": mois,\n",
-    "                            \"prix_moyen\": prix,\n",
-    "                        }\n",
-    "                liste_prix_mensuel.append(prix_region_mensuel)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "id": "8a712431-90ff-42bb-9449-3f89bbaf2a15",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#créer la dataframe \"prix_mensuel_final.csv\"\n",
-    "df = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
-    "indexNames = df[ df['mois'] == 'moyenne' ].index\n",
-    "df.drop(indexNames , inplace=True)\n",
-    "df.reset_index(drop = True, inplace = True)\n",
-    "df['prix_moyen'] = round(df['prix_moyen'] * 0.001,2)\n",
-    "df.to_csv (r'prix_mensuel_final.csv', index = False, header=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "id": "0803570e-3b2c-4f0d-bc8b-aa34a3f6dfa6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#créer la dataframe \"prix_annuel_final.csv\"\n",
-    "df = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
-    "indexNames = df[ df['mois'] != 'moyenne' ].index\n",
-    "df.drop(indexNames , inplace=True)\n",
-    "df.reset_index(drop = True, inplace = True)\n",
-    "df.drop(columns=['mois'],inplace=True)\n",
-    "df['prix_moyen'] = round(df['prix_moyen'] * 0.001,2)\n",
-    "df.to_csv (r'prix_annuel_final.csv', index = False, header=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b7816857-8629-4f2a-b49f-3b4ca0c9fc16",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "indirect-taxation-kernel",
-   "language": "python",
-   "name": "indirect-taxation-kernel"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
-- 
GitLab