Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
Prix carburants
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
leximpact
Simulateur socio-fiscal
Adaptations OpenFisca
Prix carburants
Commits
55931d13
Commit
55931d13
authored
3 years ago
by
kendrick herzberg
Browse files
Options
Downloads
Patches
Plain Diff
remove file
parent
b0c4e382
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
notebook_gouv/prix_carburant_gouv.ipynb
+0
-396
0 additions, 396 deletions
notebook_gouv/prix_carburant_gouv.ipynb
with
0 additions
and
396 deletions
notebook_gouv/prix_carburant_gouv.ipynb
deleted
100644 → 0
+
0
−
396
View file @
b0c4e382
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "d60999c6-2ae5-430b-934c-a95d309a496c",
"metadata": {},
"outputs": [],
"source": [
"import zipfile\n",
"import os\n",
"import xml.etree.ElementTree as ET\n",
"import csv\n",
"import time\n",
"from urllib.request import urlretrieve\n",
"from datetime import date\n",
"from calendar import monthrange\n",
"\n",
"import pandas as pd\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "bbf067e2-95d6-4375-93f2-41ef842893b0",
"metadata": {},
"outputs": [],
"source": [
"# Récupération des bases de données sur le site du gouvernement.\n",
"def recuperation_xml(date_debut,date_fin):\n",
" for date in range(date_debut, date_fin +1, 1):\n",
" directory_to_extract_to = os.path.join(\"unzip_file\")\n",
" path_to_zip_file = os.path.join(\"zip_file\",f\"PrixCarburants_annuel_{date}.zip\")\n",
" urlretrieve(f\"https://donnees.roulez-eco.fr/opendata/annee/{date}\", path_to_zip_file)\n",
" with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:\n",
" zip_ref.extractall(directory_to_extract_to)\n",
"#recuperation_xml(2007,2021)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "6c27528f-fbbd-4c34-86fe-a904c8181f77",
"metadata": {},
"outputs": [],
"source": [
"# Utilisation de l'API api-adresse.data.gouv.fr pour passer de la latitude et de la longitude au citycode\n",
"def citycode_from_lat_long(longitude,latitude):\n",
" url = f\"https://api-adresse.data.gouv.fr/reverse/?lon={longitude}&lat={latitude}\"\n",
" response = requests.get(url)\n",
" contenu = response.json() \n",
" features = contenu['features']\n",
" if len(features) == 0:\n",
" return None\n",
" else:\n",
" citycode = contenu['features'][0]['properties']['citycode']\n",
" return citycode"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d67ca228-3db6-446b-bcac-f1efafd129f6",
"metadata": {},
"outputs": [],
"source": [
"# passage du citycode au code du departement\n",
"def code_departement_from_citycode(citycode):\n",
" if citycode[ : 2] >= '97':\n",
" code_departement = citycode[ : 3]\n",
" else:\n",
" code_departement = citycode[ : 2]\n",
" return code_departement"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e8b5e2f4-2095-4c8f-a11d-d11de4cff76c",
"metadata": {},
"outputs": [],
"source": [
"# passage du code postal au code du departement\n",
"def code_departement_from_code_postal(code_postal):\n",
" if code_postal == '99999':\n",
" return None\n",
" elif code_postal[ : 2] >= '97':\n",
" code_departement = code_postal[ : 3]\n",
" elif code_postal[ : 3] in [\"200\",\"201\"] :\n",
" code_departement = \"2A\"\n",
" elif code_postal[ : 3] in [\"202\",\"206\"]:\n",
" code_departement = \"2B\"\n",
" else:\n",
" code_departement = code_postal[ : 2] \n",
" return code_departement"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "64a0d8fc-649a-4710-839e-416706a5f712",
"metadata": {},
"outputs": [],
"source": [
"# Passage du code du département au code région en utilisant l'API Métadonnées - V1 de l'INSEE\n",
"# (documentation : API nomenclatures géographiques Insee).\n",
"# Attention, la clé doit être réactualisée tous les 7 jours...\n",
"# L'API est limitée à 30 requêtes par minute.\n",
"def code_region_from_code_departement(code_departement,date):\n",
" headers = {\n",
" 'Accept': 'application/json',\n",
" 'Authorization': 'Bearer 82590123-79ba-3b05-ad0c-fdfe657eaf7a', #Le changement est ici\n",
" }\n",
" params = {\n",
" 'date': date,\n",
" }\n",
" response = requests.get(f'https://api.insee.fr/metadonnees/V1/geo/departement/{code_departement}/ascendants', params=params, headers=headers)\n",
" contenu = response.json()\n",
" time.sleep(2.1)\n",
" if isinstance(contenu,dict):\n",
" print(contenu)\n",
" return contenu[0]['code']"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "c5f67bd6-5cf9-4e09-a587-f4b2454f4618",
"metadata": {},
"outputs": [],
"source": [
"# Les API sont relativement fragiles : il arrive qu'il y ait des erreurs 500 ou 502.\n",
"# Dans ce cas, il faut supprimer de \"prix_by_region\" l'année qui était en cours de traitement,\n",
"# puis relancer la boucle à partir de cette année.\n",
"def debug_if_error_500(date_debut,date_fin):\n",
" for region, prix_by_carburant in prix_by_region.items():\n",
" for carburant,prix_by_annee in prix_by_carburant.items():\n",
" for annee in range(date_debut,date_fin+1):\n",
" if annee in prix_by_annee:\n",
" del prix_by_annee[annee]\n",
"debug_if_error_500(2013,2013)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "14979ff2-770a-4a6c-8780-13a76a98512a",
"metadata": {},
"outputs": [],
"source": [
"tree = ET.parse('unzip_file/PrixCarburants_annuel_2021.xml')\n",
"pdv_liste = tree.getroot()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "bb42e6c2-f9e8-49da-a372-88b9b869993b",
"metadata": {},
"outputs": [],
"source": [
"citycode_lat_long = {} \n",
"prix_by_region = {}"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "b8a5473c-902a-4d1f-9318-ad52e425cf3e",
"metadata": {},
"outputs": [],
"source": [
"#prix_by_region"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2cd9550a-5c9b-4787-a372-d4f8309eaf9d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2007\n"
]
}
],
"source": [
"#boucle principale, qui récupére les données des fichiers XML,\n",
"#trouve le code région de chaque station, \n",
"#récupère les données importantes, dont le prix par jour, par carburant, par station,\n",
"#fait la moyenne par jour \n",
"for annee in range(2007,2022):\n",
" print(annee)\n",
" tree = ET.parse(f'unzip_file/PrixCarburants_annuel_{annee}.xml')\n",
" pdv_liste = tree.getroot()\n",
" date = f'{annee}-01-01'\n",
" region = {} \n",
" for pdv in pdv_liste:\n",
" longitude = pdv.attrib.get('longitude')\n",
" latitude = pdv.attrib.get('latitude')\n",
" citycode = None\n",
" if latitude and longitude:\n",
" lat_long = f\"{latitude},{longitude}\"\n",
" citycode = citycode_lat_long.get(lat_long)\n",
" if citycode is None:\n",
" citycode = citycode_from_lat_long(float(longitude)/100000,float(latitude)/100000)\n",
" if citycode is not None:\n",
" citycode_lat_long[lat_long] = citycode\n",
" code_departement = (\n",
" code_departement_from_code_postal(pdv.attrib['cp'])\n",
" if citycode is None\n",
" else code_departement_from_citycode(citycode)\n",
" )\n",
" if code_departement is None:\n",
" print('code_departement is None')\n",
" continue\n",
" code_region = region.get(code_departement) \n",
" if code_region is None:\n",
" code_region = code_region_from_code_departement(code_departement,date)\n",
" region[code_departement]= code_region\n",
" for prix_element in pdv:\n",
" if prix_element.tag != 'prix':\n",
" continue\n",
" if prix_element.attrib.get('maj') is None:\n",
" continue\n",
" if prix_element.attrib.get('nom') is None:\n",
" continue\n",
" if prix_element.attrib.get('valeur') is None:\n",
" continue\n",
" prix_by_carburant = prix_by_region.setdefault(code_region,{})\n",
"# prix_by_carburant = prix_by_region.get(code_region)\n",
"# if prix_by_carburant is None:\n",
"# prix_by_carburant = prix_by_region[code_region] = {}\n",
" if 'T' in prix_element.attrib['maj']:\n",
" date_prix = prix_element.attrib['maj'].split('T')[0]\n",
" else:\n",
" date_prix = prix_element.attrib['maj'].split(' ')[0]\n",
" annee_prix, mois_prix, jour_prix = date_prix.split('-')\n",
" annee_prix, mois_prix, jour_prix = int(annee_prix), int(mois_prix), int(jour_prix)\n",
" prix_by_annee = prix_by_carburant.setdefault(prix_element.attrib['nom'],{})\n",
" prix_by_mois = prix_by_annee.setdefault(annee_prix,{})\n",
" prix_by_jour = prix_by_mois.setdefault(mois_prix,{})\n",
" prix_by_station = prix_by_jour.setdefault(jour_prix,{})\n",
" prix_by_station[pdv.attrib['id']] = prix_element.attrib['valeur']\n",
" \n",
" for region, prix_by_carburant in prix_by_region.items():\n",
" stations = set()\n",
" prix_by_carburant = prix_by_region[region] \n",
" for carburant,prix_by_annee in prix_by_carburant.items():\n",
" dernier_prix_par_station = {}\n",
" prix_by_mois = prix_by_annee[annee]\n",
" for mois in range(1,13):\n",
" prix_by_jour = prix_by_mois.setdefault(mois,{})\n",
" dernier_jour = monthrange(annee, mois)[1]\n",
" for jour in range(1,dernier_jour+1):\n",
" prix_by_station = prix_by_jour.get(jour)\n",
" stations = stations.union(prix_by_station.keys())\n",
" for station in stations:\n",
" prix = prix_by_station.get(station)\n",
" if prix is None:\n",
" prix_by_station[station] = dernier_prix_par_station.get(station)\n",
" else:\n",
" dernier_prix_par_station[station] = prix\n",
"\n",
" for region, prix_by_carburant in prix_by_region.items():\n",
" for carburant, prix_by_annee in prix_by_carburant.items():\n",
" prix_by_mois = prix_by_annee[annee]\n",
" for annee, prix_by_mois in prix_by_annee.items():\n",
" for mois, prix_by_jour in prix_by_mois.items(): \n",
" for jour, prix_by_station in prix_by_jour.items():\n",
" count = 0\n",
" total = 0\n",
" for station, prix in prix_by_station.items():\n",
" if prix is not None:\n",
" total += float(prix)\n",
" count += 1 \n",
" prix_by_jour[jour] = round(total / count, 2) if count > 0 else None\n",
"\n",
" for region, prix_by_carburant in prix_by_region.items():\n",
" for carburant,prix_by_annee in prix_by_carburant.items():\n",
" prix_by_mois = prix_by_annee[annee]\n",
" count_annee = 0\n",
" total_annee = 0\n",
" for mois,prix_by_jour in prix_by_mois.items():\n",
" count_mois = 0\n",
" total_mois = 0\n",
" for jour, prix in prix_by_jour.items():\n",
" if prix is not None:\n",
" count_mois += 1\n",
" total_mois += prix\n",
" count_annee += 1\n",
" total_annee += prix\n",
" if count_mois == 0:\n",
" prix_by_mois[mois] = None\n",
" else:\n",
" prix_by_mois[mois] = round(total_mois / count_mois,2)\n",
" if count_annee == 0:\n",
" prix_by_mois['moyenne'] = None\n",
" else:\n",
" prix_by_mois['moyenne'] = round(total_annee / count_annee,2)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "0f26bf8a-397d-4522-8409-f9f4681ce870",
"metadata": {},
"outputs": [],
"source": [
"#Lisse le dictionnaire \"prix_by_region\".\n",
"liste_prix_mensuel=[] \n",
"liste_prix_annuel=[]\n",
"for region, prix_by_carburant in prix_by_region.items():\n",
" for carburant,prix_by_annee in prix_by_carburant.items():\n",
" for annee,prix_by_mois in prix_by_annee.items():\n",
" for mois,prix in prix_by_mois.items():\n",
" if prix_by_mois.values == 'moyenne':\n",
" pass\n",
" prix_region_mensuel = {\n",
" \"region\": region,\n",
" \"carburant\": carburant,\n",
" \"annee\": annee,\n",
" \"mois\": mois,\n",
" \"prix_moyen\": prix,\n",
" }\n",
" liste_prix_mensuel.append(prix_region_mensuel)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "8a712431-90ff-42bb-9449-3f89bbaf2a15",
"metadata": {},
"outputs": [],
"source": [
"#créer la dataframe \"prix_mensuel_final.csv\"\n",
"df = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
"indexNames = df[ df['mois'] == 'moyenne' ].index\n",
"df.drop(indexNames , inplace=True)\n",
"df.reset_index(drop = True, inplace = True)\n",
"df['prix_moyen'] = round(df['prix_moyen'] * 0.001,2)\n",
"df.to_csv (r'prix_mensuel_final.csv', index = False, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "0803570e-3b2c-4f0d-bc8b-aa34a3f6dfa6",
"metadata": {},
"outputs": [],
"source": [
"#créer la dataframe \"prix_annuel_final.csv\"\n",
"df = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
"indexNames = df[ df['mois'] != 'moyenne' ].index\n",
"df.drop(indexNames , inplace=True)\n",
"df.reset_index(drop = True, inplace = True)\n",
"df.drop(columns=['mois'],inplace=True)\n",
"df['prix_moyen'] = round(df['prix_moyen'] * 0.001,2)\n",
"df.to_csv (r'prix_annuel_final.csv', index = False, header=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7816857-8629-4f2a-b49f-3b4ca0c9fc16",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "indirect-taxation-kernel",
"language": "python",
"name": "indirect-taxation-kernel"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
%% Cell type:code id:d60999c6-2ae5-430b-934c-a95d309a496c tags:
```
python
import
zipfile
import
os
import
xml.etree.ElementTree
as
ET
import
csv
import
time
from
urllib.request
import
urlretrieve
from
datetime
import
date
from
calendar
import
monthrange
import
pandas
as
pd
import
requests
```
%% Cell type:code id:bbf067e2-95d6-4375-93f2-41ef842893b0 tags:
```
python
def recuperation_xml(date_debut, date_fin):
    """Download and extract the yearly fuel-price archives.

    For every year from ``date_debut`` to ``date_fin`` (both inclusive) the
    archive served at ``https://donnees.roulez-eco.fr/opendata/annee/<year>``
    is saved as ``zip_file/PrixCarburants_annuel_<year>.zip`` and then
    extracted into the ``unzip_file`` directory.

    Args:
        date_debut: first year to download.
        date_fin: last year to download (inclusive).
    """
    directory_to_extract_to = os.path.join("unzip_file")
    zip_directory = os.path.join("zip_file")
    # urlretrieve opens its target file directly and fails when the parent
    # directory is missing, so make sure the download directory exists.
    os.makedirs(zip_directory, exist_ok=True)
    # The loop variable is not called ``date`` so that it does not shadow the
    # ``date`` class imported from ``datetime`` at the top of the file.
    for annee in range(date_debut, date_fin + 1):
        path_to_zip_file = os.path.join(
            zip_directory, f"PrixCarburants_annuel_{annee}.zip"
        )
        urlretrieve(
            f"https://donnees.roulez-eco.fr/opendata/annee/{annee}",
            path_to_zip_file,
        )
        with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
            zip_ref.extractall(directory_to_extract_to)
#recuperation_xml(2007,2021)
```
%% Cell type:code id:6c27528f-fbbd-4c34-86fe-a904c8181f77 tags:
```
python
def citycode_from_lat_long(longitude, latitude):
    """Reverse-geocode a position with the api-adresse.data.gouv.fr API.

    Args:
        longitude: value sent as the ``lon`` query parameter.
        latitude: value sent as the ``lat`` query parameter.

    Returns:
        The ``citycode`` property of the first feature of the response, or
        ``None`` when the response contains no feature.

    Raises:
        requests.HTTPError: the API answered with an error status.
        requests.Timeout: the API did not answer within the timeout.
    """
    response = requests.get(
        "https://api-adresse.data.gouv.fr/reverse/",
        params={"lon": longitude, "lat": latitude},
        # Without a timeout a stalled connection would block the caller
        # forever; this function is called once per uncached station.
        timeout=30,
    )
    # Fail with an explicit HTTP error instead of a JSON or key error later.
    response.raise_for_status()
    features = response.json()['features']
    if not features:
        return None
    return features[0]['properties']['citycode']
```
%% Cell type:code id:d67ca228-3db6-446b-bcac-f1efafd129f6 tags:
```
python
def code_departement_from_citycode(citycode):
    """Derive the département code from a city code.

    City codes whose first two characters compare greater than or equal to
    ``'97'`` yield their first three characters; every other city code yields
    its first two characters.
    """
    prefix_length = 3 if citycode[:2] >= '97' else 2
    return citycode[:prefix_length]
```
%% Cell type:code id:e8b5e2f4-2095-4c8f-a11d-d11de4cff76c tags:
```
python
def code_departement_from_code_postal(code_postal):
    """Derive the département code from a postal code.

    Returns ``None`` for the placeholder postal code ``'99999'``. Postal
    codes whose first two characters compare greater than or equal to
    ``'97'`` yield their first three characters. Prefixes ``200``/``201`` map
    to ``"2A"`` and prefixes ``202``/``206`` map to ``"2B"``. Anything else
    yields the first two characters.
    """
    if code_postal == '99999':
        return None
    if code_postal[:2] >= '97':
        return code_postal[:3]
    prefix = code_postal[:3]
    if prefix in ("200", "201"):
        return "2A"
    if prefix in ("202", "206"):
        return "2B"
    return code_postal[:2]
```
%% Cell type:code id:64a0d8fc-649a-4710-839e-416706a5f712 tags:
```
python
def code_region_from_code_departement(code_departement, date, token=None):
    """Look up the ascendants of a département with the INSEE Métadonnées V1 API.

    Queries ``https://api.insee.fr/metadonnees/V1/geo/departement/<code>/ascendants``
    and returns the ``code`` of the first item of the returned list.
    NOTE(review): the first ascendant is presumably the région; confirm
    against the API documentation ("API nomenclatures géographiques Insee").

    Per the original author's notes, the API key has to be renewed every
    7 days and the API is limited to 30 requests per minute.

    Args:
        code_departement: département code inserted in the URL.
        date: value sent as the ``date`` query parameter.
        token: bearer token for the API. When omitted, it is read from the
            ``INSEE_API_TOKEN`` environment variable so that no secret has to
            be committed with the code.

    Returns:
        The ``code`` field of the first element of the JSON list.

    Raises:
        RuntimeError: no token is available, or the API answered with a JSON
            object (an error payload) instead of a list.
        requests.HTTPError: the API answered with an error status.
    """
    token = token or os.environ.get('INSEE_API_TOKEN')
    if not token:
        raise RuntimeError(
            "No INSEE API token: pass `token` or set the INSEE_API_TOKEN "
            "environment variable."
        )
    headers = {
        'Accept': 'application/json',
        'Authorization': f'Bearer {token}',
    }
    params = {
        'date': date,
    }
    response = requests.get(
        f'https://api.insee.fr/metadonnees/V1/geo/departement/{code_departement}/ascendants',
        params=params,
        headers=headers,
        timeout=30,
    )
    # Stay under the 30 requests per minute limit: wait a bit more than 2 s
    # after every request, whether it succeeded or not.
    time.sleep(2.1)
    response.raise_for_status()
    contenu = response.json()
    if isinstance(contenu, dict):
        # A JSON object is not the expected list of ascendants; report it
        # instead of failing later on ``contenu[0]``.
        raise RuntimeError(
            f"Unexpected INSEE API response for departement {code_departement}: {contenu}"
        )
    return contenu[0]['code']
```
%% Cell type:code id:c5f67bd6-5cf9-4e09-a587-f4b2454f4618 tags:
```
python
# The remote APIs are fairly fragile and sometimes answer with 500 or 502
# errors. When that happens, the year that was being processed has to be
# removed from "prix_by_region" before restarting the main loop from that year.
def debug_if_error_500(date_debut, date_fin):
    """Remove the years ``date_debut``..``date_fin`` (inclusive) from ``prix_by_region``.

    Every per-fuel dictionary of every region of the module-level
    ``prix_by_region`` is modified in place; years that are absent are
    ignored.
    """
    annees = range(date_debut, date_fin + 1)
    for prix_by_carburant in prix_by_region.values():
        for prix_by_annee in prix_by_carburant.values():
            for annee in annees:
                prix_by_annee.pop(annee, None)
# Manual recovery step: drop year 2013 from "prix_by_region".
debug_if_error_500(2013, 2013)
```
%% Cell type:code id:14979ff2-770a-4a6c-8780-13a76a98512a tags:
```
python
# Parse the extracted 2021 file; its root element is kept in "pdv_liste".
tree = ET.parse('unzip_file/PrixCarburants_annuel_2021.xml')
pdv_liste = tree.getroot()
```
%% Cell type:code id:bb42e6c2-f9e8-49da-a372-88b9b869993b tags:
```
python
# Cache of city codes keyed by "<latitude>,<longitude>".
citycode_lat_long = dict()
# Accumulator for the prices, keyed by region code.
prix_by_region = dict()
```
%% Cell type:code id:b8a5473c-902a-4d1f-9318-ad52e425cf3e tags:
```
python
#prix_by_region
```
%% Cell type:code id:2cd9550a-5c9b-4787-a372-d4f8309eaf9d tags:
```
python
# Main loop. For every yearly XML file it:
#   1. resolves the region code of each station ("pdv") through its city code
#      (or, failing that, its postal code) and its département code;
#   2. stores every price as
#      region -> fuel -> year -> month -> day -> station id -> price;
#   3. carries the last known price of each station forward over the days
#      without an update;
#   4. replaces the per-station prices with daily means, then the daily means
#      with monthly means, and stores the yearly mean under the 'moyenne' key.
# Steps 3 and 4 only touch the year being processed: the previous years have
# already been reduced to numbers and must not be aggregated a second time.
for annee in range(2007, 2022):
    print(annee)
    tree = ET.parse(f'unzip_file/PrixCarburants_annuel_{annee}.xml')
    pdv_liste = tree.getroot()
    # Date sent to the INSEE API. Not named "date", so that the class imported
    # from "datetime" is not shadowed.
    date_reference = f'{annee}-01-01'
    # Per-year cache: département code -> region code.
    region_by_departement = {}

    for pdv in pdv_liste:
        longitude = pdv.attrib.get('longitude')
        latitude = pdv.attrib.get('latitude')
        citycode = None
        if latitude and longitude:
            lat_long = f"{latitude},{longitude}"
            citycode = citycode_lat_long.get(lat_long)
            if citycode is None:
                # The raw coordinates are divided by 100000 before being sent
                # to the reverse-geocoding API.
                citycode = citycode_from_lat_long(
                    float(longitude) / 100000, float(latitude) / 100000
                )
                if citycode is not None:
                    citycode_lat_long[lat_long] = citycode
        code_departement = (
            code_departement_from_code_postal(pdv.attrib['cp'])
            if citycode is None
            else code_departement_from_citycode(citycode)
        )
        if code_departement is None:
            print('code_departement is None')
            continue
        code_region = region_by_departement.get(code_departement)
        if code_region is None:
            code_region = code_region_from_code_departement(
                code_departement, date_reference
            )
            region_by_departement[code_departement] = code_region

        for prix_element in pdv:
            if prix_element.tag != 'prix':
                continue
            maj = prix_element.attrib.get('maj')
            nom = prix_element.attrib.get('nom')
            valeur = prix_element.attrib.get('valeur')
            if maj is None or nom is None or valeur is None:
                continue
            # "maj" separates the date from the time with either 'T' or ' '.
            date_prix = maj.split('T' if 'T' in maj else ' ')[0]
            annee_prix, mois_prix, jour_prix = (
                int(partie) for partie in date_prix.split('-')
            )
            # NOTE(review): a price whose "maj" year differs from "annee" is
            # stored under its own year and is not aggregated during this
            # iteration. Confirm the yearly files never contain such prices.
            prix_by_station = (
                prix_by_region.setdefault(code_region, {})
                .setdefault(nom, {})
                .setdefault(annee_prix, {})
                .setdefault(mois_prix, {})
                .setdefault(jour_prix, {})
            )
            prix_by_station[pdv.attrib['id']] = valeur

    for prix_by_carburant in prix_by_region.values():
        for prix_by_annee in prix_by_carburant.values():
            prix_by_mois = prix_by_annee.get(annee)
            if prix_by_mois is None:
                # No price for this fuel in this region this year.
                continue

            # Step 3: forward-fill the last known price of every station.
            stations = set()
            dernier_prix_par_station = {}
            for mois in range(1, 13):
                prix_by_jour = prix_by_mois.setdefault(mois, {})
                dernier_jour = monthrange(annee, mois)[1]
                for jour in range(1, dernier_jour + 1):
                    # setdefault, not get: a day without any update must
                    # still receive the carried-forward prices.
                    prix_by_station = prix_by_jour.setdefault(jour, {})
                    stations.update(prix_by_station)
                    for station in stations:
                        prix = prix_by_station.get(station)
                        if prix is None:
                            prix_by_station[station] = dernier_prix_par_station.get(station)
                        else:
                            dernier_prix_par_station[station] = prix

            # Step 4a: mean over the stations for every day.
            for prix_by_jour in prix_by_mois.values():
                for jour, prix_by_station in prix_by_jour.items():
                    valeurs = [
                        float(prix)
                        for prix in prix_by_station.values()
                        if prix is not None
                    ]
                    prix_by_jour[jour] = (
                        round(sum(valeurs) / len(valeurs), 2) if valeurs else None
                    )

            # Step 4b: mean over the days for every month and for the year.
            moyennes_annee = []
            for mois, prix_by_jour in prix_by_mois.items():
                moyennes_mois = [
                    prix for prix in prix_by_jour.values() if prix is not None
                ]
                moyennes_annee.extend(moyennes_mois)
                prix_by_mois[mois] = (
                    round(sum(moyennes_mois) / len(moyennes_mois), 2)
                    if moyennes_mois
                    else None
                )
            prix_by_mois['moyenne'] = (
                round(sum(moyennes_annee) / len(moyennes_annee), 2)
                if moyennes_annee
                else None
            )
```
%% Output
2007
%% Cell type:code id:0f26bf8a-397d-4522-8409-f9f4681ce870 tags:
```
python
# Flatten the nested "prix_by_region" dictionary into one record per
# (region, fuel, year, key of the year dictionary). Every key is kept,
# including 'moyenne': the CSV exports of this file split the records on
# whether "mois" equals 'moyenne'.
liste_prix_mensuel = []
liste_prix_annuel = []
for region, prix_by_carburant in prix_by_region.items():
    for carburant, prix_by_annee in prix_by_carburant.items():
        for annee, prix_by_mois in prix_by_annee.items():
            for mois, prix in prix_by_mois.items():
                liste_prix_mensuel.append(
                    {
                        "region": region,
                        "carburant": carburant,
                        "annee": annee,
                        "mois": mois,
                        "prix_moyen": prix,
                    }
                )
```
%% Cell type:code id:8a712431-90ff-42bb-9449-3f89bbaf2a15 tags:
```
python
# Build the monthly table and write it to "prix_mensuel_final.csv".
df = pd.DataFrame.from_dict(liste_prix_mensuel)
# Rows whose "mois" is 'moyenne' are removed from the monthly table.
indexNames = df.index[df['mois'] == 'moyenne']
df = df.drop(indexNames).reset_index(drop=True)
# Scale the prices by 0.001 and keep two decimals.
df['prix_moyen'] = (df['prix_moyen'] * 0.001).round(2)
df.to_csv('prix_mensuel_final.csv', index=False, header=True)
```
%% Cell type:code id:0803570e-3b2c-4f0d-bc8b-aa34a3f6dfa6 tags:
```
python
# Build the yearly table and write it to "prix_annuel_final.csv".
df = pd.DataFrame.from_dict(liste_prix_mensuel)
# Only the rows whose "mois" is 'moyenne' are kept; the "mois" column is then
# dropped.
indexNames = df.index[df['mois'] != 'moyenne']
df = df.drop(indexNames).reset_index(drop=True).drop(columns=['mois'])
# Scale the prices by 0.001 and keep two decimals.
df['prix_moyen'] = (df['prix_moyen'] * 0.001).round(2)
df.to_csv('prix_annuel_final.csv', index=False, header=True)
```
%% Cell type:code id:b7816857-8629-4f2a-b49f-3b4ca0c9fc16 tags:
```
python
```
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment