diff --git a/README.FR.md b/README.FR.md
index 7d306903858cb232c46a8e85f1cfa9598d3ddc0f..90729f52d2e801e23a72b5753a1e11118a0309a5 100644
--- a/README.FR.md
+++ b/README.FR.md
@@ -39,9 +39,8 @@ Ces scripts nécessitent Python3, Jupyterlab, Pandas et requests.
```shell
python3 -m venv .venv
source .venv/bin/activate
-pip install jupyterlab
-pip install pandas
-pip install requests
+# pip install jupyterlab  # à décommenter si JupyterLab n'est pas déjà installé
+pip install pandas requests retrying tqdm
```
### Lancement
@@ -60,6 +59,10 @@ Pour les prix agrégé par région, par moi et par année, ouvrir [notebook_gouv
**REMARQUE:** Les fichiers CSV sont disponible directement sans utiliser le code.
+#### Usage dans OpenFisca France Indirect Taxation
+
+Les fichiers CSV générés doivent ensuite être copiés dans le dossier `assets/prix` du dépôt OpenFisca France Indirect Taxation : <https://github.com/openfisca/openfisca-france-indirect-taxation/tree/cas-type/openfisca_france_indirect_taxation/assets/prix>
+
## Copyright
LexImpact Prix carburants est un logiciel libre sous [licence GNU Affero General Public License](./LICENSE.md).
diff --git a/README.md b/README.md
index 956cd2717a864e952079144c4b5a0f197aa9cce8..2d5032280f54eda993a89625cc60d212b4b6a699 100644
--- a/README.md
+++ b/README.md
@@ -36,8 +36,7 @@ These scripts require Python3, Jupyterlab and Pandas and requests.
python3 -m venv .venv
source .venv/bin/activate
pip install jupyterlab
-pip install pandas
-pip install requests
+pip install pandas requests retrying tqdm
```
### Application
diff --git a/notebook_INSEE/prix_carburant.ipynb b/notebook_INSEE/prix_carburant.ipynb
index 73e65504663abe2d5fb102f353078b98eb274395..14a118d66ac8be8271a521e8191e304e3c3f0e87 100644
--- a/notebook_INSEE/prix_carburant.ipynb
+++ b/notebook_INSEE/prix_carburant.ipynb
@@ -10,21 +10,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
+ "id": "5410c3e2-56f4-4b71-8062-914b4de93ba9",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# !pip install pandas requests"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
"id": "b887ef4a-6f33-4152-af83-daeeffba5a79",
- "metadata": {},
- "outputs": [
- {
- "ename": "",
- "evalue": "",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31mRunning cells with 'Python 3.8.10 64-bit' requires ipykernel package.\n",
- "Run the following command to install 'ipykernel' into the Python environment. \n",
- "Command: '/bin/python3 -m pip install ipykernel -U --user --force-reinstall'"
- ]
- }
- ],
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
"source": [
"import zipfile\n",
"import os\n",
@@ -39,20 +42,10 @@
"cell_type": "code",
"execution_count": 3,
"id": "4268984b-2876-498d-85e1-110f242376f4",
- "metadata": {},
- "outputs": [
- {
- "ename": "FileNotFoundError",
- "evalue": "[Errno 2] No such file or directory: 'prix_litre_mensuel_carburant.csv'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-3-e86df01e3e85>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Si il y a déjà des fichiers avec ces noms, le script ne les remplaces pas, donc il faut les suprimer au debut.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremove\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"prix_litre_mensuel_carburant.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremove\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"prix_hectolitre_mensuel_carburant.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremove\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"prix_litre_annuel_carburant.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremove\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"prix_hectolitre_annuel_carburant.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'prix_litre_mensuel_carburant.csv'"
- ]
- }
- ],
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
"source": [
"# Si il y a déjà des fichiers avec ces noms, le script ne les remplaces pas, donc il faut les suprimer au debut.\n",
"os.remove(\"prix_litre_mensuel_carburant.csv\")\n",
@@ -65,7 +58,9 @@
"cell_type": "code",
"execution_count": 4,
"id": "36d19b81-57cc-4b3a-be5c-2c5813367b6f",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"carburants = {\"diesel\":\"000442588\",\n",
@@ -77,9 +72,11 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 5,
"id": "5fbf5893-de0b-4522-91e6-49bf992cb768",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def get_df(carburant,id_url):\n",
@@ -96,7 +93,9 @@
"cell_type": "code",
"execution_count": 6,
"id": "1925b873-dc73-4d8b-b837-463f5f846f47",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def creat_df(carburant,df):\n",
@@ -111,7 +110,9 @@
"cell_type": "code",
"execution_count": 7,
"id": "f1f6d31d-ed11-4d44-9a2c-d11a0ca2b8fb",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def clean_df(carburant,df):\n",
@@ -121,39 +122,41 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 8,
"id": "f566506b-95b9-4390-a6a6-f4ebfd844469",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "<ipython-input-6-d23337e5192c>:5: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_1004711/3305679562.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df['date'] = df['date'].astype(str) + '-01'\n",
- "<ipython-input-6-d23337e5192c>:5: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_1004711/3305679562.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df['date'] = df['date'].astype(str) + '-01'\n",
- "<ipython-input-6-d23337e5192c>:5: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_1004711/3305679562.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df['date'] = df['date'].astype(str) + '-01'\n",
- "<ipython-input-6-d23337e5192c>:5: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_1004711/3305679562.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df['date'] = df['date'].astype(str) + '-01'\n",
- "<ipython-input-6-d23337e5192c>:5: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_1004711/3305679562.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@@ -174,9 +177,11 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 9,
"id": "a7354947-55e1-47e5-9fcd-7be37b00131c",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"data": {
@@ -210,48 +215,48 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>2022-04-01</td>\n",
- " <td>1.87</td>\n",
- " <td>1.87</td>\n",
- " <td>1.82</td>\n",
+ " <td>2023-08-01</td>\n",
+ " <td>1.85</td>\n",
+ " <td>1.99</td>\n",
+ " <td>1.94</td>\n",
" <td>NaN</td>\n",
- " <td>1.76</td>\n",
+ " <td>1.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <td>2022-03-01</td>\n",
- " <td>2.02</td>\n",
- " <td>2.05</td>\n",
- " <td>2.00</td>\n",
+ " <td>2023-07-01</td>\n",
+ " <td>1.72</td>\n",
+ " <td>1.91</td>\n",
+ " <td>1.85</td>\n",
" <td>NaN</td>\n",
- " <td>1.96</td>\n",
+ " <td>1.84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
- " <td>2022-02-01</td>\n",
- " <td>1.72</td>\n",
- " <td>1.86</td>\n",
- " <td>1.80</td>\n",
+ " <td>2023-06-01</td>\n",
+ " <td>1.70</td>\n",
+ " <td>1.94</td>\n",
+ " <td>1.88</td>\n",
" <td>NaN</td>\n",
- " <td>1.77</td>\n",
+ " <td>1.86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
- " <td>2022-01-01</td>\n",
- " <td>1.63</td>\n",
- " <td>1.77</td>\n",
- " <td>1.71</td>\n",
- " <td>NaN</td>\n",
+ " <td>2023-05-01</td>\n",
" <td>1.69</td>\n",
+ " <td>1.93</td>\n",
+ " <td>1.87</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1.85</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
- " <td>2021-12-01</td>\n",
- " <td>1.54</td>\n",
- " <td>1.70</td>\n",
- " <td>1.64</td>\n",
+ " <td>2023-04-01</td>\n",
+ " <td>1.81</td>\n",
+ " <td>2.00</td>\n",
+ " <td>1.95</td>\n",
" <td>NaN</td>\n",
- " <td>1.61</td>\n",
+ " <td>1.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@@ -263,7 +268,7 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>359</th>\n",
+ " <th>375</th>\n",
" <td>1992-05-01</td>\n",
" <td>0.54</td>\n",
" <td>0.78</td>\n",
@@ -272,7 +277,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>360</th>\n",
+ " <th>376</th>\n",
" <td>1992-04-01</td>\n",
" <td>0.53</td>\n",
" <td>0.77</td>\n",
@@ -281,7 +286,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>361</th>\n",
+ " <th>377</th>\n",
" <td>1992-03-01</td>\n",
" <td>0.54</td>\n",
" <td>0.77</td>\n",
@@ -290,7 +295,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>362</th>\n",
+ " <th>378</th>\n",
" <td>1992-02-01</td>\n",
" <td>0.54</td>\n",
" <td>0.78</td>\n",
@@ -299,7 +304,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>363</th>\n",
+ " <th>379</th>\n",
" <td>1992-01-01</td>\n",
" <td>0.54</td>\n",
" <td>0.78</td>\n",
@@ -309,40 +314,40 @@
" </tr>\n",
" </tbody>\n",
"</table>\n",
- "<p>364 rows × 6 columns</p>\n",
+ "<p>380 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" date diesel_ttc super_98_ttc super_95_ttc super_plombe_ttc \\\n",
- "0 2022-04-01 1.87 1.87 1.82 NaN \n",
- "1 2022-03-01 2.02 2.05 2.00 NaN \n",
- "2 2022-02-01 1.72 1.86 1.80 NaN \n",
- "3 2022-01-01 1.63 1.77 1.71 NaN \n",
- "4 2021-12-01 1.54 1.70 1.64 NaN \n",
+ "0 2023-08-01 1.85 1.99 1.94 NaN \n",
+ "1 2023-07-01 1.72 1.91 1.85 NaN \n",
+ "2 2023-06-01 1.70 1.94 1.88 NaN \n",
+ "3 2023-05-01 1.69 1.93 1.87 NaN \n",
+ "4 2023-04-01 1.81 2.00 1.95 NaN \n",
".. ... ... ... ... ... \n",
- "359 1992-05-01 0.54 0.78 NaN 0.81 \n",
- "360 1992-04-01 0.53 0.77 NaN 0.81 \n",
- "361 1992-03-01 0.54 0.77 NaN 0.81 \n",
- "362 1992-02-01 0.54 0.78 NaN 0.81 \n",
- "363 1992-01-01 0.54 0.78 NaN 0.80 \n",
+ "375 1992-05-01 0.54 0.78 NaN 0.81 \n",
+ "376 1992-04-01 0.53 0.77 NaN 0.81 \n",
+ "377 1992-03-01 0.54 0.77 NaN 0.81 \n",
+ "378 1992-02-01 0.54 0.78 NaN 0.81 \n",
+ "379 1992-01-01 0.54 0.78 NaN 0.80 \n",
"\n",
" super_95_e10_ttc \n",
- "0 1.76 \n",
- "1 1.96 \n",
- "2 1.77 \n",
- "3 1.69 \n",
- "4 1.61 \n",
+ "0 1.93 \n",
+ "1 1.84 \n",
+ "2 1.86 \n",
+ "3 1.85 \n",
+ "4 1.93 \n",
".. ... \n",
- "359 NaN \n",
- "360 NaN \n",
- "361 NaN \n",
- "362 NaN \n",
- "363 NaN \n",
+ "375 NaN \n",
+ "376 NaN \n",
+ "377 NaN \n",
+ "378 NaN \n",
+ "379 NaN \n",
"\n",
- "[364 rows x 6 columns]"
+ "[380 rows x 6 columns]"
]
},
- "execution_count": 11,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -354,27 +359,29 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 10,
"id": "49128573-c7ca-4d64-bac1-d84edcf65742",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
- "Int64Index: 364 entries, 0 to 363\n",
+ "RangeIndex: 380 entries, 0 to 379\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
- " 0 date 364 non-null object \n",
- " 1 diesel_ttc 364 non-null float64\n",
- " 2 super_98_ttc 364 non-null float64\n",
- " 3 super_95_ttc 244 non-null float64\n",
+ " 0 date 380 non-null object \n",
+ " 1 diesel_ttc 380 non-null float64\n",
+ " 2 super_98_ttc 380 non-null float64\n",
+ " 3 super_95_ttc 260 non-null float64\n",
" 4 super_plombe_ttc 157 non-null float64\n",
- " 5 super_95_e10_ttc 40 non-null float64\n",
+ " 5 super_95_e10_ttc 56 non-null float64\n",
"dtypes: float64(5), object(1)\n",
- "memory usage: 19.9+ KB\n"
+ "memory usage: 17.9+ KB\n"
]
}
],
@@ -385,9 +392,11 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 11,
"id": "8edc5227-ee31-45fd-8d94-a7e803012312",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"data": {
@@ -420,13 +429,22 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
+ " <th>33</th>\n",
+ " <td>2023</td>\n",
+ " <td>1.80</td>\n",
+ " <td>1.96</td>\n",
+ " <td>1.90</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1.88</td>\n",
+ " </tr>\n",
+ " <tr>\n",
" <th>32</th>\n",
" <td>2022</td>\n",
- " <td>1.81</td>\n",
- " <td>1.89</td>\n",
+ " <td>1.86</td>\n",
+ " <td>1.88</td>\n",
" <td>1.83</td>\n",
" <td>NaN</td>\n",
- " <td>1.80</td>\n",
+ " <td>1.78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
@@ -704,7 +722,8 @@
],
"text/plain": [
" date diesel_ttc super_98_ttc super_95_ttc super_plombe_ttc \\\n",
- "32 2022 1.81 1.89 1.83 NaN \n",
+ "33 2023 1.80 1.96 1.90 NaN \n",
+ "32 2022 1.86 1.88 1.83 NaN \n",
"31 2021 1.44 1.62 1.56 NaN \n",
"30 2020 1.27 1.42 1.37 NaN \n",
"29 2019 1.44 1.56 1.51 NaN \n",
@@ -737,7 +756,8 @@
"2 1992 0.54 0.78 NaN 0.81 \n",
"\n",
" super_95_e10_ttc \n",
- "32 1.80 \n",
+ "33 1.88 \n",
+ "32 1.78 \n",
"31 1.54 \n",
"30 1.35 \n",
"29 1.49 \n",
@@ -770,7 +790,7 @@
"2 NaN "
]
},
- "execution_count": 13,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -796,27 +816,29 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 12,
"id": "ca94602c-9898-479d-aa7b-1ede366e6850",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
- "Int64Index: 31 entries, 32 to 2\n",
+ "Index: 32 entries, 33 to 2\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
- " 0 date 31 non-null object \n",
- " 1 diesel_ttc 31 non-null float64\n",
- " 2 super_98_ttc 31 non-null float64\n",
- " 3 super_95_ttc 21 non-null float64\n",
+ " 0 date 32 non-null object \n",
+ " 1 diesel_ttc 32 non-null float64\n",
+ " 2 super_98_ttc 32 non-null float64\n",
+ " 3 super_95_ttc 22 non-null float64\n",
" 4 super_plombe_ttc 14 non-null float64\n",
- " 5 super_95_e10_ttc 4 non-null float64\n",
+ " 5 super_95_e10_ttc 5 non-null float64\n",
"dtypes: float64(5), object(1)\n",
- "memory usage: 1.7+ KB\n"
+ "memory usage: 1.8+ KB\n"
]
}
],
@@ -827,9 +849,11 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 13,
"id": "94720f2b-1184-4b0d-8d74-e03c5b80fd39",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"data": {
@@ -863,48 +887,48 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>2022-04-01</td>\n",
- " <td>1.87</td>\n",
- " <td>1.87</td>\n",
- " <td>1.82</td>\n",
+ " <td>2023-08-01</td>\n",
+ " <td>1.85</td>\n",
+ " <td>1.99</td>\n",
+ " <td>1.94</td>\n",
" <td>NaN</td>\n",
- " <td>1.76</td>\n",
+ " <td>1.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <td>2022-03-01</td>\n",
- " <td>2.02</td>\n",
- " <td>2.05</td>\n",
- " <td>2.00</td>\n",
+ " <td>2023-07-01</td>\n",
+ " <td>1.72</td>\n",
+ " <td>1.91</td>\n",
+ " <td>1.85</td>\n",
" <td>NaN</td>\n",
- " <td>1.96</td>\n",
+ " <td>1.84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
- " <td>2022-02-01</td>\n",
- " <td>1.72</td>\n",
- " <td>1.86</td>\n",
- " <td>1.80</td>\n",
+ " <td>2023-06-01</td>\n",
+ " <td>1.70</td>\n",
+ " <td>1.94</td>\n",
+ " <td>1.88</td>\n",
" <td>NaN</td>\n",
- " <td>1.77</td>\n",
+ " <td>1.86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
- " <td>2022-01-01</td>\n",
- " <td>1.63</td>\n",
- " <td>1.77</td>\n",
- " <td>1.71</td>\n",
- " <td>NaN</td>\n",
+ " <td>2023-05-01</td>\n",
" <td>1.69</td>\n",
+ " <td>1.93</td>\n",
+ " <td>1.87</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1.85</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
- " <td>2021-12-01</td>\n",
- " <td>1.54</td>\n",
- " <td>1.70</td>\n",
- " <td>1.64</td>\n",
+ " <td>2023-04-01</td>\n",
+ " <td>1.81</td>\n",
+ " <td>2.00</td>\n",
+ " <td>1.95</td>\n",
" <td>NaN</td>\n",
- " <td>1.61</td>\n",
+ " <td>1.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@@ -916,7 +940,7 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>359</th>\n",
+ " <th>375</th>\n",
" <td>1992-05-01</td>\n",
" <td>0.54</td>\n",
" <td>0.78</td>\n",
@@ -925,7 +949,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>360</th>\n",
+ " <th>376</th>\n",
" <td>1992-04-01</td>\n",
" <td>0.53</td>\n",
" <td>0.77</td>\n",
@@ -934,7 +958,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>361</th>\n",
+ " <th>377</th>\n",
" <td>1992-03-01</td>\n",
" <td>0.54</td>\n",
" <td>0.77</td>\n",
@@ -943,7 +967,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>362</th>\n",
+ " <th>378</th>\n",
" <td>1992-02-01</td>\n",
" <td>0.54</td>\n",
" <td>0.78</td>\n",
@@ -952,7 +976,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>363</th>\n",
+ " <th>379</th>\n",
" <td>1992-01-01</td>\n",
" <td>0.54</td>\n",
" <td>0.78</td>\n",
@@ -962,40 +986,40 @@
" </tr>\n",
" </tbody>\n",
"</table>\n",
- "<p>364 rows × 6 columns</p>\n",
+ "<p>380 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" date diesel_ttc super_98_ttc super_95_ttc super_plombe_ttc \\\n",
- "0 2022-04-01 1.87 1.87 1.82 NaN \n",
- "1 2022-03-01 2.02 2.05 2.00 NaN \n",
- "2 2022-02-01 1.72 1.86 1.80 NaN \n",
- "3 2022-01-01 1.63 1.77 1.71 NaN \n",
- "4 2021-12-01 1.54 1.70 1.64 NaN \n",
+ "0 2023-08-01 1.85 1.99 1.94 NaN \n",
+ "1 2023-07-01 1.72 1.91 1.85 NaN \n",
+ "2 2023-06-01 1.70 1.94 1.88 NaN \n",
+ "3 2023-05-01 1.69 1.93 1.87 NaN \n",
+ "4 2023-04-01 1.81 2.00 1.95 NaN \n",
".. ... ... ... ... ... \n",
- "359 1992-05-01 0.54 0.78 NaN 0.81 \n",
- "360 1992-04-01 0.53 0.77 NaN 0.81 \n",
- "361 1992-03-01 0.54 0.77 NaN 0.81 \n",
- "362 1992-02-01 0.54 0.78 NaN 0.81 \n",
- "363 1992-01-01 0.54 0.78 NaN 0.80 \n",
+ "375 1992-05-01 0.54 0.78 NaN 0.81 \n",
+ "376 1992-04-01 0.53 0.77 NaN 0.81 \n",
+ "377 1992-03-01 0.54 0.77 NaN 0.81 \n",
+ "378 1992-02-01 0.54 0.78 NaN 0.81 \n",
+ "379 1992-01-01 0.54 0.78 NaN 0.80 \n",
"\n",
" super_95_e10_ttc \n",
- "0 1.76 \n",
- "1 1.96 \n",
- "2 1.77 \n",
- "3 1.69 \n",
- "4 1.61 \n",
+ "0 1.93 \n",
+ "1 1.84 \n",
+ "2 1.86 \n",
+ "3 1.85 \n",
+ "4 1.93 \n",
".. ... \n",
- "359 NaN \n",
- "360 NaN \n",
- "361 NaN \n",
- "362 NaN \n",
- "363 NaN \n",
+ "375 NaN \n",
+ "376 NaN \n",
+ "377 NaN \n",
+ "378 NaN \n",
+ "379 NaN \n",
"\n",
- "[364 rows x 6 columns]"
+ "[380 rows x 6 columns]"
]
},
- "execution_count": 15,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -1006,9 +1030,11 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 14,
"id": "02992af3-7110-4f17-9ac2-f4f0f2fdc4de",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"df_mens.loc[:,['diesel_ttc','super_98_ttc','super_95_ttc','super_plombe_ttc','super_95_e10_ttc']] *= 100\n",
@@ -1018,9 +1044,11 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 15,
"id": "62b9b211-5aa0-4180-8eea-33b6bc2ac4eb",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"data": {
@@ -1054,48 +1082,48 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>2022-04-01</td>\n",
- " <td>187.0</td>\n",
- " <td>187.0</td>\n",
- " <td>182.0</td>\n",
+ " <td>2023-08-01</td>\n",
+ " <td>185.0</td>\n",
+ " <td>199.0</td>\n",
+ " <td>194.0</td>\n",
" <td>NaN</td>\n",
- " <td>176.0</td>\n",
+ " <td>193.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <td>2022-03-01</td>\n",
- " <td>202.0</td>\n",
- " <td>205.0</td>\n",
- " <td>200.0</td>\n",
+ " <td>2023-07-01</td>\n",
+ " <td>172.0</td>\n",
+ " <td>191.0</td>\n",
+ " <td>185.0</td>\n",
" <td>NaN</td>\n",
- " <td>196.0</td>\n",
+ " <td>184.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
- " <td>2022-02-01</td>\n",
- " <td>172.0</td>\n",
- " <td>186.0</td>\n",
- " <td>180.0</td>\n",
+ " <td>2023-06-01</td>\n",
+ " <td>170.0</td>\n",
+ " <td>194.0</td>\n",
+ " <td>188.0</td>\n",
" <td>NaN</td>\n",
- " <td>177.0</td>\n",
+ " <td>186.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
- " <td>2022-01-01</td>\n",
- " <td>163.0</td>\n",
- " <td>177.0</td>\n",
- " <td>171.0</td>\n",
- " <td>NaN</td>\n",
+ " <td>2023-05-01</td>\n",
" <td>169.0</td>\n",
+ " <td>193.0</td>\n",
+ " <td>187.0</td>\n",
+ " <td>NaN</td>\n",
+ " <td>185.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
- " <td>2021-12-01</td>\n",
- " <td>154.0</td>\n",
- " <td>170.0</td>\n",
- " <td>164.0</td>\n",
+ " <td>2023-04-01</td>\n",
+ " <td>181.0</td>\n",
+ " <td>200.0</td>\n",
+ " <td>195.0</td>\n",
" <td>NaN</td>\n",
- " <td>161.0</td>\n",
+ " <td>193.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@@ -1107,7 +1135,7 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>359</th>\n",
+ " <th>375</th>\n",
" <td>1992-05-01</td>\n",
" <td>54.0</td>\n",
" <td>78.0</td>\n",
@@ -1116,7 +1144,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>360</th>\n",
+ " <th>376</th>\n",
" <td>1992-04-01</td>\n",
" <td>53.0</td>\n",
" <td>77.0</td>\n",
@@ -1125,7 +1153,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>361</th>\n",
+ " <th>377</th>\n",
" <td>1992-03-01</td>\n",
" <td>54.0</td>\n",
" <td>77.0</td>\n",
@@ -1134,7 +1162,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>362</th>\n",
+ " <th>378</th>\n",
" <td>1992-02-01</td>\n",
" <td>54.0</td>\n",
" <td>78.0</td>\n",
@@ -1143,7 +1171,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>363</th>\n",
+ " <th>379</th>\n",
" <td>1992-01-01</td>\n",
" <td>54.0</td>\n",
" <td>78.0</td>\n",
@@ -1153,40 +1181,40 @@
" </tr>\n",
" </tbody>\n",
"</table>\n",
- "<p>364 rows × 6 columns</p>\n",
+ "<p>380 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" date diesel_ttc super_98_ttc super_95_ttc super_plombe_ttc \\\n",
- "0 2022-04-01 187.0 187.0 182.0 NaN \n",
- "1 2022-03-01 202.0 205.0 200.0 NaN \n",
- "2 2022-02-01 172.0 186.0 180.0 NaN \n",
- "3 2022-01-01 163.0 177.0 171.0 NaN \n",
- "4 2021-12-01 154.0 170.0 164.0 NaN \n",
+ "0 2023-08-01 185.0 199.0 194.0 NaN \n",
+ "1 2023-07-01 172.0 191.0 185.0 NaN \n",
+ "2 2023-06-01 170.0 194.0 188.0 NaN \n",
+ "3 2023-05-01 169.0 193.0 187.0 NaN \n",
+ "4 2023-04-01 181.0 200.0 195.0 NaN \n",
".. ... ... ... ... ... \n",
- "359 1992-05-01 54.0 78.0 NaN 81.0 \n",
- "360 1992-04-01 53.0 77.0 NaN 81.0 \n",
- "361 1992-03-01 54.0 77.0 NaN 81.0 \n",
- "362 1992-02-01 54.0 78.0 NaN 81.0 \n",
- "363 1992-01-01 54.0 78.0 NaN 80.0 \n",
+ "375 1992-05-01 54.0 78.0 NaN 81.0 \n",
+ "376 1992-04-01 53.0 77.0 NaN 81.0 \n",
+ "377 1992-03-01 54.0 77.0 NaN 81.0 \n",
+ "378 1992-02-01 54.0 78.0 NaN 81.0 \n",
+ "379 1992-01-01 54.0 78.0 NaN 80.0 \n",
"\n",
" super_95_e10_ttc \n",
- "0 176.0 \n",
- "1 196.0 \n",
- "2 177.0 \n",
- "3 169.0 \n",
- "4 161.0 \n",
+ "0 193.0 \n",
+ "1 184.0 \n",
+ "2 186.0 \n",
+ "3 185.0 \n",
+ "4 193.0 \n",
".. ... \n",
- "359 NaN \n",
- "360 NaN \n",
- "361 NaN \n",
- "362 NaN \n",
- "363 NaN \n",
+ "375 NaN \n",
+ "376 NaN \n",
+ "377 NaN \n",
+ "378 NaN \n",
+ "379 NaN \n",
"\n",
- "[364 rows x 6 columns]"
+ "[380 rows x 6 columns]"
]
},
- "execution_count": 17,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -1197,9 +1225,11 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 16,
"id": "372a7fe6-f63d-496b-87b8-171c1888ae81",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"df_ann.loc[:,['diesel_ttc','super_98_ttc','super_95_ttc','super_plombe_ttc','super_95_e10_ttc']] *= 100\n",
@@ -1209,9 +1239,11 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 17,
"id": "96ecd7ec-784e-46d6-92ea-5e53a29984f6",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"data": {
@@ -1244,13 +1276,22 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
+ " <th>33</th>\n",
+ " <td>2023</td>\n",
+ " <td>180.0</td>\n",
+ " <td>196.0</td>\n",
+ " <td>190.0</td>\n",
+ " <td>NaN</td>\n",
+ " <td>188.0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
" <th>32</th>\n",
" <td>2022</td>\n",
- " <td>181.0</td>\n",
- " <td>189.0</td>\n",
+ " <td>186.0</td>\n",
+ " <td>188.0</td>\n",
" <td>183.0</td>\n",
" <td>NaN</td>\n",
- " <td>180.0</td>\n",
+ " <td>178.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
@@ -1528,7 +1569,8 @@
],
"text/plain": [
" date diesel_ttc super_98_ttc super_95_ttc super_plombe_ttc \\\n",
- "32 2022 181.0 189.0 183.0 NaN \n",
+ "33 2023 180.0 196.0 190.0 NaN \n",
+ "32 2022 186.0 188.0 183.0 NaN \n",
"31 2021 144.0 162.0 156.0 NaN \n",
"30 2020 127.0 142.0 137.0 NaN \n",
"29 2019 144.0 156.0 151.0 NaN \n",
@@ -1561,7 +1603,8 @@
"2 1992 54.0 78.0 NaN 81.0 \n",
"\n",
" super_95_e10_ttc \n",
- "32 180.0 \n",
+ "33 188.0 \n",
+ "32 178.0 \n",
"31 154.0 \n",
"30 135.0 \n",
"29 149.0 \n",
@@ -1594,7 +1637,7 @@
"2 NaN "
]
},
- "execution_count": 19,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -1617,9 +1660,9 @@
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
},
"kernelspec": {
- "display_name": "Python 3.8.10 64-bit",
+ "display_name": "prix-carburant",
"language": "python",
- "name": "python3"
+ "name": "prix-carburant"
},
"language_info": {
"codemirror_mode": {
@@ -1631,7 +1674,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.9.2"
},
"toc-autonumbering": true,
"toc-showmarkdowntxt": false
diff --git a/notebook_gouv/prix_carburant_gouv.ipynb b/notebook_gouv/prix_carburant_gouv.ipynb
index 1899d2032851f0b5a180bb6fb03ef62ebd2f6f60..6713f408603c5c406758b27f16df1796cdaf2368 100644
--- a/notebook_gouv/prix_carburant_gouv.ipynb
+++ b/notebook_gouv/prix_carburant_gouv.ipynb
@@ -2,9 +2,11 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "d60999c6-2ae5-430b-934c-a95d309a496c",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"import zipfile\n",
@@ -15,37 +17,303 @@
"from urllib.request import urlretrieve\n",
"from datetime import date\n",
"from calendar import monthrange\n",
- "\n",
+ "from tqdm import tqdm\n",
"import pandas as pd\n",
- "import requests"
+ "import requests\n",
+ "import json\n",
+ "from retrying import retry"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "bbf067e2-95d6-4375-93f2-41ef842893b0",
+ "execution_count": 2,
+ "id": "523b21af-b7db-4fba-9ba5-831774c8e699",
"metadata": {},
"outputs": [],
+ "source": [
+ "START_DATE=2007\n",
+ "END_DATE=2023"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "bbf067e2-95d6-4375-93f2-41ef842893b0",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 0%| | 0/17 [00:00<?, ?it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2007\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 6%|▌ | 1/17 [00:01<00:27, 1.71s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2008\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 12%|█▏ | 2/17 [00:04<00:31, 2.13s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2009\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 18%|█▊ | 3/17 [00:06<00:30, 2.18s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2010\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 24%|██▎ | 4/17 [00:08<00:30, 2.33s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2011\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 29%|██▉ | 5/17 [00:10<00:25, 2.10s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2012\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 35%|███▌ | 6/17 [00:12<00:24, 2.18s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2013\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 41%|████ | 7/17 [00:15<00:24, 2.41s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2014\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 47%|████▋ | 8/17 [00:17<00:20, 2.25s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2015\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 53%|█████▎ | 9/17 [00:20<00:18, 2.35s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2016\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 59%|█████▉ | 10/17 [00:23<00:18, 2.61s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2017\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 65%|██████▍ | 11/17 [00:26<00:16, 2.70s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2018\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 71%|███████ | 12/17 [00:28<00:12, 2.60s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2019\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 76%|███████▋ | 13/17 [00:31<00:10, 2.72s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2020\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 82%|████████▏ | 14/17 [00:34<00:07, 2.63s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2021\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 88%|████████▊ | 15/17 [00:36<00:05, 2.60s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2022\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 94%|█████████▍| 16/17 [00:40<00:02, 2.97s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://donnees.roulez-eco.fr/opendata/annee/2023\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 17/17 [00:43<00:00, 2.56s/it]\n"
+ ]
+ }
+ ],
"source": [
"#recupération des bases de donnée sur le site du gouvernement.\n",
"def recuperation_xml(date_debut,date_fin):\n",
- " for date in range(date_debut, date_fin +1, 1):\n",
+ " for date in tqdm(range(date_debut, date_fin +1, 1)):\n",
" directory_to_extract_to = os.path.join(\"unzip_file\")\n",
" path_to_zip_file = os.path.join(\"zip_file\",f\"PrixCarburants_annuel_{date}.zip\")\n",
- " urlretrieve(f\"https://donnees.roulez-eco.fr/opendata/annee/{date}\", path_to_zip_file)\n",
+ " url = f\"https://donnees.roulez-eco.fr/opendata/annee/{date}\"\n",
+ " print(url)\n",
+ " urlretrieve(url, path_to_zip_file)\n",
" with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:\n",
" zip_ref.extractall(directory_to_extract_to)\n",
- "#recuperation_xml(2007,2021)"
+ "recuperation_xml(START_DATE,END_DATE)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6c27528f-fbbd-4c34-86fe-a904c8181f77",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# utilisation de l'API de adress.data.gouv.fr pour passer de la latitude et longitude, au citycode\n",
+ "@retry(stop_max_attempt_number=5, wait_fixed=2500)\n",
"def citycode_from_lat_long(longitude,latitude):\n",
" url = f\"https://api-adresse.data.gouv.fr/reverse/?lon={longitude}&lat={latitude}\"\n",
" response = requests.get(url)\n",
@@ -62,7 +330,9 @@
"cell_type": "code",
"execution_count": 5,
"id": "d67ca228-3db6-446b-bcac-f1efafd129f6",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# passage du citycode au code du departement\n",
@@ -78,7 +348,9 @@
"cell_type": "code",
"execution_count": 6,
"id": "e8b5e2f4-2095-4c8f-a11d-d11de4cff76c",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# passage du code postal au code du departement\n",
@@ -100,34 +372,78 @@
"cell_type": "code",
"execution_count": 7,
"id": "64a0d8fc-649a-4710-839e-416706a5f712",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# passage du code du departement au code région en utilisant l'API Métadonnées - V1 de l'INSEE\n",
"# documentation à API nomenclatures géographiques Insee\n",
"# attention, la clé doit être réactualisé tous les 7 jours...\n",
"# l'API est limité à 30 requêtes par minute\n",
+ "cache_code_region_from_code_departement = {}\n",
+ "# Never commit the INSEE bearer token: it is read from the INSEE_API_TOKEN environment variable.\n",
+ "# Reading it here (outside the retried function) makes a missing variable fail immediately.\n",
+ "INSEE_API_TOKEN = os.environ[\"INSEE_API_TOKEN\"]\n",
+ "@retry(stop_max_attempt_number=5, wait_fixed=2000)\n",
"def code_region_from_code_departement(code_departement,date):\n",
+ "    \"\"\"Return the region code of a departement at a given date, with an in-memory cache.\n",
+ "\n",
+ "    Args:\n",
+ "        code_departement: departement code, e.g. \"21\".\n",
+ "        date: reference date sent to the API, e.g. \"2023-01-01\".\n",
+ "\n",
+ "    Returns:\n",
+ "        The 'code' field of the first ascendant returned by the INSEE API.\n",
+ "\n",
+ "    Raises:\n",
+ "        Exception: on a non-200 answer (the retry decorator re-runs the call, up to 5 attempts).\n",
+ "    \"\"\"\n",
+ "    cached_code = cache_code_region_from_code_departement.get(code_departement, {}).get(date)\n",
+ "    if cached_code:\n",
+ "        return cached_code\n",
+ "    # Cache miss: query the INSEE API.\n",
"    headers = {\n",
"        'Accept': 'application/json',\n",
- "        'Authorization': 'Bearer ################', #Le changement est ici\n",
+ "        'Authorization': f'Bearer {INSEE_API_TOKEN}',\n",
"    }\n",
"    params = {\n",
"        'date': date,\n",
"    }\n",
- "    response = requests.get(f'https://api.insee.fr/metadonnees/V1/geo/departement/{code_departement}/ascendants', params=params, headers=headers)\n",
+ "    url = f'https://api.insee.fr/metadonnees/V1/geo/departement/{code_departement}/ascendants'\n",
+ "    # A timeout turns a hung connection into an exception that the retry decorator can handle.\n",
+ "    response = requests.get(url, params=params, headers=headers, timeout=30)\n",
+ "    if response.status_code != 200:\n",
+ "        error = f\"code_region_from_code_departement - Warning : code retour {response.status_code}, {response.text} retrying...\"\n",
+ "        print(error)\n",
+ "        raise Exception(error)\n",
"    contenu = response.json()\n",
+ "    # The API allows 30 requests per minute, hence the pause after each call.\n",
"    time.sleep(2.1)\n",
"    if isinstance(contenu,dict):\n",
"        print(contenu)\n",
- "    return contenu[0]['code']"
+ "    code_region = contenu[0]['code']\n",
+ "    # Only successful lookups are stored, so the cache never holds empty placeholders.\n",
+ "    cache_code_region_from_code_departement.setdefault(code_departement, {})[date] = code_region\n",
+ "    return code_region"
]
},
{
"cell_type": "code",
"execution_count": 8,
+ "id": "0f05b801-1601-4d78-858e-30fdadf4608a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'27'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "code_region_from_code_departement(\"21\",\"2023-01-01\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
"id": "c5f67bd6-5cf9-4e09-a587-f4b2454f4618",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#Les APIs sont relativement fragile, il arrive qu'il y ai des erreurs 500 ou 502. \n",
@@ -139,25 +455,29 @@
" for annee in range(date_debut,date_fin+1):\n",
" if annee in prix_by_annee:\n",
" del prix_by_annee[annee]\n",
- "#debug_if_error_500(2007,2007)"
+ "# debug_if_error_500(2007,2007)"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 10,
"id": "14979ff2-770a-4a6c-8780-13a76a98512a",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "tree = ET.parse('unzip_file/PrixCarburants_annuel_2021.xml')\n",
- "pdv_liste = tree.getroot()"
+ "# tree = ET.parse('unzip_file/PrixCarburants_annuel_2021.xml')\n",
+ "# pdv_liste = tree.getroot()"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"id": "bb42e6c2-f9e8-49da-a372-88b9b869993b",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"citycode_lat_long = {} "
@@ -165,9 +485,11 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"id": "4d1a148e-db02-42b5-b4b9-35c1ab57d924",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"prix_by_region = {}"
@@ -175,33 +497,43 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": null,
"id": "2cd9550a-5c9b-4787-a372-d4f8309eaf9d",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "2020\n",
- "2021\n"
+ "2007\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 80%|███████▉ | 6315/7904 [10:14<01:10, 22.62it/s] "
]
}
],
"source": [
+ "# Temps de traitement : 5 minutes par année.\n",
"#boucle principale, qui récupére les données des fichiers XML,\n",
"#trouve le code région de chaque station, \n",
"#récupère les données importantes, dont le prix par jour, par carburant, par station,\n",
"#nous avons uniquement les prix des jours ou il y a eu un changement, il faut créer un prix aux jours ou il n'y en a pas eu,\n",
"#fait la moyenne par jour de toutes les stations,\n",
"#fait la moyenne par région, par mois et par annee, des prix des différents carburants.\n",
- "for annee in range(2007,2022):\n",
+ "\n",
+ "for annee in range(START_DATE,END_DATE+1):\n",
" print(annee)\n",
" tree = ET.parse(f'unzip_file/PrixCarburants_annuel_{annee}.xml')\n",
" pdv_liste = tree.getroot()\n",
" date = f'{annee}-01-01'\n",
" region = {} \n",
- " for pdv in pdv_liste:\n",
+ " for pdv in tqdm(pdv_liste):\n",
" longitude = pdv.attrib.get('longitude')\n",
" latitude = pdv.attrib.get('latitude')\n",
" citycode = None\n",
@@ -309,9 +641,27 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": null,
+ "id": "a57cc7e5-fbd2-4d3f-bdec-0d0e56118478",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Save the departement-to-region cache and the prix_by_region dictionary as JSON files.\n",
+ "for json_path, json_payload in (\n",
+ "    (\"cache_code_region_from_code_departement.json\", cache_code_region_from_code_departement),\n",
+ "    (\"prix_by_region.json\", prix_by_region),\n",
+ "):\n",
+ "    with open(json_path, \"w\") as outfile:\n",
+ "        json.dump(json_payload, outfile, indent=4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"id": "0f26bf8a-397d-4522-8409-f9f4681ce870",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#Lisse le dictionnaire \"prix_by_region\".\n",
@@ -335,106 +685,234 @@
},
{
"cell_type": "code",
- "execution_count": 71,
+ "execution_count": null,
+ "id": "9f4a3d64-7221-4cb4-82a4-b33622fdedcc",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "prix_region_mensuel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6e6ab168-50df-48b1-995f-f31813e23dda",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Save liste_prix_mensuel as a JSON file.\n",
+ "with open(\"liste_prix_mensuel_region.json\", \"w\") as outfile:\n",
+ "    json.dump(liste_prix_mensuel, outfile, indent=4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5aa8e3f5-45e0-482c-8510-5fb7e8d79edc",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_prix_region_litre = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
+ "df_prix_region_litre"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0ea9fe1d-4247-45a6-8c6a-26dd5dd8407a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_prix_region_litre.query(\"mois == 'moyenne' and annee == 2022 and region == '82'\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8e5043a5-0519-46d4-a935-654ea6cae005",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_prix_region_litre.query(\"mois == 'moyenne' and annee == 2022 and region == '75'\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2184e8e0-d785-4083-9591-87ec10e2d2f8",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_liste_prix_mensuel_region = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
+ "df_liste_prix_mensuel_region.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"id": "8a712431-90ff-42bb-9449-3f89bbaf2a15",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#créer la dataframe \"prix_mensuel_carburants_par_regions_litre.csv\"\n",
- "df = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
- "indexNames = df[ df['mois'] == 'moyenne' ].index\n",
- "df.drop(indexNames , inplace=True)\n",
- "df.reset_index(drop = True, inplace = True)\n",
- "df['prix_moyen'] = round(df['prix_moyen'] * 0.001,2)\n",
- "df.rename(columns = {'prix_moyen':'prix_moyen_by_litre'}, inplace = True)\n",
- "df.to_csv(r'prix_mensuel_carburants_par_regions_litre.csv', index = False, header=True)"
+ "# Keep the monthly rows only (the 'moyenne' rows feed the annual file), round the price and export it.\n",
+ "# One chain without in-place mutation, so the cell gives the same result when it is re-run.\n",
+ "df_prix_mensuel_carburants_par_regions_litre = (\n",
+ "    df_liste_prix_mensuel_region\n",
+ "    .loc[df_liste_prix_mensuel_region['mois'] != 'moyenne']\n",
+ "    .reset_index(drop=True)\n",
+ "    .assign(prix_moyen=lambda df: df['prix_moyen'].round(2))\n",
+ "    .rename(columns={'prix_moyen': 'prix_moyen_by_litre'})\n",
+ ")\n",
+ "df_prix_mensuel_carburants_par_regions_litre.to_csv(r'prix_mensuel_carburants_par_regions_litre.csv', index = False, header=True)\n",
+ "df_prix_mensuel_carburants_par_regions_litre.tail(3)"
]
},
{
"cell_type": "code",
- "execution_count": 72,
+ "execution_count": null,
"id": "0803570e-3b2c-4f0d-bc8b-aa34a3f6dfa6",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#créer la dataframe \"prix_annuel_carburants_par_regions_litre.csv\"\n",
- "df = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
- "indexNames = df[ df['mois'] != 'moyenne' ].index\n",
- "df.drop(indexNames , inplace=True)\n",
- "df.reset_index(drop = True, inplace = True)\n",
- "df.drop(columns=['mois'],inplace=True)\n",
- "df['prix_moyen'] = round(df['prix_moyen'] * 0.001,2)\n",
- "df.rename(columns = {'prix_moyen':'prix_moyen_par_litre'}, inplace = True)\n",
- "df.to_csv(r'prix_annuel_carburants_par_regions_litre.csv', index = False, header=True)"
+ "# Keep the 'moyenne' rows only, drop the now-constant 'mois' column, round the price and export it.\n",
+ "# One chain without in-place mutation, so the cell gives the same result when it is re-run.\n",
+ "df_prix_annuel_carburants_par_regions_litre = (\n",
+ "    df_liste_prix_mensuel_region\n",
+ "    .loc[df_liste_prix_mensuel_region['mois'] == 'moyenne']\n",
+ "    .reset_index(drop=True)\n",
+ "    .drop(columns=['mois'])\n",
+ "    .assign(prix_moyen=lambda df: df['prix_moyen'].round(2))\n",
+ "    .rename(columns={'prix_moyen': 'prix_moyen_par_litre'})\n",
+ ")\n",
+ "df_prix_annuel_carburants_par_regions_litre.to_csv(r'prix_annuel_carburants_par_regions_litre.csv', index = False, header=True)\n",
+ "df_prix_annuel_carburants_par_regions_litre.tail(3)"
]
},
{
"cell_type": "code",
- "execution_count": 73,
+ "execution_count": null,
"id": "c48fc388-00cb-4c5d-a373-29be65b2559e",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#créer la dataframe \"prix_mensuel_carburants_par_regions_hectolitre.csv\"\n",
- "df = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
- "indexNames = df[ df['mois'] == 'moyenne' ].index\n",
- "df.drop(indexNames , inplace=True)\n",
- "df.reset_index(drop = True, inplace = True)\n",
- "df['prix_moyen'] = round(df['prix_moyen'] * 0.1,2)\n",
- "df.rename(columns = {'prix_moyen':'prix_moyen_par_hectolitre'}, inplace = True)\n",
- "df.to_csv(r'prix_mensuel_carburants_par_regions_hectolitre.csv', index = False, header=True)"
+ "# Keep the monthly rows only, convert the price per litre to a price per hectolitre (x100) and export it.\n",
+ "# One chain without in-place mutation, so the cell gives the same result when it is re-run.\n",
+ "df_prix_mensuel_carburants_par_regions_hectolitre = (\n",
+ "    df_liste_prix_mensuel_region\n",
+ "    .loc[df_liste_prix_mensuel_region['mois'] != 'moyenne']\n",
+ "    .reset_index(drop=True)\n",
+ "    .assign(prix_moyen=lambda df: (df['prix_moyen'] * 100).round(2))\n",
+ "    .rename(columns={'prix_moyen': 'prix_moyen_par_hectolitre'})\n",
+ ")\n",
+ "df_prix_mensuel_carburants_par_regions_hectolitre.to_csv(r'prix_mensuel_carburants_par_regions_hectolitre.csv', index = False, header=True)\n",
+ "df_prix_mensuel_carburants_par_regions_hectolitre.tail(3)"
]
},
{
"cell_type": "code",
- "execution_count": 74,
+ "execution_count": null,
"id": "6d54c6f0-0d79-4292-b13b-f1ada4a621a6",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#créer la dataframe \"prix_annuel_carburants_par_regions_hectolitre.csv\"\n",
- "df = pd.DataFrame.from_dict(liste_prix_mensuel)\n",
- "indexNames = df[ df['mois'] != 'moyenne' ].index\n",
- "df.drop(indexNames , inplace=True)\n",
- "df.reset_index(drop = True, inplace = True)\n",
- "df.drop(columns=['mois'],inplace=True)\n",
- "df['prix_moyen'] = round(df['prix_moyen'] * 0.1,2)\n",
- "df.rename(columns = {'prix_moyen':'prix_moyen_par_hectolitre'}, inplace = True)\n",
- "df.to_csv(r'prix_annuel_carburants_par_regions_hectolitre.csv', index = False, header=True)"
+ "# Keep the 'moyenne' rows only, drop 'mois', convert the price per litre to a price per hectolitre (x100) and export it.\n",
+ "# One chain without in-place mutation, so the cell gives the same result when it is re-run.\n",
+ "prix_annuel_carburants_par_regions_hectolitre = (\n",
+ "    df_liste_prix_mensuel_region\n",
+ "    .loc[df_liste_prix_mensuel_region['mois'] == 'moyenne']\n",
+ "    .reset_index(drop=True)\n",
+ "    .drop(columns=['mois'])\n",
+ "    .assign(prix_moyen=lambda df: (df['prix_moyen'] * 100).round(2))\n",
+ "    .rename(columns={'prix_moyen': 'prix_moyen_par_hectolitre'})\n",
+ ")\n",
+ "prix_annuel_carburants_par_regions_hectolitre.to_csv(r'prix_annuel_carburants_par_regions_hectolitre.csv', index = False, header=True)\n",
+ "prix_annuel_carburants_par_regions_hectolitre.tail(3)"
]
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": null,
"id": "9d0cc5e4-1054-4efc-aa17-b23b0a46b2e0",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#agrege les prix au niveau national, pour pouvoir les verifier par rapport aux données de l'INSEE, et voir si il y a une coeherence.\n",
"df_ann = pd.read_csv(\"prix_annuel_carburants_par_regions_litre.csv\", sep=\",\")\n",
"df_ann = df_ann.groupby(['carburant','annee'])[['prix_moyen_par_litre']].mean().reset_index().round(3)\n",
- "df_ann.to_csv(r'prix_par_carburant_annee.csv',index = False, header=True)"
+ "df_ann.to_csv(r'prix_par_carburant_annee.csv',index = False, header=True)\n",
+ "df_ann.tail(3)"
]
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "11c180d5-6c67-4a13-8b75-7f13dfd80712",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#agrege les prix au niveau national, pour pouvoir les verifier par rapport aux données de l'INSEE, et voir si il y a une coeherence.\n",
"df_mens = pd.read_csv(\"prix_mensuel_carburants_par_regions_litre.csv\", sep=\",\")\n",
"df_mens = df_mens.groupby(['carburant','annee','mois'])[['prix_moyen_by_litre']].mean().reset_index().round(3)\n",
- "df_mens.to_csv(r'prix_par_carburant_mois.csv',index = False, header=True)"
+ "df_mens.to_csv(r'prix_par_carburant_mois.csv',index = False, header=True)\n",
+ "df_mens.tail(3)"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "058ae67f-3dd9-40cc-8215-f7a633028329",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Build prix_par_carburant_annee_hectolitre.csv from the yearly prices per litre held in df_ann.\n",
+ "df_ann_hecto = (\n",
+ "    df_ann\n",
+ "    .assign(prix_moyen_par_hectolitre=lambda df: (df['prix_moyen_par_litre'] * 100).round(2))\n",
+ "    .drop(columns=['prix_moyen_par_litre'])\n",
+ ")\n",
+ "df_ann_hecto.to_csv(r'prix_par_carburant_annee_hectolitre.csv',index = False, header=True)\n",
+ "df_ann_hecto.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad5b8499-9eaf-4a8f-84c2-2237b94ab818",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_ann"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "40fd5c27-a827-47d7-912e-072dc71ca860",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "indirect-taxation-kernel",
+ "display_name": "prix-carburant",
"language": "python",
- "name": "indirect-taxation-kernel"
+ "name": "prix-carburant"
},
"language_info": {
"codemirror_mode": {
@@ -446,7 +924,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.9.2"
}
},
"nbformat": 4,