diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2e8040d3bc308c2e1a0b18998f0e57c8c918c3b4..960d2424e49dc59f43932a39fa30361c4e8fd68b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,18 +5,18 @@ repos:
   - id: nbdev_clean
 - repo: https://github.com/charliermarsh/ruff-pre-commit
   # Ruff version.
-  rev: 'v0.6.9'
+  rev: 'v0.11.2'
   hooks:
     - id: ruff
       exclude: ^(notebooks/analyse/|notebooks/code_CASD/|notebooks/analyses/)
 -   repo: https://github.com/psf/black
-    rev: 24.10.0
+    rev: 25.1.0
     hooks:
     - id: black
       args: [--safe]
 # Check notebook formating
 - repo: https://github.com/nbQA-dev/nbQA
-  rev: 1.8.7
+  rev: 1.9.1
   hooks:
     - id: nbqa-black
     - id: nbqa-pyupgrade
diff --git a/notebooks/analyse/test_cas_types_cotisations.ipynb b/notebooks/analyse/test_cas_types_cotisations.ipynb
index 3a55caeaf561a2cbaf858059e8bc4cbdac6b93ca..210360eb6670fb32c57d5200eba15354330260b9 100644
--- a/notebooks/analyse/test_cas_types_cotisations.ipynb
+++ b/notebooks/analyse/test_cas_types_cotisations.ipynb
@@ -550,10 +550,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# cotisations non nulles\n",
-    "selection = copy.deepcopy(donnees_01)\n",
-    "donnees_check = selection.loc[:, selection.any()]\n",
-    "donnees_check.columns"
+    "# # cotisations non nulles\n",
+    "# selection = copy.deepcopy(donnees_01)\n",
+    "# donnees_check = selection.loc[:, selection.any()]\n",
+    "# donnees_check.columns"
    ]
   },
   {
@@ -562,7 +562,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dessiner_graphique(donnees=donnees_07, revenu=\"salaire_de_base\", affichage_en_taux=True)"
+    "# dessiner_graphique(donnees=donnees_07, revenu=\"salaire_de_base\", affichage_en_taux=True)"
    ]
   },
   {
@@ -571,12 +571,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "comparer_donnees(\n",
-    "    donnees_A=donnees_01,\n",
-    "    donnees_B=donnees_02,\n",
-    "    revenu=\"salaire_de_base\",\n",
-    "    affichage_en_taux=False,\n",
-    ")"
+    "# comparer_donnees(\n",
+    "#     donnees_A=donnees_01,\n",
+    "#     donnees_B=donnees_02,\n",
+    "#     revenu=\"salaire_de_base\",\n",
+    "#     affichage_en_taux=False,\n",
+    "# )"
    ]
   },
   {
@@ -592,7 +592,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "exporter(donnees_C)"
+    "# exporter(donnees_C)"
    ]
   },
   {
diff --git a/notebooks/analyse/test_cas_types_cotisations_boucle.ipynb b/notebooks/analyse/test_cas_types_cotisations_boucle.ipynb
index 19a45d19c2d0bdbee25a6b2b95d61738213e749c..41939e53bc397558de1dd137f6937c5d5293a182 100644
--- a/notebooks/analyse/test_cas_types_cotisations_boucle.ipynb
+++ b/notebooks/analyse/test_cas_types_cotisations_boucle.ipynb
@@ -14,7 +14,6 @@
     "import copy\n",
     "import plotly.graph_objects as go\n",
     "from datetime import datetime\n",
-    "import copy\n",
     "\n",
     "from openfisca_core.simulation_builder import SimulationBuilder\n",
     "from leximpact_survey_scenario.leximpact_tax_and_benefit_system import leximpact_tbs"
@@ -555,9 +554,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dessiner_graphique(\n",
-    "    donnees=donnees_03, revenu=\"traitement_indiciaire_brut\", affichage_en_taux=True\n",
-    ")"
+    "# dessiner_graphique(\n",
+    "#     donnees=donnees_03, revenu=\"traitement_indiciaire_brut\", affichage_en_taux=True\n",
+    "# )"
    ]
   },
   {
@@ -2000,12 +1999,12 @@
     }
    ],
    "source": [
-    "comparer_donnees(\n",
-    "    donnees_A=donnees_03,\n",
-    "    donnees_B=donnees_05,\n",
-    "    revenu=\"traitement_indiciaire_brut\",\n",
-    "    affichage_en_taux=True,\n",
-    ")"
+    "# comparer_donnees(\n",
+    "#     donnees_A=donnees_03,\n",
+    "#     donnees_B=donnees_05,\n",
+    "#     revenu=\"traitement_indiciaire_brut\",\n",
+    "#     affichage_en_taux=True,\n",
+    "# )"
    ]
   },
   {
@@ -2021,7 +2020,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "exporter(donnees_C)"
+    "# exporter(donnees_C)"
    ]
   },
   {
diff --git a/notebooks/analyses/graphiques_cas_type/cas_type_prestations_familiales.ipynb b/notebooks/analyses/graphiques_cas_type/cas_type_prestations_familiales.ipynb
index 36302a72cad48f3b976d327bee3f333dd33f0a5c..4242fd32ef2df3ca3ece5a6674467644067f2e3c 100644
--- a/notebooks/analyses/graphiques_cas_type/cas_type_prestations_familiales.ipynb
+++ b/notebooks/analyses/graphiques_cas_type/cas_type_prestations_familiales.ipynb
@@ -9,26 +9,27 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "revenus_capital has been updated in leximpact-survey-scenario\n",
       "iaidrdi has been updated in leximpact-survey-scenario\n",
-      "plus_values_prelevement_forfaitaire_unique_ir has been updated in leximpact-survey-scenario\n",
       "rfr_plus_values_hors_rni has been updated in leximpact-survey-scenario\n",
+      "assiette_csg_plus_values has been updated in leximpact-survey-scenario\n",
       "rpns_imposables has been updated in leximpact-survey-scenario\n",
       "rpns_autres_revenus has been updated in leximpact-survey-scenario\n",
-      "prelevement_forfaitaire_non_liberatoire has been updated in leximpact-survey-scenario\n",
-      "paje_naissance has been updated in leximpact-survey-scenario\n"
+      "paje_naissance has been updated in leximpact-survey-scenario\n",
+      "allegement_general_mode_recouvrement has been updated in leximpact-survey-scenario\n"
      ]
     },
     {
      "ename": "ImportError",
-     "evalue": "cannot import name 'smic_annuel_brut_by_year' from 'openfisca_france_data.erfs_fpr.input_data_builder.step_03_variables_individuelles' (/home/cgl/leximpact/leximpact-survey-scenario/.venv/lib/python3.10/site-packages/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py)",
+     "evalue": "cannot import name 'smic_annuel_brut_by_year' from 'openfisca_france_data.erfs_fpr.input_data_builder.step_03_variables_individuelles' (/home/cgl/leximpact/simulateur_socio_fiscal/budget/leximpact-prepare-data/.venv/lib/python3.11/site-packages/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py)",
      "output_type": "error",
      "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mImportError\u001b[0m                               Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mleximpact_survey_scenario\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgraphiques_cas_type\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdecomposition\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (decomposition_data, decompo_prestations_familiales)\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mplotly\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgraph_objects\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgo\u001b[39;00m\n",
-      "File \u001b[0;32m~/leximpact/leximpact-survey-scenario/leximpact_survey_scenario/graphiques_cas_type/decomposition.py:3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mleximpact_survey_scenario\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mleximpact_tax_and_benefit_system\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m leximpact_tbs\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mleximpact_survey_scenario\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgraphiques_cas_type\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcas_type_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m      4\u001b[0m     calculate,\n\u001b[1;32m      5\u001b[0m     create_scenario_inferieur_smic,\n\u001b[1;32m      6\u001b[0m     create_scenario_superieur_smic,\n\u001b[1;32m      7\u001b[0m     \u001b[38;5;66;03m#    smic_annuel_brut_by_year,\u001b[39;00m\n\u001b[1;32m      8\u001b[0m )\n\u001b[1;32m     10\u001b[0m decompo_revenu_disponible \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m     11\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrevenu_disponible\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     12\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprestations_sociales\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     19\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcotisations_salariales\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     20\u001b[0m ]\n\u001b[1;32m     22\u001b[0m decompo_ir \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m     23\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mirpp\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     24\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontribution_exceptionnelle_hauts_revenus\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     
30\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrni\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     31\u001b[0m ]\n",
-      "File \u001b[0;32m~/leximpact/leximpact-survey-scenario/leximpact_survey_scenario/graphiques_cas_type/cas_type_utils.py:12\u001b[0m\n\u001b[1;32m     10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenfisca_core\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m periods\n\u001b[1;32m     11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenfisca_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrates\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m marginal_rate, average_rate\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenfisca_france_data\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01merfs_fpr\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minput_data_builder\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mstep_03_variables_individuelles\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m     13\u001b[0m     smic_annuel_net_by_year,\n\u001b[1;32m     14\u001b[0m     smic_annuel_brut_by_year,\n\u001b[1;32m     15\u001b[0m )\n\u001b[1;32m     16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtaxipp\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneral_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m to_percent_round_formatter\n\u001b[1;32m     17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtaxipp\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtest_case\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m base\n",
-      "\u001b[0;31mImportError\u001b[0m: cannot import name 'smic_annuel_brut_by_year' from 'openfisca_france_data.erfs_fpr.input_data_builder.step_03_variables_individuelles' (/home/cgl/leximpact/leximpact-survey-scenario/.venv/lib/python3.10/site-packages/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py)"
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mImportError\u001b[39m                               Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mleximpact_survey_scenario\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mgraphiques_cas_type\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdecomposition\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m      2\u001b[39m     decomposition_data,\n\u001b[32m      3\u001b[39m     decompo_prestations_familiales,\n\u001b[32m      4\u001b[39m )\n\u001b[32m      7\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mplotly\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mgraph_objects\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mgo\u001b[39;00m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/leximpact/simulateur_socio_fiscal/budget/leximpact-prepare-data/.venv/lib/python3.11/site-packages/leximpact_survey_scenario/graphiques_cas_type/decomposition.py:3\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mleximpact_survey_scenario\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mleximpact_survey_scenario\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m leximpact_tbs\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mleximpact_survey_scenario\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mgraphiques_cas_type\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcas_type_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m      4\u001b[39m     calculate,\n\u001b[32m      5\u001b[39m     create_scenario_inferieur_smic,\n\u001b[32m      6\u001b[39m     create_scenario_superieur_smic,\n\u001b[32m      7\u001b[39m     \u001b[38;5;66;03m#    smic_annuel_brut_by_year,\u001b[39;00m\n\u001b[32m      8\u001b[39m )\n\u001b[32m     10\u001b[39m decompo_revenu_disponible = [\n\u001b[32m     11\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mrevenu_disponible\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     12\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mprestations_sociales\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m     19\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mcotisations_salariales\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     20\u001b[39m ]\n\u001b[32m     22\u001b[39m decompo_ir = [\n\u001b[32m     23\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mirpp\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     24\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mcontribution_exceptionnelle_hauts_revenus\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m     
30\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mrni\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     31\u001b[39m ]\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/leximpact/simulateur_socio_fiscal/budget/leximpact-prepare-data/.venv/lib/python3.11/site-packages/leximpact_survey_scenario/graphiques_cas_type/cas_type_utils.py:12\u001b[39m\n\u001b[32m     10\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mopenfisca_core\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m periods\n\u001b[32m     11\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mopenfisca_core\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrates\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m marginal_rate, average_rate\n\u001b[32m---> \u001b[39m\u001b[32m12\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mopenfisca_france_data\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01merfs_fpr\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01minput_data_builder\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mstep_03_variables_individuelles\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     13\u001b[39m     smic_annuel_net_by_year,\n\u001b[32m     14\u001b[39m     smic_annuel_brut_by_year,\n\u001b[32m     15\u001b[39m )\n\u001b[32m     16\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtaxipp\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mgeneral_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m to_percent_round_formatter\n\u001b[32m     17\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtaxipp\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mtest_case\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m base\n",
+      "\u001b[31mImportError\u001b[39m: cannot import name 'smic_annuel_brut_by_year' from 'openfisca_france_data.erfs_fpr.input_data_builder.step_03_variables_individuelles' (/home/cgl/leximpact/simulateur_socio_fiscal/budget/leximpact-prepare-data/.venv/lib/python3.11/site-packages/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py)"
      ]
     }
    ],
@@ -49,12 +50,13 @@
    "outputs": [],
    "source": [
     "year = 2023\n",
-    "smic = (\n",
-    "    tax_and_benefit_system.parameters(\n",
-    "        year\n",
-    "    ).marche_travail.salaire_minimum.smic.smic_b_mensuel\n",
-    "    * 12\n",
-    ")"
+    "smic = 12\n",
+    "# smic = (\n",
+    "#     tax_and_benefit_system.parameters(\n",
+    "#         year\n",
+    "#     ).marche_travail.salaire_minimum.smic.smic_b_mensuel\n",
+    "#     * 12\n",
+    "# )"
    ]
   },
   {
@@ -96818,7 +96820,7 @@
     "\n",
     "# fig_paje.show()\n",
     "# fig_ars.show()\n",
-    "fig_af.show()\n",
+    "# fig_af.show()\n",
     "# fig_cf.show()"
    ]
   },
@@ -118951,9 +118953,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "leximpact-survey-scenario-kernel",
+   "display_name": "python3",
    "language": "python",
-   "name": "leximpact-survey-scenario-kernel"
+   "name": "python3"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/code_CASD/extractions_base_des_impots/00_generate_fake_data.ipynb b/notebooks/code_CASD/extractions_base_des_impots/00_generate_fake_data.ipynb
deleted file mode 100644
index ca58c33ae2fe25b31078942a72ba12cd425a2398..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/00_generate_fake_data.ipynb
+++ /dev/null
@@ -1,968 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "western-orientation",
-   "metadata": {},
-   "source": [
-    "# Generate a fake POTE input file\n",
-    "\n",
-    "The goal is to be able to test CASD script locally.\n",
-    "\n",
-    "Objectif : nous voulons des revenus entre 0 et 300 millions avec beaucoup de personnes vers 0 et un petit peut vers le million.\n",
-    "\n",
-    "La distribution de Pareto peut nous donner cela.\n",
-    "\n",
-    "TODO:\n",
-    "- Actuellement tous les revenus sont indépendants, il faudrait plutôt tout calculer à partir du RFR (avec un random)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "downtown-player",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from leximpact_common_python_libraries.config import Configuration"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "72ee9624",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "config = Configuration(project_folder=\"leximpact-prepare-data\")\n",
-    "OUT_PATH = config.get(\"FAKE_DATA\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "mental-composition",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Activate multi-output in notebook\n",
-    "from IPython.core.interactiveshell import InteractiveShell\n",
-    "\n",
-    "InteractiveShell.ast_node_interactivity = \"all\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "enhanced-shopping",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# import numpy\n",
-    "import matplotlib.pyplot as plt\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import vaex\n",
-    "import seaborn as sns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "hungarian-freeze",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Python 3.10.12\n"
-     ]
-    }
-   ],
-   "source": [
-    "!python --version"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "inside-complex",
-   "metadata": {},
-   "source": [
-    "## Utilisation de Pareto généralisé\n",
-    "Voir la doc de scipy : https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.genpareto.html"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "unlike-harmony",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "mean=array(1.11111111) var=array(1.54320988) skew=array(2.81105689) kurt=array(14.82857143)\n",
-      "x.shape=(100,)\n"
-     ]
-    },
-    {
-     "data": {
-      "image/png": "",
-      "text/plain": [
-       "<Figure size 640x480 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from scipy.stats import genpareto\n",
-    "\n",
-    "c = 0.1\n",
-    "\n",
-    "mean, var, skew, kurt = genpareto.stats(c, moments=\"mvsk\")\n",
-    "\n",
-    "print(f\"{mean=} {var=} {skew=} {kurt=}\")\n",
-    "\n",
-    "x = np.linspace(genpareto.ppf(0.01, c), genpareto.ppf(0.99, c), 100)\n",
-    "print(f\"{x.shape=}\")\n",
-    "\n",
-    "fig, ax = plt.subplots(1, 1)\n",
-    "_ = ax.plot(x, genpareto.pdf(x, c), \"r-\", lw=5, alpha=0.6, label=\"genpareto pdf\")\n",
-    "\n",
-    "r = genpareto.rvs(c, size=100_000)\n",
-    "_ = ax.hist(r, density=True, histtype=\"stepfilled\", alpha=0.2)\n",
-    "_ = ax.legend(loc=\"best\", frameon=False)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "incomplete-olympus",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[1092.9815492159842]"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sorted(r * 10e6)[10:2:-10]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "legal-powell",
-   "metadata": {},
-   "source": [
-    "## Exemple de distribution"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "wireless-function",
-   "metadata": {},
-   "source": [
-    "Par exemple, ici on demande :\n",
-    "- Un coefficient c (de Pareto ?) de 0.5, plus il se rapproche de 1, plus la \"cassure\" entre haut et bas revenus est forte\n",
-    "- size=100 personnes\n",
-    "- scale=100 l'amplitude de revenu : le revenu maximum dépendra de ce paramètre et du coéfficient c. Avec c=0.5 et scale=1 000, le revenu maximum est de 17 000.\n",
-    "- loc=0 le revenu minimum."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "incorrect-valuation",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "",
-      "text/plain": [
-       "<Figure size 640x480 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "c = 0.5\n",
-    "r = genpareto.rvs(c, size=100, loc=0, scale=1_000, random_state=1)\n",
-    "count, bins, ax = plt.hist(r, 4, density=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "increased-joseph",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "r.shape=(100,)  r.min()=0.11438462947924068 r.mean()=1657.342156816486 r.max()=16949.974367913703\n",
-      "Frontière de l'histogramme :[1.14384629e-01 4.23757938e+03 8.47504438e+03 1.27125094e+04\n",
-      " 1.69499744e+04]\n",
-      "Nombre de personnes dans chaque bars :[90.  7.  2.  1.]\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f\"{r.shape=}  {r.min()=} {r.mean()=} {r.max()=}\")\n",
-    "print(f\"Frontière de l'histogramme :{bins}\")\n",
-    "print(f\"Nombre de personnes dans chaque bars :{count}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "exterior-knitting",
-   "metadata": {},
-   "source": [
-    "On voit sur l'histogramme qu'on a 90% des personnes qui gagnent moins de 4 237 €\n",
-    "\n",
-    "Et seulement 1% qui gagnent plus de 12 700 €, avec un maximum de 16 949 €"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "standing-dispatch",
-   "metadata": {},
-   "source": [
-    "## Génération d'un grand nombre de données"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "excess-candy",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_random_data(nb_zeros: int, nb_above_zero: int, c=0.9, scale=80_000):\n",
-    "    \"\"\"\n",
-    "    ::nb_zeros:: Nombre d'individus à 0\n",
-    "    ::nb_above_zero:: Nombre d'individus pour lesquels générer une valeur\n",
-    "    \"\"\"\n",
-    "    zeros = np.zeros((nb_zeros,))\n",
-    "    # Using numpy.random.pareto() method\n",
-    "    pareto = genpareto.rvs(c, size=nb_above_zero, loc=0, scale=scale, random_state=1)\n",
-    "    return np.concatenate((zeros, pareto), axis=0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "variable-hunter",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "r.shape=(38000000,)  r.min()=0.0 r.mean()=15795.173430857798 r.max()=375678496.8283032\n"
-     ]
-    }
-   ],
-   "source": [
-    "c = 0.6\n",
-    "r = get_random_data(\n",
-    "    nb_zeros=2_000_000, nb_above_zero=36_000_000, c=c, scale=4000 * (1 / c)\n",
-    ")\n",
-    "print(f\"{r.shape=}  {r.min()=} {r.mean()=} {r.max()=}\")\n",
-    "df = pd.DataFrame({\"revkire\": r})"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "imposed-scene",
-   "metadata": {},
-   "source": [
-    "### Calcul des centiles"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "terminal-minority",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.5"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "nb_quantiles = 100\n",
-    "centile = [(1 / nb_quantiles) * (i + 1) for i in range(nb_quantiles)]\n",
-    "centile[49]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "sized-benefit",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "quantiles = df.quantile(centile)\n",
-    "df_quantiles = pd.DataFrame(quantiles)\n",
-    "df_quantiles[\"quantiles\"] = df_quantiles.index * 100\n",
-    "df_quantiles[\"quantiles\"] = df_quantiles[\"quantiles\"].astype(int)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "eleven-excerpt",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "",
-      "text/plain": [
-       "<Figure size 2000x800 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "sns.set(rc={\"figure.figsize\": (20, 8)})\n",
-    "ax = sns.barplot(data=df_quantiles, x=\"quantiles\", y=\"revkire\")\n",
-    "_ = ax.set_yscale(\"log\")\n",
-    "_ = ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=90)\n",
-    "_ = ax.set_title(\"Centiles de RFR dans FAKE POTE 2019\\nEchelle logarithmique\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "canadian-flavor",
-   "metadata": {},
-   "source": [
-    "On est très proche de ce qu'on l'on a vu sur POTE. 👏 "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "separate-sperm",
-   "metadata": {},
-   "source": [
-    "## On le fait pour plusieurs de colonnes"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "revolutionary-companion",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.44367670147237526"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from random import random\n",
-    "\n",
-    "random()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "nervous-bradley",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sas_columns = [\n",
-    "    \"rnsgbd\",\n",
-    "    \"rnsgld\",\n",
-    "    \"revkire\",\n",
-    "    \"Z1aj\",\n",
-    "    \"Z1ap\",\n",
-    "    \"Z1as\",\n",
-    "    \"Z1bj\",\n",
-    "    \"Z1bp\",\n",
-    "    \"Z1bs\",\n",
-    "    \"Z1cj\",\n",
-    "    \"Z1cw\",\n",
-    "    \"Z1dw\",\n",
-    "    \"Z2ch\",\n",
-    "    \"Z2dc\",\n",
-    "    \"Z2dh\",\n",
-    "    \"Z2tr\",\n",
-    "    \"Z3ua\",\n",
-    "    \"Z3vg\",\n",
-    "    \"Z3vz\",\n",
-    "    \"Z4ba\",\n",
-    "    \"Z4bb\",\n",
-    "    \"Z4bc\",\n",
-    "    \"Z4bd\",\n",
-    "    \"Z4be\",\n",
-    "    \"Z6de\",\n",
-    "    \"Z8sc\",\n",
-    "    \"Z8sw\",\n",
-    "    \"Z8sx\",\n",
-    "    \"CICS\",\n",
-    "    \"MNIMQG\",\n",
-    "]\n",
-    "col_to_fake = []\n",
-    "for col in sas_columns:\n",
-    "    c = 0.6 + random() / 5\n",
-    "    col_dict = {\n",
-    "        \"name\": col,\n",
-    "        \"c\": c,\n",
-    "        \"nb_zeros\": int(2_000_000 * random()),\n",
-    "        \"scale\": 4000 * (1 / c),\n",
-    "    }\n",
-    "    col_to_fake.append(col_dict)\n",
-    "# col_to_fake"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "stretch-wells",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def gen_all_data(reduce_output_ratio=100, filepath=None):\n",
-    "    \"\"\"\n",
-    "    ::reduce_output_ratio::  1 for full data, 2 for alf.\n",
-    "    ::filepath:: Chemin et nom du fichier à sauver\n",
-    "    \"\"\"\n",
-    "    df_pote = None\n",
-    "    nb_ff = 39_000_000\n",
-    "    for col in col_to_fake:\n",
-    "        values = get_random_data(\n",
-    "            nb_zeros=col[\"nb_zeros\"] // reduce_output_ratio,\n",
-    "            nb_above_zero=(nb_ff - col[\"nb_zeros\"]) // reduce_output_ratio,\n",
-    "            c=col[\"c\"],\n",
-    "            scale=col[\"scale\"],\n",
-    "        )\n",
-    "        df_temp = vaex.from_dict({col[\"name\"]: values.astype(int)})\n",
-    "        if df_pote is not None:\n",
-    "            df_pote = df_pote.join(df_temp)\n",
-    "        else:\n",
-    "            df_pote = df_temp\n",
-    "    if filepath:\n",
-    "        df_pote.export_parquet(filepath)\n",
-    "    return df_pote"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "australian-holly",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_pote = gen_all_data(reduce_output_ratio=10_000)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "certified-legislation",
-   "metadata": {},
-   "source": [
-    "## On le sauve dans le même format que sur le CASD\n",
-    "\n",
-    "Parquet occupe moins d'espace que Arrow. Mais arrow ne nécessite quasiment pas de CPU car le fichier est chargé en mémoire tel quel.\n",
-    "\n",
-    "Test avec 39 millions de lignes et 29 colonnes int64 sur notre machine dans DC5:\n",
-    " - Parquet : 2.7G enregistrés en 22 secondes. (CPU Time = Wall time)\n",
-    " - Arrow : 4.9G enregistrés en moins de 4 secondes de temps CPU. (mais 44s de Wall time)\n",
-    " \n",
-    " => Arrow est donc plus consommateur en espace disque, beaucoup moins en CPU. Mais en temps d'attente perçu c'est équivalent."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "nutritional-minneapolis",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_pote.export_parquet(f\"{OUT_PATH}fake_pote_light.parquet\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7a08c916-7857-4abb-88b9-6ef066bc14a1",
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
-     ]
-    }
-   ],
-   "source": [
-    "df_pote_full = gen_all_data(reduce_output_ratio=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "13d1e9ad-dae8-413e-a76f-05998f8a16f7",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "29"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(df_pote_full.get_column_names())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "sought-iraqi",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "39000000"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(df_pote_full)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "197cfe3f-d432-4cc7-a701-a3cd1414a1c7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 22.2 s, sys: 667 ms, total: 22.9 s\n",
-      "Wall time: 22.9 s\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "df_pote_full.export_parquet(f\"{OUT_PATH}fake_pote_full.parquet\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "817bb0ba-0784-463d-bb47-1af1b4880ea6",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 482 ms, sys: 3.29 s, total: 3.77 s\n",
-      "Wall time: 44.3 s\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "df_pote_full.export(f\"{OUT_PATH}fake_pote_full.arrow\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "272b74e7-e19f-4a58-b944-26b6f47f16be",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "4.9G\t/mnt/data-out/leximpact/fake//fake_pote_full.arrow\n",
-      "2.7G\t/mnt/data-out/leximpact/fake//fake_pote_full.parquet\n",
-      "608K\t/mnt/data-out/leximpact/fake//fake_pote_light.parquet\n"
-     ]
-    }
-   ],
-   "source": [
-    "!du -h {OUT_PATH}/*"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "correct-revision",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>rnsgbd</th>\n",
-       "      <th>rnsgld</th>\n",
-       "      <th>revkireZ1aj</th>\n",
-       "      <th>Z1ap</th>\n",
-       "      <th>Z1as</th>\n",
-       "      <th>Z1bj</th>\n",
-       "      <th>Z1bp</th>\n",
-       "      <th>Z1bs</th>\n",
-       "      <th>Z1cj</th>\n",
-       "      <th>Z1cw</th>\n",
-       "      <th>...</th>\n",
-       "      <th>Z4bb</th>\n",
-       "      <th>Z4bc</th>\n",
-       "      <th>Z4bd</th>\n",
-       "      <th>Z4be</th>\n",
-       "      <th>Z6de</th>\n",
-       "      <th>Z8sc</th>\n",
-       "      <th>Z8sw</th>\n",
-       "      <th>Z8sx</th>\n",
-       "      <th>CICS</th>\n",
-       "      <th>MNIMQG</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>data_type</th>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>...</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "      <td>int64</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>count</th>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "      <td>39000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>NA</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mean</th>\n",
-       "      <td>23612.393908666665</td>\n",
-       "      <td>16686.731979897435</td>\n",
-       "      <td>17999.188046692307</td>\n",
-       "      <td>22921.20886292308</td>\n",
-       "      <td>17040.07205202564</td>\n",
-       "      <td>17793.07362046154</td>\n",
-       "      <td>17940.101138589744</td>\n",
-       "      <td>22387.16365876923</td>\n",
-       "      <td>21754.137213666665</td>\n",
-       "      <td>17330.203434615385</td>\n",
-       "      <td>...</td>\n",
-       "      <td>16978.268014820515</td>\n",
-       "      <td>16787.83353846154</td>\n",
-       "      <td>19892.917132</td>\n",
-       "      <td>18082.292583871797</td>\n",
-       "      <td>16897.733525717947</td>\n",
-       "      <td>20601.45104748718</td>\n",
-       "      <td>16426.29728551282</td>\n",
-       "      <td>16835.938336615385</td>\n",
-       "      <td>20841.969054974357</td>\n",
-       "      <td>20157.805337358976</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>std</th>\n",
-       "      <td>1985728.12425</td>\n",
-       "      <td>168367.593804</td>\n",
-       "      <td>468129.681965</td>\n",
-       "      <td>1757926.167845</td>\n",
-       "      <td>348813.11135</td>\n",
-       "      <td>407738.350287</td>\n",
-       "      <td>447581.264892</td>\n",
-       "      <td>1463643.637909</td>\n",
-       "      <td>1409093.278272</td>\n",
-       "      <td>341201.458036</td>\n",
-       "      <td>...</td>\n",
-       "      <td>257858.16848</td>\n",
-       "      <td>304306.782644</td>\n",
-       "      <td>792156.835406</td>\n",
-       "      <td>452162.364351</td>\n",
-       "      <td>206769.636616</td>\n",
-       "      <td>933589.30553</td>\n",
-       "      <td>174934.347089</td>\n",
-       "      <td>200883.42422</td>\n",
-       "      <td>1257896.347618</td>\n",
-       "      <td>1004222.29896</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>min</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>max</th>\n",
-       "      <td>6365242793</td>\n",
-       "      <td>391375749</td>\n",
-       "      <td>1309974940</td>\n",
-       "      <td>5585390554</td>\n",
-       "      <td>939653071</td>\n",
-       "      <td>1120062522</td>\n",
-       "      <td>1245031261</td>\n",
-       "      <td>4582824917</td>\n",
-       "      <td>4402079217</td>\n",
-       "      <td>913693388</td>\n",
-       "      <td>...</td>\n",
-       "      <td>658515059</td>\n",
-       "      <td>802706621</td>\n",
-       "      <td>2348740734</td>\n",
-       "      <td>1258784526</td>\n",
-       "      <td>504485451</td>\n",
-       "      <td>2811703378</td>\n",
-       "      <td>411754892</td>\n",
-       "      <td>487116689</td>\n",
-       "      <td>3897076655</td>\n",
-       "      <td>3048988994</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>7 rows × 29 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                       rnsgbd              rnsgld         revkireZ1aj  \\\n",
-       "data_type               int64               int64               int64   \n",
-       "count                39000000            39000000            39000000   \n",
-       "NA                          0                   0                   0   \n",
-       "mean       23612.393908666665  16686.731979897435  17999.188046692307   \n",
-       "std             1985728.12425       168367.593804       468129.681965   \n",
-       "min                         0                   0                   0   \n",
-       "max                6365242793           391375749          1309974940   \n",
-       "\n",
-       "                        Z1ap               Z1as               Z1bj  \\\n",
-       "data_type              int64              int64              int64   \n",
-       "count               39000000           39000000           39000000   \n",
-       "NA                         0                  0                  0   \n",
-       "mean       22921.20886292308  17040.07205202564  17793.07362046154   \n",
-       "std           1757926.167845       348813.11135      407738.350287   \n",
-       "min                        0                  0                  0   \n",
-       "max               5585390554          939653071         1120062522   \n",
-       "\n",
-       "                         Z1bp               Z1bs                Z1cj  \\\n",
-       "data_type               int64              int64               int64   \n",
-       "count                39000000           39000000            39000000   \n",
-       "NA                          0                  0                   0   \n",
-       "mean       17940.101138589744  22387.16365876923  21754.137213666665   \n",
-       "std             447581.264892     1463643.637909      1409093.278272   \n",
-       "min                         0                  0                   0   \n",
-       "max                1245031261         4582824917          4402079217   \n",
-       "\n",
-       "                         Z1cw  ...                Z4bb               Z4bc  \\\n",
-       "data_type               int64  ...               int64              int64   \n",
-       "count                39000000  ...            39000000           39000000   \n",
-       "NA                          0  ...                   0                  0   \n",
-       "mean       17330.203434615385  ...  16978.268014820515  16787.83353846154   \n",
-       "std             341201.458036  ...        257858.16848      304306.782644   \n",
-       "min                         0  ...                   0                  0   \n",
-       "max                 913693388  ...           658515059          802706621   \n",
-       "\n",
-       "                    Z4bd                Z4be                Z6de  \\\n",
-       "data_type          int64               int64               int64   \n",
-       "count           39000000            39000000            39000000   \n",
-       "NA                     0                   0                   0   \n",
-       "mean        19892.917132  18082.292583871797  16897.733525717947   \n",
-       "std        792156.835406       452162.364351       206769.636616   \n",
-       "min                    0                   0                   0   \n",
-       "max           2348740734          1258784526           504485451   \n",
-       "\n",
-       "                        Z8sc               Z8sw                Z8sx  \\\n",
-       "data_type              int64              int64               int64   \n",
-       "count               39000000           39000000            39000000   \n",
-       "NA                         0                  0                   0   \n",
-       "mean       20601.45104748718  16426.29728551282  16835.938336615385   \n",
-       "std             933589.30553      174934.347089        200883.42422   \n",
-       "min                        0                  0                   0   \n",
-       "max               2811703378          411754892           487116689   \n",
-       "\n",
-       "                         CICS              MNIMQG  \n",
-       "data_type               int64               int64  \n",
-       "count                39000000            39000000  \n",
-       "NA                          0                   0  \n",
-       "mean       20841.969054974357  20157.805337358976  \n",
-       "std            1257896.347618       1004222.29896  \n",
-       "min                         0                   0  \n",
-       "max                3897076655          3048988994  \n",
-       "\n",
-       "[7 rows x 29 columns]"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "df_pote_full.describe()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "banned-speech",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df_pote_sample = df_pote_full.sample(n=300_000)\n",
-    "# df_pote_sample.export_parquet(f\"{OUT_PATH}/fake_pote_partial.parquet\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "hazardous-edition",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df_pote_sample.describe()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "leximpact-prepare-data-kernel",
-   "language": "python",
-   "name": "leximpact-prepare-data-kernel"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/00_tests_avant_casd.ipynb b/notebooks/code_CASD/extractions_base_des_impots/00_tests_avant_casd.ipynb
deleted file mode 100644
index 6db89bfb530f2a4e2a64e61c1ee2b1d7586b7d46..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/00_tests_avant_casd.ipynb
+++ /dev/null
@@ -1,234 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "cd323ce4-f97f-487f-98f5-a85a3f45a54c",
-   "metadata": {},
-   "source": [
-    "# Tests avant CASD"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5499d16b-b59f-4ee3-a11a-7598ccf06a1b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Activate multi-output in notebook\n",
-    "from IPython.core.interactiveshell import InteractiveShell\n",
-    "\n",
-    "InteractiveShell.ast_node_interactivity = \"all\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "195ded86-f330-44eb-b27c-61c5259d143a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import vaex\n",
-    "import pandas as pd\n",
-    "from leximpact_prepare_data.scenario_tools.calib_and_copules import Quantile"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4097cb49-754c-4bc2-a25f-301c4bba15d5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# vdf = vaex.open(\"/mnt/data-out/leximpact/fake//fake_pote_full.parquet\")\n",
-    "vdf = vaex.open(\"/mnt/data-out/leximpact/fake//fake_pote_light.parquet\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5d82ea4e-5342-448a-884f-6488a2ef9b16",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# vdf.get_column_names()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e8550ac7-4a55-44e2-996a-7b12f3a4be51",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 4.94 ms, sys: 798 µs, total: 5.74 ms\n",
-      "Wall time: 10.8 ms\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "data = vdf.Z1ap.tolist()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "84dbb029-51e7-40a5-b034-52f903ccc471",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 4.49 ms, sys: 422 µs, total: 4.91 ms\n",
-      "Wall time: 4.76 ms\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "q = Quantile(data)\n",
-    "decile = q.get_quantile(10)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "676d9ba6-763a-4b60-af22-1701b45a77b4",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[389, 779, 1169, 1559, 1949, 2339, 2729, 3119, 3509, 3899]"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "390"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "1109"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "q.borders\n",
-    "data[q.borders[0]]\n",
-    "data[q.borders[1]]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "faaa1158-1615-4f57-a880-e7faf1b3ff2a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 39.2 ms, sys: 23.5 ms, total: 62.7 ms\n",
-      "Wall time: 48.1 ms\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<Axes: xlabel='lower_bound'>"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "",
-      "text/plain": [
-       "<Figure size 640x480 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "df = pd.DataFrame(decile[\"buckets\"])\n",
-    "df.plot.bar(x=\"lower_bound\", y=\"bucket_mean\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7b0effe9-03c1-4b5d-a5f4-b52f2fee8810",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 152 ms, sys: 115 ms, total: 268 ms\n",
-      "Wall time: 127 ms\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<Axes: xlabel='lower_bound'>"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "",
-      "text/plain": [
-       "<Figure size 1000x700 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "centile = q.get_quantile(100)\n",
-    "df = pd.DataFrame(centile[\"buckets\"])\n",
-    "df.plot.bar(x=\"lower_bound\", y=\"bucket_mean\", figsize=(10, 7))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "leximpact-prepare-data-kernel",
-   "language": "python",
-   "name": "leximpact-prepare-data-kernel"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/20_Convert_SAS.ipynb b/notebooks/code_CASD/extractions_base_des_impots/20_Convert_SAS.ipynb
deleted file mode 100644
index 0f851b40154c49f1a4ebd8e5f68000997e9a8bc5..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/20_Convert_SAS.ipynb
+++ /dev/null
@@ -1,291 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# CASD : Conversion de l'extraction SAS en Apache Parquet + retraitement de certaines variables de POTE brut"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from IPython.core.interactiveshell import InteractiveShell\n",
-    "\n",
-    "InteractiveShell.ast_node_interactivity = \"all\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "year = \"2019\"\n",
-    "# year = \"2019\"\n",
-    "# year = \"2018\"\n",
-    "SAS_FILE = (\n",
-    "    r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\sas/agregats_pote_\"\n",
-    "    + year\n",
-    "    + \".sas7bdat\"\n",
-    ")\n",
-    "OUT_PATH = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data/\"\n",
-    "OUT_PATH = OUT_PATH + \"assiettes_pote_brutes_\" + year + \"-chunk/\"\n",
-    "taille_chunk = 2 * 2**20  # 2**20 = 1_048_576\n",
-    "# taille_chunk = 5000\n",
-    "# taille_chunk = 600_000"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import shutil\n",
-    "from pathlib import Path\n",
-    "\n",
-    "import pandas as pd\n",
-    "import vaex\n",
-    "from tqdm import tqdm"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# sas_col = \"FIP18_c revkire rimp tsirna rbg srbg mnrvr3 mnrvi2 mnrvk mnrvni Z8uy nbpart zn nbefi nbfoy nbpldm mat agec aged clirpg frf Z1ak Z1bk txmoy impot impotnet j rnirp8 rnimeh tsirna mnipeg rnirai rnirdu rnirgi f g h i r p Z1az Z1bz stutile zf zp\".split(    \" \")\n",
-    "# sas_col = \"FIP18_c revkire rimp tsirna rbg srbg mnrvr3 mnrvi2 mnrvk mnrvni Z8uy nbpart zn nbefi nbfoy nbpldm mat agec aged clirpg frf Z1ak Z1bk txmoy impot impotnet j rnirp8 rnimeh tsirna mnipeg rnirai rnirdu rnirgi f g h i r p Z1az Z1bz stutile zf zp\".split(\n",
-    "#     \" \"\n",
-    "# )"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Traitement des données du fichier SAS"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# On somme les colonnes qui nous intéressent, pour l'instant n'est valable que pour 2019\n",
-    "def sum_columns(df):\n",
-    "    # Pour assiette de CSG\n",
-    "    df[\"revenus_capitaux_prelevement_bareme\"] = df.z2ch\n",
-    "    df[\"revenus_capitaux_prelevement_liberatoire\"] = df.z2dh + df.z2xx\n",
-    "    df[\"revenus_capitaux_prelevement_forfaitaire_unique_ir\"] = df.z2dc + df.z2tr + df.z2ts + df.z2ww + df.z2zz + df.z2fu + df.z2tt\n",
-    "    df[\"rente_viagere_titre_onereux_net\"] = df.z1cw + df.z1dw\n",
-    "    # Micro-foncier : on applique l'abattement de 30%\n",
-    "    df[\"revenu_categoriel_foncier\"] = (\n",
-    "        df.z4ba + (df.z4be * 0.7) - (df.z4bb + df.z4bc + df.z4bd)\n",
-    "    )\n",
-    "    # df['rev_categ_foncier4ba']=df.Z4ba\n",
-    "    df[\"assiette_csg_plus_values\"] = df.z3vg + df.z3ua - df.z3va\n",
-    "    df[\"assiette_csg_revenus_capital\"] = (\n",
-    "        df[\"revenus_capitaux_prelevement_bareme\"]\n",
-    "        + df[\"revenus_capitaux_prelevement_liberatoire\"]\n",
-    "        + df[\"revenus_capitaux_prelevement_forfaitaire_unique_ir\"]\n",
-    "        + df[\"rente_viagere_titre_onereux_net\"]\n",
-    "        + df[\"revenu_categoriel_foncier\"]\n",
-    "        + df[\"assiette_csg_plus_values\"]\n",
-    "    )\n",
-    "\n",
-    "    # Autres\n",
-    "    df[\"retraites\"] = df.z1as + df.z1bs  # + df.mnimqg\n",
-    "    #     df[\n",
-    "    #         \"pre_retraites_etranger\"\n",
-    "    #     ] = df.z8sc  # Attention, seulement de l'étranger, sinon c'est df.Z1ap + df.Z1bp\n",
-    "\n",
-    "    df[\"chomage_et_indemnites\"] = df.z1ap + df.z1bp\n",
-    "\n",
-    "    df[\"rev_salaire\"] = df.z1aj + df.z1bj + df.z1cj + df.z8tk + df.z1af + df.z1ag + df.z1aa + df.z1ba + df.z1gb + df.z1hb + df.z1gf + df.z1hf + df.z1gg + df.z1hg +\n",
-    "                        df.z1aq + df.z1bq + df.z1gh + df.z1hh \n",
-    "\n",
-    "    # Revenus des non salariés\n",
-    "    df['rag'] = df.z5hd + df.z5id + df.z5hb + df.z5ib + df.z5hh + df.z5ih + df.z5hc + df.z5ic + df.z5hi + df.z5ii + df.z5ak + df.z5bk + df.z5al + df.z5bl - df.z5hf -\n",
-    "                df.z5if - df.z5hl - df.z5il + df.z5hm + df.z5im + df.z5hz + df.z5iz + df.z5xa + df.z5ya + df.z5xb + df.z5yb + df.z5xt + df.z5xu + df.z5xv + df.z5xw\n",
-    "    \n",
-    "    df['ric'] = df.z5ta + df.z5ua + df.z5tb + df.z5ub + df.z5kn + df.z5ln + df.z5ko + df.z5lo + df.z5kp + df.z5lp + df.z5kb + df.z5lb + df.z5kh + df.z5lh + df.z5kc +\n",
-    "                df.z5lc + df.z5ki + df.z5li + df.z5df + df.z5ef + df.z5dg + df.z5eg - df.z5kf - df.z5lf - df.z5kl - df.z5ll\n",
-    "\n",
-    "    df['rnc'] = df.z5te + df.z5ue + df.z5hp + df.z5ip + df.z5hq + df.z5iq + df.z5qb + df.z5rb + df.z5qh + df.z5rh + df.z5qc + df.z5rc + df.z5qi + df.z5ri + df.z5xj +\n",
-    "                df.z5yj + df.z5xk + df.z5yk - df.z5qe - df.z5re - df.z5qk - df.z5rk + df.z5ql + df.z5rl + df.z5qm + df.z5rm\n",
-    "\n",
-    "    df[\"pension_invalidite\"] = df.z1az + df.z1bz\n",
-    "\n",
-    "    df[\"pension_alimentaire\"] = df.z1ao + df.z1bo\n",
-    "\n",
-    "    df[\"revenus_individuels\"] = df[\"rev_salaire\"] + df[\"retraites\"] + df[\"chomage_et_indemnites\"] + df[\"rag\"] + df[\"ric\"] + df[\"rnc\"] + df[\"pension_invalidite\"] + df.[\"pension_alimentaire\"]\n",
-    "\n",
-    "    df[\"revenus_individuels_par_part\"] = df.revenus_individuels / df.nbpart\n",
-    "    df[\"revkire_par_part\"] = df.revkire / df.nbpart\n",
-    "    \n",
-    "    df.drop(['z2ch', 'z2dh', 'z2xx', 'z2dc', 'z2tr', 'z2ts', 'z2ww', 'z2zz', 'z2fu', 'z2tt', 'z1cw', 'z1dw', 'z4ba', 'z4be', 'z4bb', 'z4bc', 'z4bd',\n",
-    "             'z3vg', 'z3ua', 'z3va', 'z1as', 'z1bs', 'z1ap', 'z1bp', 'z1aj', 'z1bj', 'z1cj', 'z8tk', 'z1af', 'z1ag', 'z1aa', 'z1ba', 'z1gb',\n",
-    "             'z1hb', 'z1gf', 'z1hf', 'z1gg', 'z1hg', 'z1aq', 'z1bq', 'z1gh', 'z1hh', 'z5hd', 'z5id', 'z5hb', 'z5ib', 'z5hh', 'z5ih', \n",
-    "             'z5hc', 'z5ic', 'z5hi', 'z5ii', 'z5ak', 'z5bk', 'z5al', 'z5bl', 'z5hf', 'z5if', 'z5hl', 'z5il', 'z5hm', 'z5im', 'z5hz',\n",
-    "             'z5iz', 'z5xa', 'z5ya', 'z5xb', 'z5yb', 'z5xt', 'z5xu', 'z5xv', 'z5xw', 'z5ta', 'z5ua', 'z5tb', 'z5ub', 'z5kn', 'z5ln', \n",
-    "             'z5ko', 'z5lo', 'z5kp', 'z5lp', 'z5kb', 'z5lb', 'z5kh', 'z5lh', 'z5kc', 'z5lc', 'z5ki', 'z5li', 'z5df', 'z5ef', 'z5dg',\n",
-    "             'z5eg', 'z5kf', 'z5lf', 'z5kl', 'z5ll', 'z5te', 'z5ue', 'z5hp', 'z5ip', 'z5hq', 'z5iq', 'z5qb', 'z5rb', 'z5qh', 'z5rh',\n",
-    "             'z5qc', 'z5rc', 'z5qi', 'z5ri', 'z5xj', 'z5yj', 'z5xk', 'z5yk', 'z5qe', 'z5re', 'z5qk', 'z5rk', 'z5ql', 'z5rl', 'z5qm', 'z5rm',\n",
-    "             'z1az', 'z1bz', 'z1ao', 'z1bo'])\n",
-    "    return df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def clean_chunk(chunk):\n",
-    "    chunk.columns = [c.lower() for c in chunk.columns.to_list()]\n",
-    "    chunk.fillna(0, inplace=True)\n",
-    "    chunk = chunk.astype({\"stutile\": \"str\"})\n",
-    "    chunk = chunk.astype({\"fip18_c\": \"str\"})\n",
-    "    chunk = chunk.replace({\"tsirna\": {\"+\": 1, \"-\": \"-1\"}, \"srbg\": {\"+\": 1, \"-\": \"-1\"}})\n",
-    "    chunk = chunk.astype({\"tsirna\": \"int32\", \"srbg\": \"int32\"})\n",
-    "    chunk[\"rnirp8\"] = chunk[\"rnirp8\"] * chunk[\"tsirna\"]\n",
-    "    chunk[\"rbg\"] = chunk[\"rbg\"] * chunk[\"srbg\"]\n",
-    "    chunk[\"f+h\"] = chunk[\"f\"] + chunk[\"h\"]\n",
-    "    # chunk[\"zp+zf\"] = chunk[\"zp\"] + chunk[\"zf\"] => Its a boolean !!!\n",
-    "    chunk.drop([\"tsirna\", \"srbg\"], axis=1, inplace=True)\n",
-    "    chunk = sum_columns(chunk)\n",
-    "\n",
-    "    return chunk"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Lecture du fichier SAS\n",
-    "\n",
-    "On va lire le fichier par morceau de 1 million de lignes, pour ne pas saturer la mémoire. Il y a 39 millions de lignes.\n",
-    "\n",
-    "On va les enregistrer au fur et à mesure en format Apache Arrow."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Nombre d'itérations : 19\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "19it [22:00, 69.48s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: total: 20min 54s\n",
-      "Wall time: 22min\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "# Temps sur CASD : < 20 minutes.\n",
-    "\n",
-    "\n",
-    "# Efface le dossier de sortie\n",
-    "shutil.rmtree(OUT_PATH, ignore_errors=True)\n",
-    "Path(OUT_PATH).mkdir(parents=True, exist_ok=True)\n",
-    "\n",
-    "dfi = pd.read_sas(\n",
-    "    SAS_FILE, chunksize=taille_chunk, encoding=\"iso8859-15\", iterator=True\n",
-    ")\n",
-    "\n",
-    "dd_values = None\n",
-    "i = 0\n",
-    "print(f\"Nombre d'itérations : {39512402/taille_chunk:.0f}\")\n",
-    "for chunk in tqdm(dfi):\n",
-    "    chunk = clean_chunk(chunk)\n",
-    "    dd_values = vaex.from_pandas(chunk, copy_index=False)\n",
-    "    dd_values.export(f\"{OUT_PATH}pote_brutes_{year}_{i}.parquet\")\n",
-    "    del dd_values\n",
-    "    dd_values = None\n",
-    "    #### DEBUG\n",
-    "    i += 1\n",
-    "    #     if i>=2:\n",
-    "    #         break\n",
-    "    #### DEBUG"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Vérification"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "chunk"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from leximpact_prepare_data.calib_and_copules import tc\n",
-    "\n",
-    "dfv = vaex.open(f\"{OUT_PATH}pote_brutes_{year}_*.parquet\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tc.assertEqual(len(dfv), 39_264_696)  # 39_512_402\n",
-    "tc.assertGreaterEqual(dfv[\"revkire\"].count(), 36644848)\n",
-    "tc.assertGreaterEqual(dfv[\"revkire\"].sum(), 1_084_000_000_000)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "leximpact-prepare-data-kernel",
-   "language": "python",
-   "name": "leximpact-prepare-data-kernel"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/30a_Agregats_POTE-Quantiles.ipynb b/notebooks/code_CASD/extractions_base_des_impots/30a_Agregats_POTE-Quantiles.ipynb
deleted file mode 100644
index 481d2672fe6377a70bf4f51cc3f2c0e2630496fe..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/30a_Agregats_POTE-Quantiles.ipynb
+++ /dev/null
@@ -1,381 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "284168c2-c515-442a-8943-638ab2487933",
-   "metadata": {},
-   "source": [
-    "# CASD : Extraction de quantiles de POTE"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3b1b7645-f0a7-4929-a667-3ffa31e1b4db",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from IPython.core.interactiveshell import InteractiveShell\n",
-    "\n",
-    "InteractiveShell.ast_node_interactivity = \"all\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e888519d-ca9f-404a-b188-49de0bf72e31",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "year = \"2019\"\n",
-    "# year = \"2019\"\n",
-    "# year = \"2018\"\n",
-    "OUT_PATH = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data/\"\n",
-    "ARROW_PATH = OUT_PATH + \"assiettes_pote_brutes_\" + year + r\"-chunk/\"\n",
-    "taille_chunk = 2 * 2**20  # 2**20 = 1_048_576\n",
-    "# taille_chunk = 5000"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5e63307c-d42d-43d6-9ce3-8ea904a338eb",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'0.0.17'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import leximpact_prepare_data\n",
-    "\n",
-    "leximpact_prepare_data.__version__"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d5c9f1f6-e464-48d1-a933-421ad58a270a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import gc\n",
-    "import json\n",
-    "\n",
-    "import vaex\n",
-    "from tqdm import tqdm\n",
-    "\n",
-    "from leximpact_prepare_data.scenario_tools.calib_and_copules import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "805afac3-cab5-4795-8c93-84cd0e99d45f",
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "AssertionError",
-     "evalue": "39818227 != 39264696",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
-      "File \u001b[1;32m<timed exec>:5\u001b[0m, in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n",
-      "File \u001b[1;32mC:\\Users\\Public\\Documents\\Anaconda\\envs\\leximpa\\lib\\unittest\\case.py:837\u001b[0m, in \u001b[0;36mTestCase.assertEqual\u001b[1;34m(self, first, second, msg)\u001b[0m\n\u001b[0;32m    833\u001b[0m \u001b[38;5;124;03m\"\"\"Fail if the two objects are unequal as determined by the '=='\u001b[39;00m\n\u001b[0;32m    834\u001b[0m \u001b[38;5;124;03m   operator.\u001b[39;00m\n\u001b[0;32m    835\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    836\u001b[0m assertion_func \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getAssertEqualityFunc(first, second)\n\u001b[1;32m--> 837\u001b[0m \u001b[43massertion_func\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfirst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msecond\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmsg\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmsg\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mC:\\Users\\Public\\Documents\\Anaconda\\envs\\leximpa\\lib\\unittest\\case.py:830\u001b[0m, in \u001b[0;36mTestCase._baseAssertEqual\u001b[1;34m(self, first, second, msg)\u001b[0m\n\u001b[0;32m    828\u001b[0m standardMsg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m != \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m _common_shorten_repr(first, second)\n\u001b[0;32m    829\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_formatMessage(msg, standardMsg)\n\u001b[1;32m--> 830\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfailureException(msg)\n",
-      "\u001b[1;31mAssertionError\u001b[0m: 39818227 != 39264696"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "# Temps de chargement 8 secondes pour 39,264,695 lignes, vive le lazy loading !\n",
-    "dfv = vaex.open(ARROW_PATH + \"*\")\n",
-    "# dfv = vaex.open(ARROW_PATH + \"pote_brutes_2019_5.arrow\")\n",
-    "# dfv\n",
-    "tc.assertEqual(len(dfv), 39264696)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f7042430-e355-4142-a733-a083b936d67d",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: total: 0 ns\n",
-      "Wall time: 0 ns\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "# Temps d'exécution : 2 secondes\n",
-    "# pyramide_des_ages = dfv.groupby(by=\"aged\", agg={\"age\": vaex.agg.count(\"aged\")})\n",
-    "# pyramide_des_ages"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2e8a14fa-6b7a-4a3a-95d2-c559dbd2b20e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# dfv.info()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7d55425f-af65-4f69-9b84-270020a8122e",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['mat',\n",
-       " 'aged',\n",
-       " 'agec',\n",
-       " 'zf',\n",
-       " 'zp',\n",
-       " 'zn',\n",
-       " 'stutile',\n",
-       " 'f',\n",
-       " 'clirpg',\n",
-       " 'g',\n",
-       " 'r',\n",
-       " 'j',\n",
-       " 'h',\n",
-       " 'i',\n",
-       " 'p',\n",
-       " 'nbefi',\n",
-       " 'nbfoy',\n",
-       " 'nbpldm',\n",
-       " 'rimp',\n",
-       " 'rnirp8',\n",
-       " 'rbg',\n",
-       " 'mnrvr3',\n",
-       " 'txmoy',\n",
-       " 'revkire',\n",
-       " 'z1ak',\n",
-       " 'z1az',\n",
-       " 'z1bk',\n",
-       " 'z1bz',\n",
-       " 'z8uy',\n",
-       " 'mnipeg',\n",
-       " 'mnrvi2',\n",
-       " 'mnrvk',\n",
-       " 'mnrvni',\n",
-       " 'rnimeh',\n",
-       " 'rnirai',\n",
-       " 'rnirdu',\n",
-       " 'rnirgi',\n",
-       " 'frf',\n",
-       " 'impotnet',\n",
-       " 'impot',\n",
-       " 'nbpart',\n",
-       " 'fip18_c',\n",
-       " 'f+h']"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dfv.get_column_names()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d9d20ef4-b120-4f52-8ba5-6f722d56037b",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['f', 'g', 'h', 'i', 'r', 'p']"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\"f g h i r p\".split(\" \")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0653c281-5687-4542-8ff1-bd70dace2410",
-   "metadata": {},
-   "source": [
-    "## Variables continues"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "48b77a1d-4e9e-496b-8131-be831cfbf5ab",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# \"Z1ak Z1bk txmoy impot impotnet rnirp8 rnimeh tsirna mnipeg rnirai rnirdu rnirgi Z1az Z1bz\".split(\" \")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7985d253-e7d1-49b1-8d96-372b2002cef0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# continuous_variables = [\n",
-    "# \"mnipeg\", Toujours à 0\n",
-    "# \"rnirp8\",\n",
-    "# \"rnimeh\",\n",
-    "# \"rnirai\",\n",
-    "# \"rnirdu\",\n",
-    "# \"rnirgi\",\n",
-    "# \"Z1az\",\n",
-    "# \"Z1bz\",\n",
-    "# \"rimp\",\n",
-    "# \"rbg\",\n",
-    "# \"mnrvr3\",\n",
-    "# \"revkire\",\n",
-    "# \"Z1ak\",\n",
-    "# \"Z1bk\",\n",
-    "# \"Z8uy\",\n",
-    "# \"MNRVI2\",\n",
-    "# \"MNRVK\",\n",
-    "# \"MNRVNI\",\n",
-    "# \"FRF\",\n",
-    "# \"Z1ak\",\n",
-    "# \"Z1bk\",\n",
-    "# \"txmoy\",\n",
-    "# \"impot\",\n",
-    "# \"impotnet\",\n",
-    "# ]\n",
-    "# continuous_variables = [c.lower() for c in continuous_variables]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "7f1e01ef-7303-4bfd-9d60-a28f861c56bd",
-   "metadata": {},
-   "source": [
-    "### Calcul des quantiles"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "da0ed4e7-0836-43c1-a99e-c33a5236f7d9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def compute_quantile(vdf, columns=None, quantiles=10):\n",
-    "    vdf.fillna(column_names=columns, value=0, inplace=True)\n",
-    "    # vdf.fillnan(column_names=columns, value=0, inplace=True)\n",
-    "    vdf.shape[0]\n",
-    "    columns = columns if columns else vdf.get_column_names()\n",
-    "    for col in tqdm(columns):\n",
-    "        try:\n",
-    "            # print(col)\n",
-    "            q = Quantile(vdf[col].tolist())\n",
-    "            for quantile in quantiles:\n",
-    "                q_dict = q.get_quantile(quantile)\n",
-    "                with open(\n",
-    "                    f\"{OUT_PATH}/quantile_POTE_{quantile}_{year}_{col}.json\", \"w\"\n",
-    "                ) as f:\n",
-    "                    f.write(json.dumps(q_dict))\n",
-    "            del q\n",
-    "            gc.collect()\n",
-    "        except Exception as e:\n",
-    "            print(f\"ERROR processing {col} {e.__class__.__name__} : {e}\")\n",
-    "            continue"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5960386d-494f-4305-ac33-d98381478b54",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|███████████████████████████████████████| 23/23 [1:50:32<00:00, 288.38s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: total: 1h 49min 50s\n",
-      "Wall time: 1h 50min 32s\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "# Temps sur CASD : 5 minutes par colonne\n",
-    "compute_quantile(dfv, quantiles=[10, 100])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d4dcc7d7-29f2-470e-8713-8858399c54f3",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "469"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "del dfv\n",
-    "gc.collect()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "leximpact-prepare-data-kernel",
-   "language": "python",
-   "name": "leximpact-prepare-data-kernel"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/30b_Copules_POTE.ipynb b/notebooks/code_CASD/extractions_base_des_impots/30b_Copules_POTE.ipynb
deleted file mode 100644
index a930163a42aea70bb64742e0aa84d0cb3e061ce6..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/30b_Copules_POTE.ipynb
+++ /dev/null
@@ -1,714 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "284168c2-c515-442a-8943-638ab2487933",
-   "metadata": {},
-   "source": [
-    "# CASD : Extraction d'agrégats"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b54838d4-6300-4242-816a-2b9df2041868",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#!conda list"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3b1b7645-f0a7-4929-a667-3ffa31e1b4db",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from IPython.core.interactiveshell import InteractiveShell\n",
-    "\n",
-    "InteractiveShell.ast_node_interactivity = \"all\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e888519d-ca9f-404a-b188-49de0bf72e31",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "year = \"2019\"\n",
-    "# year = \"2019\"\n",
-    "# year = \"2018\"\n",
-    "OUT_PATH = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data/\"\n",
-    "OUT_PATH = OUT_PATH + \"assiettes_pote_brutes_\" + year + \"-chunk/\"\n",
-    "taille_chunk = 2 * 2**20  # 2**20 = 1_048_576"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5e63307c-d42d-43d6-9ce3-8ea904a338eb",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'0.0.23'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import leximpact_prepare_data\n",
-    "\n",
-    "leximpact_prepare_data.__version__"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d5c9f1f6-e464-48d1-a933-421ad58a270a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import gc\n",
-    "\n",
-    "import pandas as pd\n",
-    "import vaex\n",
-    "from tqdm import tqdm\n",
-    "\n",
-    "from leximpact_prepare_data.scenario_tools.calib_and_copules import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "805afac3-cab5-4795-8c93-84cd0e99d45f",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: total: 375 ms\n",
-      "Wall time: 372 ms\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "# Temps de chargement 8 secondes pour 39,264,695 lignes, vive la lazy loading !\n",
-    "dfv = vaex.open(OUT_PATH + \"*\")\n",
-    "# dfv = vaex.open(ARROW_PATH + \"pote_brutes_2019_5.arrow\")\n",
-    "# dfv"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f7042430-e355-4142-a733-a083b936d67d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%time\n",
-    "# Temps d'exécution : 2 secondes\n",
-    "# pyramide_des_ages = dfv.groupby(by=\"aged\", agg={\"age\": vaex.agg.count(\"aged\")})\n",
-    "# pyramide_des_ages"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b5a6136d-ad4b-4ad0-b823-1fa476cc0100",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2e8a14fa-6b7a-4a3a-95d2-c559dbd2b20e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# dfv.info()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e08260b0-bf98-4160-8825-bcdaa20c9229",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# tc.assertEqual(dfv[\"revkire\"].count(), 39512402)  # 2019\n",
-    "tc.assertEqual(dfv[\"revkire\"].count(), 39818227)  # 2020"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7d55425f-af65-4f69-9b84-270020a8122e",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['rnsgbd',\n",
-       " 'rnsgld',\n",
-       " 'revkire',\n",
-       " 'z1aj',\n",
-       " 'z1ap',\n",
-       " 'z1as',\n",
-       " 'z1bj',\n",
-       " 'z1bp',\n",
-       " 'z1bs',\n",
-       " 'z1cj',\n",
-       " 'z1cw',\n",
-       " 'z1dw',\n",
-       " 'z2ch',\n",
-       " 'z2dc',\n",
-       " 'z2dh',\n",
-       " 'z2tr',\n",
-       " 'z3ua',\n",
-       " 'z3vg',\n",
-       " 'z3vz',\n",
-       " 'z4ba',\n",
-       " 'z4bb',\n",
-       " 'z4bc',\n",
-       " 'z4bd',\n",
-       " 'z4be',\n",
-       " 'z6de',\n",
-       " 'z8sc',\n",
-       " 'z8sw',\n",
-       " 'z8sx',\n",
-       " 'cics',\n",
-       " 'mnimqg',\n",
-       " 'fip18_c',\n",
-       " 'revenus_capitaux_prelevement_bareme',\n",
-       " 'revenus_capitaux_prelevement_liberatoire',\n",
-       " 'revenus_capitaux_prelevement_forfaitaire_unique_ir',\n",
-       " 'rente_viagere_titre_onereux_net',\n",
-       " 'revenu_categoriel_foncier',\n",
-       " 'assiette_csg_plus_values',\n",
-       " 'assiette_csg_revenus_capital',\n",
-       " 'retraites',\n",
-       " 'chomage_et_indemnites',\n",
-       " 'rev_salaire']"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dfv.get_column_names()\n",
-    "# Remove id fip18_c\n",
-    "_ = dfv.drop(\"fip18_c\", inplace=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0653c281-5687-4542-8ff1-bd70dace2410",
-   "metadata": {},
-   "source": [
-    "## Variables continues"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "48b77a1d-4e9e-496b-8131-be831cfbf5ab",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# \"Z1ak Z1bk txmoy impot impotnet rnirp8 rnimeh tsirna mnipeg rnirai rnirdu rnirgi Z1az Z1bz\".split(\" \")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7985d253-e7d1-49b1-8d96-372b2002cef0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# continuous_variables = dfv.get_column_names()\n",
-    "# continuous_variables = [c.lower() for c in continuous_variables]\n",
-    "continuous_variables = [\n",
-    "    \"revkire\",\n",
-    "    \"revkire_par_part\",\n",
-    "    \"rbg\",\n",
-    "    \"rnirp8\",\n",
-    "    \"assiette_csg_revenus_capital\",\n",
-    "    \"revenus_capitaux_prelevement_bareme\",\n",
-    "    \"revenus_capitaux_prelevement_liberatoire\",\n",
-    "    \"revenus_capitaux_prelevement_forfaitaire_unique_ir\",\n",
-    "    \"rente_viagere_titre_onereux_net\",\n",
-    "    \"revenu_categoriel_foncier\",\n",
-    "    \"assiette_csg_plus_values\",\n",
-    "    \"revenus_individuels\",\n",
-    "    \"revenus_individuels_par_part\",\n",
-    "    \"rev_salaire\",\n",
-    "    \"retraites\",\n",
-    "    \"chomage_et_indemnites\",\n",
-    "    \"rag\",\n",
-    "    \"ric\",\n",
-    "    \"rnc\",\n",
-    "    \"pension_invalidite\",\n",
-    "    \"pension_alimentaire\",\n",
-    "]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f9d8a9c4-f719-4af9-bb7d-ece0262bf405",
-   "metadata": {},
-   "source": [
-    "### Calcul d'agregats"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "418b50a2-0bc9-4f4b-a7db-1bb271ad1d32",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def compute_agg(vdf, columns):\n",
-    "    sub_total = []\n",
-    "    vdf.fillna(column_names=columns, value=0, inplace=True)\n",
-    "    # vdf.fillnan(column_names=columns, value=0, inplace=True)\n",
-    "    ldf = vdf.shape[0]\n",
-    "    columns = columns if columns else vdf.get_column_names()\n",
-    "    for col in tqdm(columns):\n",
-    "        # print(col)\n",
-    "        name = f\"{col}_non_zero\"\n",
-    "        vdf.select(f\"{col} != 0\", name=name)\n",
-    "        nb_no_zero = int(vdf.count(\"*\", selection=name))\n",
-    "        lenzero = ldf - nb_no_zero\n",
-    "        dict_col = {\n",
-    "            \"name\": col,\n",
-    "            \"nb_line\": ldf,\n",
-    "            \"lenzero\": lenzero,\n",
-    "            \"pct_zero\": lenzero / ldf * 100,\n",
-    "            \"sum\": int(vdf.sum(col)),\n",
-    "            \"mean\": float(vdf.mean(col, selection=name)) if nb_no_zero > 0 else 0.0,\n",
-    "            \"variance\": float(vdf.var(col, selection=name)) if nb_no_zero > 0 else 0.0,\n",
-    "            \"std_dev\": float(vdf.std(col, selection=name)) if nb_no_zero > 0 else 0.0,\n",
-    "        }\n",
-    "        sub_total.append(dict_col)\n",
-    "    return pd.DataFrame(sub_total)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "22b6014d-ae3c-4b70-82f8-c5f51d484bed",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%time\n",
-    "# Temps sur CASD : 30s par colonne avant l'upgrade de machine, moins de 3 secondes après upgrade !\n",
-    "df_agg = compute_agg(dfv, continuous_variables)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a3f94b4e-3016-48be-bdcf-0f3e236c434a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# dfv.mnipeg.mean()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "57e615a2-d2d7-4422-bed0-4d8c27e0b06d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pd.set_option(\"display.float_format\", \"{:,}\".format)\n",
-    "# Export dans un fichier\n",
-    "df_agg.to_csv(OUT_PATH + \"/agregats_POTE_revenus_rici_\" + year + \".csv\", index=False)\n",
-    "df_agg"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5317aaf0-24cd-4af9-b757-a50818d74177",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "del df_agg\n",
-    "import gc\n",
-    "\n",
-    "gc.collect()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "785ec58a-da1b-4cd3-8851-169b60d4ef24",
-   "metadata": {},
-   "source": [
-    "## Extraction de fonctions de répartition (pour calibration)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3023e812-2306-4fc8-813e-83e8a9e6da19",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: total: 23.4 s\n",
-      "Wall time: 23.2 s\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "dfv = dfv.fillna(0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9ee6e983-0135-4db8-8d66-43fbbfc3b0f3",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: total: 0 ns\n",
-      "Wall time: 0 ns\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "# calib = get_calib(dfv, \"rimp\", 10)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "aaf56d71-c77d-4e03-8ec8-7b7b337ac8f9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Redéfinition à migrer !\n",
-    "from typing import Dict\n",
-    "\n",
-    "\n",
-    "def get_primary_buckets(\n",
-    "    vdx_sort: vaex.dataframe.DataFrameLocal,\n",
-    "    nb_bucket: int,\n",
-    "    variable_to_split_on: str = \"revkire\",\n",
-    "    minimal_bucket_size=12,\n",
-    "    debug=False,\n",
-    ") -> Dict:\n",
-    "    \"\"\"\n",
-    "    Objectif: Split the variable in buckets\n",
-    "    Dans chaque bucket on stocke toutes les valeurs non nulles de \"variable\"\n",
-    "    ::vdx_sort:: Le dataset, trié selon la variable à étudier\n",
-    "    ::nb_bucket:: Nombre de tranches souhaitées\n",
-    "    ::variable_to_split_on:: Variable on which to split buckets\n",
-    "    ::debug:: Pour activer un mode debug, qui affiche des traces\n",
-    "    \"\"\"\n",
-    "    dataset_size = vdx_sort.shape[0]  # Nb de lignes\n",
-    "    # Conversion en array\n",
-    "    variable_array = vdx_sort.to_arrays(\n",
-    "        column_names=[variable_to_split_on], selection=False, array_type=\"python\"\n",
-    "    )[0]\n",
-    "    # On vérifie que le dataset est bien trié\n",
-    "    previous = variable_array[-1]\n",
-    "    for i in range(1, 1000):\n",
-    "        idx = dataset_size // i\n",
-    "        idx = idx if idx != dataset_size else dataset_size - 1\n",
-    "        if previous < variable_array[idx]:\n",
-    "            raise DatasetNotSorted(\n",
-    "                f\"Your dataset is not sorted on {variable_to_split_on}!\"\n",
-    "            )\n",
-    "        previous = variable_array[idx]\n",
-    "\n",
-    "    # Découpage du RFR en buckets:\n",
-    "    borders = get_borders(\n",
-    "        dataset_size=dataset_size,\n",
-    "        nb_bucket=nb_bucket,\n",
-    "        minimal_bucket_size=minimal_bucket_size,\n",
-    "        debug=debug,\n",
-    "    )\n",
-    "\n",
-    "    # On retire la dernière frontière pour éviter des tests (index out of range), on la remetra après\n",
-    "    borders = borders[:-1]\n",
-    "    i = 0\n",
-    "    # On supprime les frontières qui n'auraient que du 0\n",
-    "    while i < len(borders):\n",
-    "        if variable_array[borders[i]] < 1:\n",
-    "            if debug:\n",
-    "                print(\n",
-    "                    f\"WARNING: On efface la frontière d'index {i} : {borders[i]} inutile car valeur de la borne haute est {variable_array[borders[i]]}\"\n",
-    "                )\n",
-    "            borders = borders[:i] + borders[i + 1 :]\n",
-    "        else:\n",
-    "            i += 1\n",
-    "    frontieres_valeurs = [0] + [variable_array[frontiere] for frontiere in borders]\n",
-    "    # On ajoute une valeur de fin trés haute (10^15€)\n",
-    "    frontieres_valeurs += [10**15]\n",
-    "    # On remet la dernière frontière\n",
-    "    borders += [dataset_size]\n",
-    "    dic = {\"borders_values\": frontieres_valeurs, \"borders\": borders}\n",
-    "    del variable_array\n",
-    "    gc.collect()\n",
-    "    return dic"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c45ddd1b-2477-4bd2-ad27-a4652fafe270",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# TODO : import from package\n",
-    "# def get_fake_upper_bound(val):\n",
-    "#     if val == 1e15:\n",
-    "#         return 1e15\n",
-    "#     else:\n",
-    "#         return 10 ** (len(str(int(val))))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "32fa27f6-6639-4fca-8eb0-16597361b652",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# get_fake_upper_bound(100.5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8317e05e-df85-4b16-bca8-80ca19e8223e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# calib = get_copulas(dfv, \"revkire\", \"revkire\", 10, une_tranche_rfr)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "59ae4f6a-6fe5-4bbf-8aba-9fcb1c75734a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# calib[\"copules\"][\"buckets\"][-1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "50bf217f-3a83-4884-87a7-98d3768878b4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# calib = calib[\"copules\"][0][\"buckets\"]\n",
-    "# keep_upper_bound_secret(calib)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a974e412-cbfa-4a64-86e9-f6fe80394f41",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# calib"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d2a34c2f-94de-44e4-a4ee-1ef029c8d5c6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# calib[\"buckets\"][-1][\"seuil_var_supp\"] = \"secret\"\n",
-    "# calib"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a122c283-4bef-4ffa-a76b-d846d36f50be",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# from IPython.display import JSON\n",
-    "\n",
-    "# JSON(calib)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "df0387a2-0574-4acc-a75e-f8c61fc09afc",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# %%time\n",
-    "# # Temps sur CASD : 138s par iteration\n",
-    "# nb_bucket_var = 100\n",
-    "\n",
-    "# for variable in tqdm(continuous_variables):\n",
-    "#     #calib = get_calib(dfv, variable, nb_bucket_var)\n",
-    "#     # print(variable)\n",
-    "#     calib = compute_copule_vaex(dfv, variable, nb_bucket_var, une_tranche_rfr)\n",
-    "#     calib[\"copules\"][0][\"buckets\"][-1][\"seuil_var_supp\"] = \"secret\"\n",
-    "#     with open(f\"{OUT_PATH}CalibPote-{nb_bucket_var}-{year}-{variable}.json\", \"w\") as f:\n",
-    "#         f.write(json.dumps(calib[\"copules\"][0][\"buckets\"]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "026dcad9-2b94-4ba7-8117-8cc92efc8b8c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# %%time\n",
-    "# # Temps sur CASD : 538s par iteration !\n",
-    "# nb_bucket_var = 1000\n",
-    "\n",
-    "# for variable in tqdm(continuous_variables):\n",
-    "#     #calib = get_calib(dfv, variable, nb_bucket_var)\n",
-    "#     # print(variable)\n",
-    "#     calib = compute_copule_vaex(dfv, variable, nb_bucket_var, une_tranche_rfr)\n",
-    "#     calib[\"copules\"][0][\"buckets\"][-1][\"seuil_var_supp\"] = \"secret\"\n",
-    "#     with open(f\"{OUT_PATH}CalibPote-{nb_bucket_var}-{year}-{variable}.json\", \"w\") as f:\n",
-    "#         f.write(json.dumps(calib[\"copules\"][0][\"buckets\"]))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d1c5f22c-5e27-4a8f-94a3-5bfe897f1d7f",
-   "metadata": {},
-   "source": [
-    "## Extraction de Copules"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c4e85dd4-2c66-4994-ad0d-5d327737aece",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|███████████████████████████████████████████| 3/3 [07:55<00:00, 158.59s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: total: 7min 54s\n",
-      "Wall time: 7min 55s\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "nb_bucket_var = 10\n",
-    "\n",
-    "# on fait des copules en fonction du rfr mais aussi en fonction des revenus individuels pour voir si ça permet d'améliorer l'imputation\n",
-    "# Les copules en fonction de assiette_csg_revenus_capital servent si on veut voir la distribution des différents revenus du capital dans la somme de revenus du capital\n",
-    "for copule_var in [\"revkire\", \"revkire_par_part\", \"revenus_individuels\", \"revenus_individuels_par_part\", \"assiette_csg_revenus_capital\"]\n",
-    "    centile = get_primary_buckets(\n",
-    "        dfv, nb_bucket_var, variable_to_split_on=copule_var, minimal_bucket_size=500\n",
-    "    )\n",
-    "\n",
-    "    for variable in tqdm(continuous_variables):  # continuous_variables\n",
-    "        try:\n",
-    "            copule = get_copulas(\n",
-    "                dfv,\n",
-    "                copule_var,\n",
-    "                variable,\n",
-    "                nb_bucket_var,\n",
-    "                centile_rfr,\n",
-    "                minimal_bucket_size=100,\n",
-    "            )\n",
-    "            # copule[\"copules\"][0][\"buckets\"][-1][\"upper_bound\"] = \"secret\"\n",
-    "            anonimyze_lower_and_upper_bound(copule[\"copules\"])\n",
-    "            with open(\n",
-    "                f\"{OUT_PATH}CopulePote-{nb_bucket_var}-{year}-{copule_var}-{variable}.json\", \"w\"\n",
-    "            ) as f:\n",
-    "                f.write(json.dumps(copule))\n",
-    "        except Exception as e:\n",
-    "            print(f\"ERROR processing {variable}\", e)\n",
-    "            # raise e"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "364dff4d-d746-441e-bde6-1db0594e80b5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# dfv.column_names"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "53396d39-b2d7-459a-abde-83a77a897f50",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# copule = get_copulas(dfv, \"revkire\", variable, nb_bucket_var, centile_rfr)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a753f56c-48e3-4dcd-a388-c5249615759f",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "leximpact-prepare-data-kernel",
-   "language": "python",
-   "name": "leximpact-prepare-data-kernel"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/40_Verifications.ipynb b/notebooks/code_CASD/extractions_base_des_impots/40_Verifications.ipynb
deleted file mode 100644
index 6e5c592f84b7f05a3142d5ec9291b39ca65a2d48..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/40_Verifications.ipynb
+++ /dev/null
@@ -1,208 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "4bacd951",
-   "metadata": {},
-   "source": [
-    "# CASD : Vérification des extractions"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "455a8fc3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | hide\n",
-    "from IPython.core.interactiveshell import InteractiveShell\n",
-    "from matplotlib.ticker import PercentFormatter\n",
-    "\n",
-    "InteractiveShell.ast_node_interactivity = \"all\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0c00e027",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "IN_PATH = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\csg\\data_out/\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3a42607a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "\n",
-    "# import seaborn as sns\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2c6c90b0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# file = \"CalibPote-2019-rev_salaire.json\"\n",
-    "file = \"CalibPote-2019-assiette_csg_revenus_capital.json\"\n",
-    "# file ='CalibPote-1000-2019-revkire.json'\n",
-    "with open(IN_PATH + file) as f:\n",
-    "    calib = json.load(f)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "40771600",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "calib[\"buckets\"][15]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "52935304",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pd.set_option(\"display.float_format\", \"{:,}\".format)\n",
-    "df = pd.DataFrame(calib[\"buckets\"])\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "7518c848",
-   "metadata": {},
-   "source": [
-    "60% des foyers n'ont pas de revenu du capital"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4ef8124b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.query(\"0.70 < ratio_nb_above_seuil < 0.9\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8d81e5a3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.iloc[99]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a7f2e110",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df[\"seuil_de_var_str\"] = df.seuil_var_supp.astype(str)\n",
-    "df[\"pareto\"] = (1 - df.ratio_nb_above_seuil) * 100"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "de033787",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sns.set(rc={\"figure.figsize\": (20, 8)})\n",
-    "ax = sns.scatterplot(data=df, x=\"seuil_de_var_str\", y=\"seuil_var_supp\")\n",
-    "_ = ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=90)\n",
-    "_ = ax.set_title(\"Pareto de la variable\")\n",
-    "ax2 = ax.twinx()\n",
-    "ax3 = sns.lineplot(data=df, x=\"seuil_de_rfr_str\", y=\"pareto\")\n",
-    "ax3.yaxis.set_major_formatter(PercentFormatter())\n",
-    "# _ = ax.set_yscale(\"log\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f1fcf1be",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ax3 = sns.lineplot(data=df, x=\"seuil_var_supp\", y=\"pareto\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2488ff98",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sns.set(rc={\"figure.figsize\": (20, 8)})\n",
-    "ax = sns.scatterplot(data=df, x=\"seuil_var_supp\", y=\"nombre_ff_tranche\")\n",
-    "# _ = ax.set_yscale(\"log\")\n",
-    "_ = ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=90)\n",
-    "_ = ax.set_title(\"Nombre de foyers par tranche de RFR\\nEchelle logarithmique\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3f8df2c3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sns.set(rc={\"figure.figsize\": (20, 8)})\n",
-    "ax = sns.scatterplot(data=df, x=\"seuil_var_supp\", y=\"mean_tranche_var\")\n",
-    "_ = ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=90)\n",
-    "_ = ax.set_title(\"Nombre de foyers par tranche de RFR\\nEchelle logarithmique\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ece9f9c2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ax = sns.lineplot(data=df, x=\"seuil_var_supp\", y=\"sum_var_above_seuil\")\n",
-    "_ = ax.set_yscale(\"log\")\n",
-    "# _ = ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=90)\n",
-    "_ = ax.set_title(\"Nombre de foyers par tranche de RFR\\nEchelle logarithmique\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9f98dc9b",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/Colonnes_POTE_2019.xlsx b/notebooks/code_CASD/extractions_base_des_impots/Colonnes_POTE_2019.xlsx
deleted file mode 100644
index d44a25648b404a56e46dcf638228cdd19c223e2d..0000000000000000000000000000000000000000
Binary files a/notebooks/code_CASD/extractions_base_des_impots/Colonnes_POTE_2019.xlsx and /dev/null differ
diff --git a/notebooks/code_CASD/extractions_base_des_impots/_test_install.ipynb b/notebooks/code_CASD/extractions_base_des_impots/_test_install.ipynb
deleted file mode 100644
index fc0722e8d362a9fa30f35c4b56562b3f6e1a5a58..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/_test_install.ipynb
+++ /dev/null
@@ -1,497 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "possible-celebrity",
-   "metadata": {},
-   "source": [
-    "# Test l'installation du paquet"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "supreme-technical",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pydantic.version import VERSION\n",
-    "\n",
-    "from leximpact_prepare_data.scenario_tools.calib_and_copules import (\n",
-    "    DistribDeVarVaex,\n",
-    "    get_primary_buckets,\n",
-    ")\n",
-    "\n",
-    "VERSION\n",
-    "\n",
-    "import vaex\n",
-    "\n",
-    "from leximpact_prepare_data.scenario_tools.calib_and_copules import (\n",
-    "    get_copulas,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "psychological-voltage",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "50\n"
-     ]
-    }
-   ],
-   "source": [
-    "variable_values = [i * 10 for i in range(50)]\n",
-    "dist = DistribDeVarVaex(\n",
-    "    variable_values=variable_values,\n",
-    "    variable=\"toto\",\n",
-    "    nb_entity=len(variable_values),\n",
-    "    nb_bucket_var=2,\n",
-    "    lower_bound=50,\n",
-    "    upper_bound=1e10,\n",
-    "    debug=False,\n",
-    ")\n",
-    "print(dist.to_dict()[\"count\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "middle-damage",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_raw = vaex.open(\"fake_pote_light.parquet\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "vital-moral",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "<thead>\n",
-       "<tr><th>#                                </th><th>revkire  </th><th>rev_capital_partiel  </th><th>rev_salaire  </th><th>rente_viagere  </th><th>rev_categ_foncier  </th><th>retraites  </th><th>chomage  </th></tr>\n",
-       "</thead>\n",
-       "<tbody>\n",
-       "<tr><td><i style='opacity: 0.6'>0</i>    </td><td>0        </td><td>0                    </td><td>0            </td><td>0              </td><td>0                  </td><td>0          </td><td>0        </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>1</i>    </td><td>0        </td><td>0                    </td><td>0            </td><td>0              </td><td>0                  </td><td>0          </td><td>0        </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>2</i>    </td><td>0        </td><td>0                    </td><td>0            </td><td>0              </td><td>0                  </td><td>0          </td><td>0        </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>3</i>    </td><td>0        </td><td>0                    </td><td>0            </td><td>0              </td><td>0                  </td><td>0          </td><td>0        </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>4</i>    </td><td>0        </td><td>0                    </td><td>0            </td><td>0              </td><td>0                  </td><td>0          </td><td>0        </td></tr>\n",
-       "<tr><td>...                              </td><td>...      </td><td>...                  </td><td>...          </td><td>...            </td><td>...                </td><td>...        </td><td>...      </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>3,895</i></td><td>22367    </td><td>67                   </td><td>3355         </td><td>67             </td><td>82                 </td><td>55         </td><td>23178    </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>3,896</i></td><td>475      </td><td>23                   </td><td>71           </td><td>23             </td><td>2                  </td><td>1          </td><td>80280    </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>3,897</i></td><td>4561     </td><td>12                   </td><td>684          </td><td>12             </td><td>6474               </td><td>4316       </td><td>41896    </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>3,898</i></td><td>43151    </td><td>12                   </td><td>6472         </td><td>12             </td><td>18                 </td><td>12         </td><td>123001   </td></tr>\n",
-       "<tr><td><i style='opacity: 0.6'>3,899</i></td><td>2234     </td><td>1                    </td><td>335          </td><td>1              </td><td>209                </td><td>139        </td><td>22409    </td></tr>\n",
-       "</tbody>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "#      revkire    rev_capital_partiel    rev_salaire    rente_viagere    rev_categ_foncier    retraites    chomage\n",
-       "0      0          0                      0              0                0                    0            0\n",
-       "1      0          0                      0              0                0                    0            0\n",
-       "2      0          0                      0              0                0                    0            0\n",
-       "3      0          0                      0              0                0                    0            0\n",
-       "4      0          0                      0              0                0                    0            0\n",
-       "...    ...        ...                    ...            ...              ...                  ...          ...\n",
-       "3,895  22367      67                     3355           67               82                   55           23178\n",
-       "3,896  475        23                     71             23               2                    1            80280\n",
-       "3,897  4561       12                     684            12               6474                 4316         41896\n",
-       "3,898  43151      12                     6472           12               18                   12           123001\n",
-       "3,899  2234       1                      335            1                209                  139          22409"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "test_raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "former-hawaii",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_raw = test_raw.sort(\"revkire\", ascending=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "included-nigeria",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[0;31mSignature:\u001b[0m\n",
-      "\u001b[0mget_copulas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvdf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvaex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrameLocal\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_variable\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvariable\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mnb_bucket_var\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_buckets\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0madd_upper_bucket\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.01\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mminimal_bucket_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mSource:\u001b[0m   \n",
-      "\u001b[0;32mdef\u001b[0m \u001b[0mget_copulas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvdf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvaex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrameLocal\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_variable\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvariable\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mnb_bucket_var\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_buckets\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0madd_upper_bucket\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1e-2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mminimal_bucket_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;34m\"\"\"\u001b[0m\n",
-      "\u001b[0;34m    On nous donne des tranches de RFR, en nombre de personne, et en valeur de RFR\u001b[0m\n",
-      "\u001b[0;34m    Pour chacune de ses tranches on doit extraire les valeurs de 'variable'\u001b[0m\n",
-      "\u001b[0;34m    On ne garde que celle supérieure à 0 et on les envoie à DistribDeVarVaex\u001b[0m\n",
-      "\u001b[0;34m    ::vdf:: Le jeux de données\u001b[0m\n",
-      "\u001b[0;34m    ::variable:: Nom de la variable secondaire.\u001b[0m\n",
-      "\u001b[0;34m    ::nb_bucket_var:: Nombre de tranches de variable secondaire souhaités.\u001b[0m\n",
-      "\u001b[0;34m    ::primary_buckets:: La liste des tranches de RFR.\u001b[0m\n",
-      "\u001b[0;34m    ::debug:: Pour activer un mode debug, qui affiche des traces.\u001b[0m\n",
-      "\u001b[0;34m    ::minimal_bucket_size:: Nombre minimal d'individus pour respecter le secret statistique.\u001b[0m\n",
-      "\u001b[0;34m    \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mcontrole\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mcopules\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mfrontieres_valeurs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_buckets\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"borders_values\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mborders\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_buckets\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"borders\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mif\u001b[0m \u001b[0mprimary_variable\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_column_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mprimary_variable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_variable\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mprimary_variable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_column_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# Conversion en array\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_variable_array\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mcolumn_names\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mprimary_variable\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marray_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"python\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdataset_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# On vérifie que le dataset est bien trié\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprevious\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset_size\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0midx\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mdataset_size\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mdataset_size\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mprevious\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mraise\u001b[0m \u001b[0mDatasetNotSorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Your dataset is not sorted on {primary_variable}!\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mprevious\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# On parcourt les frontières de FF (= les index dans le tableau)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0midx_inf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdebut\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# On ne peut malheureusement pas filtrer par > 0 avant extraction car cela fausserait le nombre de valeur\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvariable_all_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mcolumn_names\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvariable\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marray_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"python\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# On fait l'hypothèse que c'est bien trié par ordre croissant\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mlower_bound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_inf\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps d'extraction par to_arrays  {time()-debut}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx_sup\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mborders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mstarttime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mupper_bound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfrontieres_valeurs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m  \u001b[0;31m# Car frontieres_valeurs contient 0\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mvariable_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvariable_all_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_inf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0midx_sup\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# nb_entity = vdf_tmp.shape[0]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mnb_entity\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"-----------------Temps après slice {time()-starttime}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32massert\u001b[0m \u001b[0mnb_entity\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0midx_sup\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0midx_inf\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# Quand il y a beaucoup de personne ayant le même revenu on peut avec des tranches avec lower_bound=upper_bound, mais ce n'est pas gênant\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_inf\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlower_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mand\u001b[0m \u001b[0mlower_bound\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mupper_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34mf\"get_copulas {i} WARNING: Il y a peut-être un problème car le RFR du premier index (idx_inf={idx_inf}) est {primary_variable_array[idx_inf]} alors que lower_bound vaut {lower_bound}\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mi\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mborders\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mand\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_sup\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mupper_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mand\u001b[0m \u001b[0mlower_bound\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mupper_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34mf\"get_copulas {i} WARNING: Il y a peut-être un problème car le RFR du dernier index (idx_sup={idx_sup}) est {primary_variable_array[idx_sup]} alors que upper_bound vaut {upper_bound}\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# Remove 0\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mvariable_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvariable_values\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m0.9999\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps avant sort {time()-starttime}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# Tri des variables : sort() est plus rapide que sorted, mais écrase notre liste\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mvariable_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# variable_values = sorted(variable_values)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps après sort {time()-starttime}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34mf\"get_copulas {i} : index entre idx_inf={idx_inf} et idx_sup={idx_sup} - RFR entre lower_bound={lower_bound} et upper_bound={upper_bound} - {len(variable_values)} valeurs différentes de zéro.\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                    \u001b[0;34mf\"\\tmin(variable_values)={min(variable_values)} max(variable_values)={max(variable_values)}\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0midx_sup\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0midx_inf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34mf\"get_copulas ERROR i={i} len(variable_values)={len(variable_values)} != {idx_sup - idx_inf}\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32massert\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0midx_sup\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0midx_inf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mDistribDeVar_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mbdr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDistribDeVarVaex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mvariable_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mvariable\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvariable\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mnb_entity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnb_entity\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mnb_bucket_var\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnb_bucket_var\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mlower_bound\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlower_bound\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mupper_bound\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mupper_bound\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0madd_upper_bucket\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0madd_upper_bucket\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mminimal_bucket_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mminimal_bucket_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps de DistribDeVarVaex {time()-DistribDeVar_time}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# Et on ajoute ce tableau à la liste des tableaux\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mcopules\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mbdr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0midx_inf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx_sup\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mlower_bound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mupper_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps après fin de la boucle {time()-starttime} --------------\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"DEBUG EXIT !!!\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdico\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"controle\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mcontrole\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"copules\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mcopules\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mreturn\u001b[0m \u001b[0mdico\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mFile:\u001b[0m      ~/leximpact/leximpact-prepare-data/leximpact_prepare_data/scenario_tools/calib_and_copules.py\n",
-      "\u001b[0;31mType:\u001b[0m      function"
-     ]
-    }
-   ],
-   "source": [
-    "get_copulas??"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ee3a8ea4",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[0;31mSignature:\u001b[0m\n",
-      "\u001b[0mget_copulas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvdf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvaex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrameLocal\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_variable\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvariable\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mnb_bucket_var\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_buckets\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0madd_upper_bucket\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.01\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mminimal_bucket_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mSource:\u001b[0m   \n",
-      "\u001b[0;32mdef\u001b[0m \u001b[0mget_copulas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvdf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvaex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrameLocal\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_variable\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvariable\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mnb_bucket_var\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_buckets\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0madd_upper_bucket\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1e-2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mminimal_bucket_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;34m\"\"\"\u001b[0m\n",
-      "\u001b[0;34m    On nous donne des tranches de RFR, en nombre de personne, et en valeur de RFR\u001b[0m\n",
-      "\u001b[0;34m    Pour chacune de ses tranches on doit extraire les valeurs de 'variable'\u001b[0m\n",
-      "\u001b[0;34m    On ne garde que celle supérieure à 0 et on les envoie à DistribDeVarVaex\u001b[0m\n",
-      "\u001b[0;34m    ::vdf:: Le jeux de données\u001b[0m\n",
-      "\u001b[0;34m    ::variable:: Nom de la variable secondaire.\u001b[0m\n",
-      "\u001b[0;34m    ::nb_bucket_var:: Nombre de tranches de variable secondaire souhaités.\u001b[0m\n",
-      "\u001b[0;34m    ::primary_buckets:: La liste des tranches de RFR.\u001b[0m\n",
-      "\u001b[0;34m    ::debug:: Pour activer un mode debug, qui affiche des traces.\u001b[0m\n",
-      "\u001b[0;34m    ::minimal_bucket_size:: Nombre minimal d'individus pour respecter le secret statistique.\u001b[0m\n",
-      "\u001b[0;34m    \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mcontrole\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mcopules\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mfrontieres_valeurs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_buckets\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"borders_values\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mborders\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_buckets\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"borders\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mif\u001b[0m \u001b[0mprimary_variable\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_column_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mprimary_variable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_variable\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mprimary_variable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_column_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# Conversion en array\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprimary_variable_array\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mcolumn_names\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mprimary_variable\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marray_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"python\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdataset_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# On vérifie que le dataset est bien trié\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mprevious\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset_size\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0midx\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mdataset_size\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mdataset_size\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mprevious\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mraise\u001b[0m \u001b[0mDatasetNotSorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Your dataset is not sorted on {primary_variable}!\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mprevious\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# On parcourt les frontières de FF (= les index dans le tableau)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0midx_inf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdebut\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# On ne peut malheureusement pas filtrer par > 0 avant extraction car cela fausserait le nombre de valeur\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mvariable_all_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mcolumn_names\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvariable\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marray_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"python\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;31m# On fait l'hypothèse que c'est bien trié par ordre croissant\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mlower_bound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_inf\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps d'extraction par to_arrays  {time()-debut}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx_sup\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mborders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mstarttime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mupper_bound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfrontieres_valeurs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m  \u001b[0;31m# Car frontieres_valeurs contient 0\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mvariable_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvariable_all_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_inf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0midx_sup\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# nb_entity = vdf_tmp.shape[0]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mnb_entity\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"-----------------Temps après slice {time()-starttime}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32massert\u001b[0m \u001b[0mnb_entity\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0midx_sup\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0midx_inf\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# Quand il y a beaucoup de personne ayant le même revenu on peut avec des tranches avec lower_bound=upper_bound, mais ce n'est pas gênant\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_inf\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlower_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mand\u001b[0m \u001b[0mlower_bound\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mupper_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34mf\"get_copulas {i} WARNING: Il y a peut-être un problème car le RFR du premier index (idx_inf={idx_inf}) est {primary_variable_array[idx_inf]} alors que lower_bound vaut {lower_bound}\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mi\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mborders\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mand\u001b[0m \u001b[0mprimary_variable_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_sup\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mupper_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mand\u001b[0m \u001b[0mlower_bound\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mupper_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34mf\"get_copulas {i} WARNING: Il y a peut-être un problème car le RFR du dernier index (idx_sup={idx_sup}) est {primary_variable_array[idx_sup]} alors que upper_bound vaut {upper_bound}\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# Remove 0\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mvariable_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvariable_values\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m0.9999\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps avant sort {time()-starttime}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# Tri des variables : sort() est plus rapide que sorted, mais écrase notre liste\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mvariable_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# variable_values = sorted(variable_values)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps après sort {time()-starttime}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34mf\"get_copulas {i} : index entre idx_inf={idx_inf} et idx_sup={idx_sup} - RFR entre lower_bound={lower_bound} et upper_bound={upper_bound} - {len(variable_values)} valeurs différentes de zéro.\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                    \u001b[0;34mf\"\\tmin(variable_values)={min(variable_values)} max(variable_values)={max(variable_values)}\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0midx_sup\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0midx_inf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m                \u001b[0;34mf\"get_copulas ERROR i={i} len(variable_values)={len(variable_values)} != {idx_sup - idx_inf}\"\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32massert\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0midx_sup\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0midx_inf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mDistribDeVar_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mbdr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDistribDeVarVaex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mvariable_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvariable_values\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mvariable\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvariable\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mnb_entity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnb_entity\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mnb_bucket_var\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnb_bucket_var\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mlower_bound\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlower_bound\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mupper_bound\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mupper_bound\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0madd_upper_bucket\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0madd_upper_bucket\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mminimal_bucket_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mminimal_bucket_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps de DistribDeVarVaex {time()-DistribDeVar_time}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;31m# Et on ajoute ce tableau à la liste des tableaux\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mcopules\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mbdr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0midx_inf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx_sup\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0mlower_bound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mupper_bound\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Temps après fin de la boucle {time()-starttime} --------------\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m        \u001b[0;32mif\u001b[0m \u001b[0mdebug\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"DEBUG EXIT !!!\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m            \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0mdico\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"controle\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mcontrole\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"copules\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mcopules\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
-      "\u001b[0;34m\u001b[0m    \u001b[0;32mreturn\u001b[0m \u001b[0mdico\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mFile:\u001b[0m      ~/leximpact/leximpact-prepare-data/leximpact_prepare_data/scenario_tools/calib_and_copules.py\n",
-      "\u001b[0;31mType:\u001b[0m      function"
-     ]
-    }
-   ],
-   "source": [
-    "??get_copulas"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "abandoned-league",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "get_borders frontieres de base [390, 780, 1170, 1560, 1950, 2340, 2730, 3120, 3510]\n",
-      "get_borders frontieres avant [390, 780, 1170, 1560, 1950, 2340, 2730, 3120, 3510, 3861, 3900]\n",
-      "get_borders len(borders) avant 11\n",
-      "get_borders frontieres apres [390, 780, 1170, 1560, 1950, 2340, 2730, 3120, 3510, 3861, 3900]\n",
-      "get_borders frontieres avant fin [390, 780, 1170, 1560, 1950, 2340, 2730, 3120, 3510, 3861, 3900]\n"
-     ]
-    }
-   ],
-   "source": [
-    "tranche_rfr_small_test = get_primary_buckets(test_raw, 10, debug=True)\n",
-    "cop = get_copulas(test_raw, 10, \"rev_salaire\", 10, tranche_rfr_small_test)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "leximpact-prepare-data-kernel",
-   "language": "python",
-   "name": "leximpact-prepare-data-kernel"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/_test_spark.ipynb b/notebooks/code_CASD/extractions_base_des_impots/_test_spark.ipynb
deleted file mode 100644
index b4bdeb02b2797bebced491fa4c94d444cbce31cb..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/_test_spark.ipynb
+++ /dev/null
@@ -1,156 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "4b1c23b1",
-   "metadata": {},
-   "source": [
-    "# CASD : Test de l'utilisation de Spark"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7e463584",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import findspark"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7c8ec284",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "findspark.init()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c74420d4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pyspark.sql import SparkSession\n",
-    "\n",
-    "spark = SparkSession.builder.appName(\"Basics\").getOrCreate()\n",
-    "print(spark.version)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c187137f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "calib = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\csg\\data_out\\CalibPOTE_2019.csv\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "25e34ee1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = spark.read.csv(parquet_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cb0f594b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "hdf_pote = (\n",
-    "    r\"C:\\Users\\Public\\Documents\\TRAVAIL\\csg\\data_in\\extraction_assiettes_csg\\*.hdf\"\n",
-    ")\n",
-    "hdf_pote"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "91655d9d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "parquet_path = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\csg\\data_in\\assiettes_csg.parquet\"\n",
-    "parquet_path"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "08ae5ffd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# !dir {parquet_path}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "32f3e09b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = spark.read.parquet(parquet_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "99b1a4ae",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.createOrReplaceTempView(\"csg\")\n",
-    "df_count = spark.sql(\"SELECT count(*) FROM csg\")\n",
-    "df_count.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "328c545c",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "11ff65d3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df = spark.read.format(\"com.github.saurfang.sas.spark\").load(r\"\\\\casd.fr\\casdfs\\Projets\\LEXIMPA\\Data\\POTE_POTE_2019\\pote_diff_2019.sas7bdat\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c2a09032",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "leximpa",
-   "language": "python",
-   "name": "leximpa"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/_test_vispy.ipynb b/notebooks/code_CASD/extractions_base_des_impots/_test_vispy.ipynb
deleted file mode 100644
index 90ca532817095999af5b2c26f43323a21a66d6e7..0000000000000000000000000000000000000000
--- a/notebooks/code_CASD/extractions_base_des_impots/_test_vispy.ipynb
+++ /dev/null
@@ -1,148 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "e21abfd4",
-   "metadata": {},
-   "source": [
-    "# CASD : Test de la librairie Vispy\n",
-    "[Vispy](https://vispy.org/) permet d'afficher des graphiques contenant des milliers de points."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f741a0b6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import dask.dataframe as dd\n",
-    "import vispy.app\n",
-    "from vispy import plot as vp"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d1d2aee6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "vispy.app.use_app(\"ipynb_webgl\")\n",
-    "fig = vp.Fig(show=False)\n",
-    "fig1 = fig[0, 0]\n",
-    "fig1.plot(range(10), marker_size=0)\n",
-    "fig.show(run=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "22d66dc7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "input_directory = (\n",
-    "    r\"C:\\Users\\Public\\Documents\\TRAVAIL\\csg\\data_in\\extraction_assiettes_csg\\*.hdf\"\n",
-    ")\n",
-    "rfrs = dd.read_hdf(input_directory, \"/pote2019\")\n",
-    "rfrs.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1c06484c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# sample = rfrs\n",
-    "sample = rfrs[rfrs[\"revkire\"] > 500_000]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b6574e72",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%time\n",
-    "sample = sample.set_index(\"revkire\").compute()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9b99d525",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(sample)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "84b63a38",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# sample.index"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "08376150",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "vispy.app.use_app(\"ipynb_webgl\")\n",
-    "from vispy import plot as vp\n",
-    "\n",
-    "fig = vp.Fig(show=False)\n",
-    "fig1 = fig[0, 0]\n",
-    "fig1.plot((range(len(sample)), sample.index), marker_size=10)\n",
-    "fig.show(run=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6a1b3260",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(f\"{sample.index.max():,}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dfe2d117",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(f\"{sample.index.min():,}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3da2e5ff",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/code_CASD/extractions_base_des_impots/fake_pote_light.parquet b/notebooks/code_CASD/extractions_base_des_impots/fake_pote_light.parquet
deleted file mode 100644
index db7a9ae46ea8c920110a479b75b8089a8e03e93a..0000000000000000000000000000000000000000
Binary files a/notebooks/code_CASD/extractions_base_des_impots/fake_pote_light.parquet and /dev/null differ
diff --git a/notebooks/code_CASD/extractions_dads_postes/20_Convert_SAS_DADS.ipynb b/notebooks/code_CASD/extractions_dads_postes/20_Convert_SAS_DADS.ipynb
index bf46accb5c3c1f3d1e8a1537ef281f774c57ede7..f4ea103a711b3a7a869ba42a2a66710f1e328b4b 100644
--- a/notebooks/code_CASD/extractions_dads_postes/20_Convert_SAS_DADS.ipynb
+++ b/notebooks/code_CASD/extractions_dads_postes/20_Convert_SAS_DADS.ipynb
@@ -28,11 +28,11 @@
     "# year = \"2018\"\n",
     "SAS_FILE = (\n",
     "    r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\sas/\"\n",
-    "    #+ \"extrait_dads_2020.sas7bdat\"\n",
+    "    # + \"extrait_dads_2020.sas7bdat\"\n",
     "    + \"extrait_dads_2020_220809.sas7bdat\"\n",
     ")\n",
     "\n",
-    "OUT_PATH = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data\\chunks\\\"\n",
+    "OUT_PATH = \"\"  # r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data\\chunks\\\"\n",
     "OUT_PATH = OUT_PATH + \"extrait_dads_\" + year + r\"-chunk/\"\n",
     "taille_chunk = 2 * 2**20"
    ]
@@ -43,12 +43,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import shutil\n",
-    "from pathlib import Path\n",
-    "\n",
-    "import pandas as pd\n",
-    "import vaex\n",
-    "from tqdm import tqdm"
+    "import pandas as pd"
    ]
   },
   {
@@ -115,31 +110,31 @@
     }
    ],
    "source": [
-    "%%time\n",
-    "# Temps sur CASD : < 20 minutes.\n",
+    "# %%time\n",
+    "# # Temps sur CASD : < 20 minutes.\n",
     "\n",
-    "# Efface le dossier de sortie\n",
-    "shutil.rmtree(OUT_PATH, ignore_errors=True)\n",
-    "Path(OUT_PATH).mkdir(parents=True, exist_ok=True)\n",
+    "# # Efface le dossier de sortie\n",
+    "# shutil.rmtree(OUT_PATH, ignore_errors=True)\n",
+    "# Path(OUT_PATH).mkdir(parents=True, exist_ok=True)\n",
     "\n",
-    "dfi = pd.read_sas(\n",
-    "    SAS_FILE, chunksize=taille_chunk, encoding=\"iso8859-15\", iterator=True\n",
-    ")\n",
+    "# dfi = pd.read_sas(\n",
+    "#     SAS_FILE, chunksize=taille_chunk, encoding=\"iso8859-15\", iterator=True\n",
+    "# )\n",
     "\n",
-    "dd_values = None\n",
-    "i = 0\n",
-    "print(f\"Nombre d'itérations : {61_689_822/taille_chunk:.0f}\")\n",
-    "for chunk in tqdm(dfi):\n",
-    "    del dd_values\n",
-    "    dd_values = None\n",
-    "    chunk = clean_chunk(chunk)\n",
-    "    dd_values = vaex.from_pandas(chunk, copy_index=False)\n",
-    "    dd_values.export(f\"{OUT_PATH}{year}_{i}.parquet\")\n",
-    "    #### DEBUG\n",
-    "    i += 1\n",
-    "    # if i>=2:\n",
-    "    #     break\n",
-    "    #### DEBUG"
+    "# dd_values = None\n",
+    "# i = 0\n",
+    "# print(f\"Nombre d'itérations : {61_689_822/taille_chunk:.0f}\")\n",
+    "# for chunk in tqdm(dfi):\n",
+    "#     del dd_values\n",
+    "#     dd_values = None\n",
+    "#     chunk = clean_chunk(chunk)\n",
+    "#     dd_values = vaex.from_pandas(chunk, copy_index=False)\n",
+    "#     dd_values.export(f\"{OUT_PATH}{year}_{i}.parquet\")\n",
+    "#     #### DEBUG\n",
+    "#     i += 1\n",
+    "#     # if i>=2:\n",
+    "#     #     break\n",
+    "#     #### DEBUG"
    ]
   },
   {
@@ -180,7 +175,7 @@
     }
    ],
    "source": [
-    "chunk.info()"
+    "# chunk.info()"
    ]
   },
   {
@@ -190,7 +185,7 @@
    "outputs": [],
    "source": [
     "pd.set_option(\"display.max_columns\", None)\n",
-    "chunk"
+    "# chunk"
    ]
   },
   {
@@ -378,7 +373,7 @@
     }
    ],
    "source": [
-    "chunk.describe()"
+    "# chunk.describe()"
    ]
   },
   {
@@ -387,7 +382,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "chunk.query(\"s_brut < 0\")"
+    "# chunk.query(\"s_brut < 0\")"
    ]
   }
  ],
diff --git a/notebooks/code_CASD/extractions_dads_postes/30_DADS-Quantiles.ipynb b/notebooks/code_CASD/extractions_dads_postes/30_DADS-Quantiles.ipynb
index fb9cb285ea3e382ed1cc5b3854968a2ac697f1fc..20c178b54331050a3e8982d25f1d29456c1c1899 100644
--- a/notebooks/code_CASD/extractions_dads_postes/30_DADS-Quantiles.ipynb
+++ b/notebooks/code_CASD/extractions_dads_postes/30_DADS-Quantiles.ipynb
@@ -84,7 +84,16 @@
     "import vaex\n",
     "from tqdm import tqdm\n",
     "\n",
-    "from leximpact_prepare_data.scenario_tools.calib_and_copules import *"
+    "from leximpact_prepare_data.scenario_tools.calib_and_copules import (\n",
+    "    DistribDeVarVaex,\n",
+    "    variable_to_split_on,\n",
+    "    DatasetNotSorted,\n",
+    "    get_borders,\n",
+    "    get_copulas,\n",
+    "    get_primary_buckets,\n",
+    "    Quantile,\n",
+    "    keep_upper_bound_secret,\n",
+    ")"
    ]
   },
   {
@@ -96,7 +105,7 @@
    "source": [
     "%%time\n",
     "dfv = vaex.open(ARROW_PATH + \"*\")\n",
-    "tc.assertEqual(len(dfv), 44_653_064)"
+    "assert len(dfv) == 44_653_064, len(dfv)"
    ]
   },
   {
diff --git a/notebooks/code_CASD/extractions_dads_postes/30a_Agregats_POTE-PER.ipynb b/notebooks/code_CASD/extractions_dads_postes/30a_Agregats_POTE-PER.ipynb
index a19e7d7a77e87dfd2960ff294048528af48dc597..a0c9b644f11f6de33b937445d8cdea72407597e2 100644
--- a/notebooks/code_CASD/extractions_dads_postes/30a_Agregats_POTE-PER.ipynb
+++ b/notebooks/code_CASD/extractions_dads_postes/30a_Agregats_POTE-PER.ipynb
@@ -73,7 +73,12 @@
     "import vaex\n",
     "from tqdm import tqdm\n",
     "\n",
-    "from leximpact_prepare_data.scenario_tools.calib_and_copules import *"
+    "from leximpact_prepare_data.scenario_tools.calib_and_copules import (\n",
+    "    get_copulas,\n",
+    "    anonimyze_lower_and_upper_bound,\n",
+    "    get_primary_buckets,\n",
+    "    Quantile,\n",
+    ")"
    ]
   },
   {
@@ -404,7 +409,6 @@
    "source": [
     "# Libère la mémoire\n",
     "del df_agg\n",
-    "import gc\n",
     "\n",
     "gc.collect()"
    ]
diff --git a/notebooks/code_CASD/extractions_dads_postes/40_categorical_bi-variate.ipynb b/notebooks/code_CASD/extractions_dads_postes/40_categorical_bi-variate.ipynb
index 71e39dcd5a28aa5b29d2765f4c17e88ea969b0c4..75896d737ac1bc044dea9da0026ab1e65b05aaad 100644
--- a/notebooks/code_CASD/extractions_dads_postes/40_categorical_bi-variate.ipynb
+++ b/notebooks/code_CASD/extractions_dads_postes/40_categorical_bi-variate.ipynb
@@ -72,7 +72,7 @@
     "import vaex\n",
     "from tqdm import tqdm\n",
     "\n",
-    "from leximpact_prepare_data.scenario_tools.calib_and_copules import *"
+    "from leximpact_prepare_data.scenario_tools.calib_and_copules import Quantile"
    ]
   },
   {
@@ -93,7 +93,7 @@
    "source": [
     "%%time\n",
     "dfv = vaex.open(ARROW_PATH + \"*\")\n",
-    "tc.assertEqual(len(dfv), 61_689_822)"
+    "assert len(dfv) == 61_689_822, len(dfv)"
    ]
   },
   {
diff --git a/notebooks/memos/memo_aah.ipynb b/notebooks/memos/memo_aah.ipynb
index 65699e8cbfdc8d1d314ab74ffc9096e54759931a..dbf41a7e131a2bdb23d496f07be22f546d9b5f28 100644
--- a/notebooks/memos/memo_aah.ipynb
+++ b/notebooks/memos/memo_aah.ipynb
@@ -121,7 +121,7 @@
    ],
    "source": [
     "# | echo: false\n",
-    "from IPython.display import HTML\n",
+    "from IPython.display import HTML, display\n",
     "from pandas import DataFrame\n",
     "\n",
     "import plotly.io as pio\n",
diff --git a/notebooks/memos/memo_cotisations_sociales_dash.py b/notebooks/memos/memo_cotisations_sociales_dash.py
index f490f91fcf0c93de4367b3b711c855bc0f367040..8d9e47494e82d285fc6738f78705c5a7b804decc 100644
--- a/notebooks/memos/memo_cotisations_sociales_dash.py
+++ b/notebooks/memos/memo_cotisations_sociales_dash.py
@@ -7,7 +7,7 @@
 
 http://localhost:5000/leximpact_prepare_data/memos/memo_cotisations_sociales_dash/
 
-Cette dataviz en Dash est incluse dans le mémo, mais elle doit être 
+Cette dataviz en Dash est incluse dans le mémo, mais elle doit être
 exécutée à part pour être partagée : les mémos sont des fichiers HTML
  alors que Dash doit lancer un serveur, il faut donc le déployer côté
    serveur en complément du HTML.
diff --git a/poetry.lock b/poetry.lock
index 39ffc8fccf90cf54c5e8f3dbd759ae5983d3c0bd..92d950f792d00b82a55f4f141b40d5fabf5b5b2b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -695,7 +695,7 @@ files = [
     {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"},
     {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"},
 ]
-markers = {main = "implementation_name == \"pypy\" or platform_python_implementation == \"PyPy\""}
+markers = {main = "platform_python_implementation == \"PyPy\" or implementation_name == \"pypy\""}
 
 [package.dependencies]
 pycparser = "*"
@@ -4212,7 +4212,7 @@ files = [
     {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
     {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
 ]
-markers = {main = "implementation_name == \"pypy\" or platform_python_implementation == \"PyPy\""}
+markers = {main = "platform_python_implementation == \"PyPy\" or implementation_name == \"pypy\""}
 
 [[package]]
 name = "pydocstyle"