diff --git a/notebooks/extractions_dads_postes/20_Convert_SAS_DADS.ipynb b/notebooks/extractions_dads_postes/20_Convert_SAS_DADS.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..202121b75595086dcf92b734a86f489c1df42e2f
--- /dev/null
+++ b/notebooks/extractions_dads_postes/20_Convert_SAS_DADS.ipynb
@@ -0,0 +1,250 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# CASD : Conversion de l'extraction SAS en Apache Parquet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.core.interactiveshell import InteractiveShell\n",
+    "\n",
+    "InteractiveShell.ast_node_interactivity = \"all\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "year = \"2020\"\n",
+    "# year = \"2018\"\n",
+    "# Input SAS extract; the file name follows the selected year.\n",
+    "SAS_FILE = (\n",
+    "    r\"C:\Users\Public\Documents\TRAVAIL\agregats\sas/\"\n",
+    "    + \"extrait_dads_\"\n",
+    "    + year\n",
+    "    + \".sas7bdat\"\n",
+    ")\n",
+    "\n",
+    "OUT_PATH = r\"C:\Users\Public\Documents\TRAVAIL\agregats\data\chunks\"\n",
+    "# The leading separator is required: without it the chunks are written to a\n",
+    "# sibling folder named \"chunksextrait_dads_<year>-chunk\" instead of\n",
+    "# \"chunks/extrait_dads_<year>-chunk\", which is where notebook 30 reads them.\n",
+    "OUT_PATH = OUT_PATH + \"/extrait_dads_\" + year + r\"-chunk/\"\n",
+    "# Number of rows read from the SAS file per iteration.\n",
+    "taille_chunk = 600_000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import shutil\n",
+    "from pathlib import Path\n",
+    "from time import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "import vaex\n",
+    "from tqdm import tqdm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def clean_chunk(chunk):\n",
+    "    \"\"\"Lower-case the column names and zero-fill the numeric columns.\n",
+    "\n",
+    "    The frame is modified in place and also returned.\n",
+    "\n",
+    "    Args:\n",
+    "        chunk: pandas DataFrame holding the columns EFF_3112, S_BRUT, PEPA\n",
+    "            and NET (in any letter case); a missing one raises KeyError.\n",
+    "\n",
+    "    Returns:\n",
+    "        The same DataFrame, with lower-case column names and NaN replaced\n",
+    "        by 0 in those four columns.\n",
+    "    \"\"\"\n",
+    "    chunk.columns = [c.lower() for c in chunk.columns.to_list()]\n",
+    "    # Replace NaN with 0 in the non-categorical columns.\n",
+    "    numeric_cols = \"EFF_3112 S_BRUT PEPA NET\".lower().split(\" \")\n",
+    "    # Assign the result back rather than calling fillna(inplace=True) on\n",
+    "    # chunk[col]: that chained in-place call is deprecated and does not write\n",
+    "    # through to the frame when pandas Copy-on-Write is active.\n",
+    "    chunk[numeric_cols] = chunk[numeric_cols].fillna(0)\n",
+    "    return chunk"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Lecture du fichier SAS\n",
+    "\n",
+    "On va lire le fichier par morceaux de `taille_chunk` lignes (600 000 ici), pour ne pas saturer la mémoire. Il y a environ 61,7 millions de lignes.\n",
+    "\n",
+    "On va les enregistrer au fur et à mesure au format Apache Parquet."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Nombre d'itérations : 103\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "103it [18:05, 10.54s/it]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 17min 41s\n",
+      "Wall time: 18min 5s\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Run time on CASD: < 20 minutes.\n",
+    "\n",
+    "# Start from an empty output directory.\n",
+    "shutil.rmtree(OUT_PATH, ignore_errors=True)\n",
+    "Path(OUT_PATH).mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# With chunksize set, read_sas returns a reader that yields one DataFrame\n",
+    "# of at most taille_chunk rows per iteration.\n",
+    "dfi = pd.read_sas(\n",
+    "    SAS_FILE, chunksize=taille_chunk, encoding=\"iso8859-15\", iterator=True\n",
+    ")\n",
+    "\n",
+    "# Row count of the extract, hard-coded; it is only used to size the progress bar.\n",
+    "nb_lignes = 61_689_822\n",
+    "# Ceiling division: a final partial chunk still counts as one iteration\n",
+    "# (rounding to nearest under-counts when the remainder is below half a chunk).\n",
+    "nb_iterations = -(-nb_lignes // taille_chunk)\n",
+    "print(f\"Nombre d'itérations : {nb_iterations}\")\n",
+    "\n",
+    "dd_values = None\n",
+    "for i, chunk in enumerate(tqdm(dfi, total=nb_iterations)):\n",
+    "    # Release the previous vaex frame before building the next one\n",
+    "    # (presumably to limit peak memory).\n",
+    "    del dd_values\n",
+    "    dd_values = None\n",
+    "    chunk = clean_chunk(chunk)\n",
+    "    dd_values = vaex.from_pandas(chunk, copy_index=False)\n",
+    "    # One Parquet file per chunk, numbered from 0.\n",
+    "    dd_values.export(f\"{OUT_PATH}{year}_{i}.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 489822 entries, 61200000 to 61689821\n",
+      "Data columns (total 10 columns):\n",
+      " #   Column           Non-Null Count   Dtype  \n",
+      "---  ------           --------------   -----  \n",
+      " 0   a17              283843 non-null  object \n",
+      " 1   a88              283843 non-null  object \n",
+      " 2   contrat_travail  283842 non-null  object \n",
+      " 3   cris             283747 non-null  object \n",
+      " 4   eff_3112         489822 non-null  float64\n",
+      " 5   motifcdd         121886 non-null  object \n",
+      " 6   net              489822 non-null  float64\n",
+      " 7   pepa             489822 non-null  float64\n",
+      " 8   s_brut           489822 non-null  float64\n",
+      " 9   treffect         283845 non-null  object \n",
+      "dtypes: float64(4), object(6)\n",
+      "memory usage: 37.4+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "chunk.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.set_option(\"display.max_columns\", None)\n",
+    "chunk"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# chunk.query(\"Z1cj > 0 and revkire < 10000\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# chunk.query(\"impot != impotnet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# chunk[chunk['srbg'] < 0]['rbg'].min()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#dd_values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "leximpact-prepare-data-kernel",
+   "language": "python",
+   "name": "leximpact-prepare-data-kernel"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/extractions_dads_postes/30_DADS-Quantiles.ipynb b/notebooks/extractions_dads_postes/30_DADS-Quantiles.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..25b55c60b10ee2924319c54c59f5ffb28969075b
--- /dev/null
+++ b/notebooks/extractions_dads_postes/30_DADS-Quantiles.ipynb
@@ -0,0 +1,820 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "284168c2-c515-442a-8943-638ab2487933",
+   "metadata": {},
+   "source": [
+    "# CASD : Extraction d'agrégats\n",
+    "\n",
+    "TODO :\n",
+    "- Keep Secret des quantiles => Il faut la version 0.0.17\n",
+    "- Copule PEPA\n",
+    "- Count des variables catégorielles discrètes\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "3b1b7645-f0a7-4929-a667-3ffa31e1b4db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.core.interactiveshell import InteractiveShell\n",
+    "\n",
+    "InteractiveShell.ast_node_interactivity = \"all\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e888519d-ca9f-404a-b188-49de0bf72e31",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "year = \"2020\"\n",
+    "# year = \"2018\"\n",
+    "# C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data\\chunks\\extrait_dads_2020-chunk\n",
+    "OUT_PATH = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data\\DADS/\"\n",
+    "ARROW_PATH = OUT_PATH + \"/../chunks/extrait_dads_\" + year + r\"-chunk/\"\n",
+    "taille_chunk = 2 * 2**20  # 2**20 = 1_048_576\n",
+    "# taille_chunk = 5000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "5e63307c-d42d-43d6-9ce3-8ea904a338eb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'0.0.15'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import leximpact_prepare_data\n",
+    "\n",
+    "leximpact_prepare_data.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d5c9f1f6-e464-48d1-a933-421ad58a270a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import shutil\n",
+    "from pathlib import Path\n",
+    "from time import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "import vaex\n",
+    "import gc\n",
+    "from tqdm import tqdm\n",
+    "\n",
+    "from leximpact_prepare_data.calib_and_copules import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "805afac3-cab5-4795-8c93-84cd0e99d45f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 438 ms\n",
+      "Wall time: 474 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "dfv = vaex.open(ARROW_PATH + \"*\")\n",
+    "tc.assertEqual(len(dfv), 61_689_822 )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "2e8a14fa-6b7a-4a3a-95d2-c559dbd2b20e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dfv.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "7d55425f-af65-4f69-9b84-270020a8122e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['a17',\n",
+       " 'a88',\n",
+       " 'contrat_travail',\n",
+       " 'cris',\n",
+       " 'eff_3112',\n",
+       " 'motifcdd',\n",
+       " 'net',\n",
+       " 'pepa',\n",
+       " 's_brut',\n",
+       " 'treffect']"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dfv.get_column_names()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "04e671d2-ade2-48ac-95df-3344228bf13b",
+   "metadata": {},
+   "source": [
+    "## Variables catégorielles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a16822e7-c562-4a48-87dd-849cf8ac75da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "categorical = [\n",
+    " 'A17',\n",
+    " 'A88',\n",
+    " 'CONTRAT_TRAVAIL',\n",
+    " 'CRIS',\n",
+    " 'MOTIFCDD',\n",
+    " 'TREFFECT'\n",
+    "]\n",
+    "categorical = [c.lower() for c in categorical]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2aed1cf7-8a89-4cca-b4bf-052274850116",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Run time: 11 s\n",
+    "\n",
+    "# Counts at or below this value are blanked out in the exported files\n",
+    "# (presumably the statistical-secrecy threshold - TODO confirm).\n",
+    "SEUIL_SECRET = 12\n",
+    "\n",
+    "for col in tqdm(categorical):\n",
+    "    # Number of rows per modality of the categorical column.\n",
+    "    df_col = dfv.groupby(\n",
+    "        by=col,\n",
+    "        sort=True,\n",
+    "        agg={\n",
+    "            \"count\": vaex.agg.count(col),\n",
+    "        },\n",
+    "    )\n",
+    "    df = df_col.to_pandas_df()\n",
+    "    # where() keeps the counts above the threshold and puts NaN elsewhere,\n",
+    "    # upcasting the integer column itself; assigning NaN into an integer\n",
+    "    # column through .loc is deprecated in recent pandas.\n",
+    "    df[\"count\"] = df[\"count\"].where(df[\"count\"] > SEUIL_SECRET)\n",
+    "    df.to_csv(f\"{OUT_PATH}count_{year}_{col}.csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0653c281-5687-4542-8ff1-bd70dace2410",
+   "metadata": {},
+   "source": [
+    "## Variables continues"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "7985d253-e7d1-49b1-8d96-372b2002cef0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "continuous_variables = [\n",
+    " 'EFF_3112',\n",
+    " 'NET',\n",
+    " 'PEPA',\n",
+    " 'S_BRUT',\n",
+    "]\n",
+    "continuous_variables = [c.lower() for c in continuous_variables]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "55cd1255-bc37-4cb1-9cd2-7fb55b104a6a",
+   "metadata": {},
+   "source": [
+    "### Calcul d'agregats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "ed98c3b0-76e3-46e3-8992-290a5c64b77a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_agg(vdf, columns):\n",
+    "    \"\"\"Summarise each column of a vaex DataFrame in one table row.\n",
+    "\n",
+    "    Missing values of ``columns`` are first replaced by 0 in ``vdf`` itself\n",
+    "    (in-place ``fillna``), so the caller's frame is modified.\n",
+    "\n",
+    "    Args:\n",
+    "        vdf: vaex DataFrame to summarise.\n",
+    "        columns: names of the columns to process; when empty or None, every\n",
+    "            column of ``vdf`` is processed.\n",
+    "\n",
+    "    Returns:\n",
+    "        A pandas DataFrame with one row per column and the fields ``name``,\n",
+    "        ``nb_line``, ``lenzero``, ``pct_zero``, ``sum``, ``mean``,\n",
+    "        ``variance`` and ``std_dev``. ``sum`` covers all rows; ``mean``,\n",
+    "        ``variance`` and ``std_dev`` cover only the rows whose value is not 0\n",
+    "        and are 0.0 when there is no such row.\n",
+    "    \"\"\"\n",
+    "    vdf.fillna(column_names=columns, value=0, inplace=True)\n",
+    "    total_rows = vdf.shape[0]\n",
+    "    if not columns:\n",
+    "        columns = vdf.get_column_names()\n",
+    "\n",
+    "    rows = []\n",
+    "    for col in tqdm(columns):\n",
+    "        # Named selection restricting the statistics to non-zero values.\n",
+    "        selection = f\"{col}_non_zero\"\n",
+    "        vdf.select(f\"{col} != 0\", name=selection)\n",
+    "        non_zero_rows = int(vdf.count(\"*\", selection=selection))\n",
+    "        zero_rows = total_rows - non_zero_rows\n",
+    "\n",
+    "        stats = {\n",
+    "            \"name\": col,\n",
+    "            \"nb_line\": total_rows,\n",
+    "            \"lenzero\": zero_rows,\n",
+    "            \"pct_zero\": zero_rows / total_rows * 100,\n",
+    "            \"sum\": int(vdf.sum(col)),\n",
+    "        }\n",
+    "        if non_zero_rows > 0:\n",
+    "            stats[\"mean\"] = float(vdf.mean(col, selection=selection))\n",
+    "            stats[\"variance\"] = float(vdf.var(col, selection=selection))\n",
+    "            stats[\"std_dev\"] = float(vdf.std(col, selection=selection))\n",
+    "        else:\n",
+    "            # Nothing selected: report zeros instead of querying an empty selection.\n",
+    "            stats[\"mean\"] = stats[\"variance\"] = stats[\"std_dev\"] = 0.0\n",
+    "        rows.append(stats)\n",
+    "    return pd.DataFrame(rows)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "13f5536a-75a9-4df7-8db8-0833ad8c8ac5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|████████████████████████████████████████████| 4/4 [00:36<00:00,  9.14s/it]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 57.5 s\n",
+      "Wall time: 36.6 s\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Temps sur CASD : 30s par colonne\n",
+    "df_agg = compute_agg(\n",
+    "    dfv, continuous_variables\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "1e50b232-be30-4194-a4cf-b987cb3aeda3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>nb_line</th>\n",
+       "      <th>lenzero</th>\n",
+       "      <th>pct_zero</th>\n",
+       "      <th>sum</th>\n",
+       "      <th>mean</th>\n",
+       "      <th>variance</th>\n",
+       "      <th>std_dev</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>EFF_3112</td>\n",
+       "      <td>61689822</td>\n",
+       "      <td>21478958</td>\n",
+       "      <td>34.817668950317284</td>\n",
+       "      <td>183166377917</td>\n",
+       "      <td>4,555.146537438241</td>\n",
+       "      <td>371,755,244.70450455</td>\n",
+       "      <td>19,280.955492519155</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>NET</td>\n",
+       "      <td>61689822</td>\n",
+       "      <td>29780878</td>\n",
+       "      <td>48.27518873372661</td>\n",
+       "      <td>490547776913</td>\n",
+       "      <td>15,373.36293277926</td>\n",
+       "      <td>557,799,386.3548901</td>\n",
+       "      <td>23,617.77691390317</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>PEPA</td>\n",
+       "      <td>61689822</td>\n",
+       "      <td>56327418</td>\n",
+       "      <td>91.30747370287435</td>\n",
+       "      <td>3173478173</td>\n",
+       "      <td>591.8013960933506</td>\n",
+       "      <td>176,410.5382527109</td>\n",
+       "      <td>420.01254535158006</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>S_BRUT</td>\n",
+       "      <td>61689822</td>\n",
+       "      <td>17053834</td>\n",
+       "      <td>27.644485665722947</td>\n",
+       "      <td>818402937097</td>\n",
+       "      <td>18,335.04698265482</td>\n",
+       "      <td>729,497,116.5502262</td>\n",
+       "      <td>27,009.204293179235</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       name   nb_line   lenzero           pct_zero           sum  \\\n",
+       "0  EFF_3112  61689822  21478958 34.817668950317284  183166377917   \n",
+       "1       NET  61689822  29780878  48.27518873372661  490547776913   \n",
+       "2      PEPA  61689822  56327418  91.30747370287435    3173478173   \n",
+       "3    S_BRUT  61689822  17053834 27.644485665722947  818402937097   \n",
+       "\n",
+       "                mean             variance             std_dev  \n",
+       "0 4,555.146537438241 371,755,244.70450455 19,280.955492519155  \n",
+       "1 15,373.36293277926  557,799,386.3548901  23,617.77691390317  \n",
+       "2  591.8013960933506   176,410.5382527109  420.01254535158006  \n",
+       "3 18,335.04698265482  729,497,116.5502262 27,009.204293179235  "
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.set_option(\"display.float_format\", \"{:,}\".format)\n",
+    "# Export dans un fichier\n",
+    "df_agg.to_csv(OUT_PATH + \"/agregats_DADS_\" + year + \".csv\", index=False)\n",
+    "df_agg"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7f1e01ef-7303-4bfd-9d60-a28f861c56bd",
+   "metadata": {},
+   "source": [
+    "### Calcul des quantiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "da0ed4e7-0836-43c1-a99e-c33a5236f7d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_quantile(vdf, columns=None, quantiles=10):\n",
+    "    \"\"\"Write the quantiles of each column to JSON files.\n",
+    "\n",
+    "    For every column and every requested number of quantiles, one file named\n",
+    "    ``quantile_DADS_<n>_<year>_<column>.json`` is written under ``OUT_PATH``.\n",
+    "    Missing values of ``columns`` are first replaced by 0 in ``vdf`` itself\n",
+    "    (in-place ``fillna``).\n",
+    "\n",
+    "    A failure on one column is printed and processing continues with the\n",
+    "    next column.\n",
+    "\n",
+    "    Args:\n",
+    "        vdf: vaex DataFrame holding the data.\n",
+    "        columns: names of the columns to process; when empty or None, every\n",
+    "            column of ``vdf`` is processed.\n",
+    "        quantiles: number of quantiles to compute (e.g. 10 for deciles), or\n",
+    "            an iterable of such numbers.\n",
+    "    \"\"\"\n",
+    "    # Accept a bare int (the default) as well as a list: iterating over the\n",
+    "    # int default raised TypeError for every column, which the except below\n",
+    "    # swallowed, so no file was written.\n",
+    "    if isinstance(quantiles, int):\n",
+    "        quantiles = [quantiles]\n",
+    "    vdf.fillna(column_names=columns, value=0, inplace=True)\n",
+    "    # vdf.fillnan(column_names=columns, value=0, inplace=True)\n",
+    "    columns = columns if columns else vdf.get_column_names()\n",
+    "    for col in tqdm(columns):\n",
+    "        try:\n",
+    "            # The whole column is materialised as a Python list for Quantile.\n",
+    "            q = Quantile(vdf[col].tolist())\n",
+    "            for quantile in quantiles:\n",
+    "                q_dict = q.get_quantile(quantile)\n",
+    "                # keep_upper_bound_secret(q_dict)\n",
+    "                with open(f\"{OUT_PATH}/quantile_DADS_{quantile}_{year}_{col}.json\", \"w\") as f:\n",
+    "                    f.write(json.dumps(q_dict))\n",
+    "            # Free the materialised column before moving to the next one.\n",
+    "            del q\n",
+    "            gc.collect()\n",
+    "        except Exception as e:\n",
+    "            # Best effort: report the failing column and carry on.\n",
+    "            print(f\"ERROR processing {col} {e.__class__.__name__} : {e}\")\n",
+    "            continue"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e79ad721-692c-4f5f-830e-8940e0f30be2",
+   "metadata": {},
+   "source": [
+    "#### Déciles et centiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5960386d-494f-4305-ac33-d98381478b54",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Temps sur CASD : 5 minutes par colonne\n",
+    "compute_quantile(\n",
+    "    dfv, continuous_variables, quantiles=[10,100]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f751954f-2f4f-4ae7-b4e7-018352450aef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "afc626b2-41a2-4d0f-91b2-0b5d83fe6106",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3390e234-222d-43dc-98c0-3d3d187c7f37",
+   "metadata": {},
+   "source": [
+    "### Calcul de calibration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "57084940-2b4b-4004-a14c-ecdd31ab24d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "calib_base_variable = \"s_brut\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "d8aa5ab7-12d0-4c42-a291-a144f9c56de5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 15.6 s\n",
+      "Wall time: 18.6 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# dfv[\"CONTRAT_TRAVAIL\"] = dfv[\"CONTRAT_TRAVAIL\"].fillna(0)\n",
+    "# dfv[\"CONTRAT_TRAVAIL\"] = dfv[\"CONTRAT_TRAVAIL\"].astype('int')\n",
+    "# dfv = dfv.sort(\"CONTRAT_TRAVAIL\")\n",
+    "\n",
+    "##dfv[\"S_BRUT\"] = dfv[\"S_BRUT\"].astype('int')\n",
+    "dfv = dfv.sort(calib_base_variable)\n",
+    "# tc.assertEqual(dfv[\"S_BRUT\"].count(), 39264696)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "501e3978-4328-4a2b-b8d4-0c2c1d275573",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 20.7 s\n",
+      "Wall time: 21 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# < 1 minute\n",
+    "une_tranche_rfr = get_primary_buckets(dfv, 1, variable_to_split_on=calib_base_variable)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "c669f3c0-fc59-4cf9-b0f7-12f4f9aab6eb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'borders_values': [0, 1000000000000000], 'borders': [61689822]}"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "une_tranche_rfr"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "36e93ee0-d296-454c-b384-831aa2a3785d",
+   "metadata": {},
+   "source": [
+    "#### On sort les distributions de chaque variable continue"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "e104fae0-e832-4664-9a6a-00bf75ff6a5c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|███████████████████████████████████████████| 4/4 [17:12<00:00, 258.23s/it]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 17min 2s\n",
+      "Wall time: 17min 12s\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Run time on CASD: 30 minutes for 4 columns\n",
+    "# Number of buckets requested for each variable.\n",
+    "nb_bucket_var = 100\n",
+    "\n",
+    "# Write one distribution file per continuous variable.\n",
+    "for variable in tqdm(continuous_variables):\n",
+    "    try:\n",
+    "        calib = get_copulas(dfv, calib_base_variable, variable, nb_bucket_var, une_tranche_rfr)\n",
+    "        # une_tranche_rfr was built with a single primary bucket, so only the\n",
+    "        # first entry of \"copules\" is read.\n",
+    "        calib = calib[\"copules\"][0][\"buckets\"]\n",
+    "        # A string here is reported as an error and the variable is skipped\n",
+    "        # (presumably get_copulas returns a message instead of buckets - TODO confirm).\n",
+    "        if type(calib) is str:\n",
+    "            print(f\"ERROR {variable} calib is '{calib}'\")\n",
+    "            continue\n",
+    "        # NOTE(review): looks like this masks the top bound before export - confirm.\n",
+    "        keep_upper_bound_secret(calib)\n",
+    "        with open(\n",
+    "            f\"{OUT_PATH}Distrib_DADS-{nb_bucket_var}-{year}-{variable}.json\", \"w\"\n",
+    "        ) as f:\n",
+    "            f.write(json.dumps(calib))\n",
+    "    except Exception as e:\n",
+    "        # Log which variable failed, then stop the cell by re-raising.\n",
+    "        print(f\"ERROR processing {variable}\", e)\n",
+    "        raise e"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "112972db-7b3a-4de2-b5ed-02ae91ecaaa8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from IPython.display import JSON\n",
+    "\n",
+    "# JSON(calib)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "be197441-db93-43fc-9544-782eb6748ecd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %%time\n",
+    "# # Temps sur CASD : 138s par iteration\n",
+    "# nb_bucket_var = 100\n",
+    "\n",
+    "# for variable in tqdm(continuous_variables):\n",
+    "#     #calib = get_calib(dfv, variable, nb_bucket_var)\n",
+    "#     # print(variable)\n",
+    "#     calib = compute_copule_vaex(dfv, variable, nb_bucket_var, une_tranche_rfr)\n",
+    "#     calib[\"copules\"][0][\"buckets\"][-1][\"seuil_var_supp\"] = \"secret\"\n",
+    "#     with open(f\"{OUT_PATH}CalibPote-{nb_bucket_var}-{year}-{variable}.json\", \"w\") as f:\n",
+    "#         f.write(json.dumps(calib[\"copules\"][0][\"buckets\"]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "7526cee8-ec5e-40f8-848f-be82dce7972d",
+   "metadata": {
+    "jupyter": {
+     "source_hidden": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# %%time\n",
+    "# # Temps sur CASD : 538s par iteration !\n",
+    "# nb_bucket_var = 1000\n",
+    "\n",
+    "# for variable in tqdm(continuous_variables):\n",
+    "#     #calib = get_calib(dfv, variable, nb_bucket_var)\n",
+    "#     # print(variable)\n",
+    "#     calib = compute_copule_vaex(dfv, variable, nb_bucket_var, une_tranche_rfr)\n",
+    "#     calib[\"copules\"][0][\"buckets\"][-1][\"seuil_var_supp\"] = \"secret\"\n",
+    "#     with open(f\"{OUT_PATH}CalibPote-{nb_bucket_var}-{year}-{variable}.json\", \"w\") as f:\n",
+    "#         f.write(json.dumps(calib[\"copules\"][0][\"buckets\"]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "98f7a2a0-d29b-44ce-97c0-61f68ca9dc67",
+   "metadata": {},
+   "source": [
+    "#### Extraction de Copules\n",
+    "\n",
+    "On ne sort des copules que pour:\n",
+    "- PEPA par salaire brut\n",
+    "- PEPA par type de contrat de travail\n",
+    "- PEPA par secteur d'activité"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "da73e5f7-a38b-4fc5-9166-936723b1375a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# copules_frf = get_copules(dfv, 10, \""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "3d9cab3d-41cc-4d67-9fcc-c110fbbfc706",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "a2555b60-b565-404f-9748-e9fe3f198d1f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|                                                    | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ERROR processing pepa DistribDeVar : WARNING !!!!, moins de 12 éléments => On retourne une liste vide. !!!!!!!!!!\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|████████████████████████████████████████████| 1/1 [00:56<00:00, 56.44s/it]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 1min 17s\n",
+      "Wall time: 1min 17s\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# NOTE(review): despite its name, centile_rfr is built with 10 as second\n",
+    "# argument and splits on calib_base_variable - presumably 10 primary buckets; confirm.\n",
+    "centile_rfr = get_primary_buckets(dfv, 10, variable_to_split_on=calib_base_variable)\n",
+    "\n",
+    "# Number of buckets requested for the variable within each primary bucket.\n",
+    "nb_bucket_var = 10\n",
+    "\n",
+    "for variable in tqdm( [\n",
+    " 'pepa',\n",
+    "]):\n",
+    "    try:\n",
+    "        copule = get_copulas(dfv, calib_base_variable, variable, nb_bucket_var, centile_rfr)\n",
+    "        # copule[\"copules\"][0][\"buckets\"][-1][\"upper_bound\"] = \"secret\"\n",
+    "        keep_upper_bound_secret(copule[\"copules\"])\n",
+    "        with open(\n",
+    "            f\"{OUT_PATH}CopuleDADS_{calib_base_variable}-{nb_bucket_var}-{year}-{variable}.json\", \"w\"\n",
+    "        ) as f:\n",
+    "            f.write(json.dumps(copule))\n",
+    "    except Exception as e:\n",
+    "        # Best effort: the error is printed and the loop continues (re-raise is disabled).\n",
+    "        print(f\"ERROR processing {variable}\", e)\n",
+    "        # raise e"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c402fc11-1eee-4fec-84e5-a5a8e1153db8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "del dfv\n",
+    "gc.collect()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "leximpact-prepare-data-kernel",
+   "language": "python",
+   "name": "leximpact-prepare-data-kernel"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/extractions_dads_postes/40_categorical_bi-variate.ipynb b/notebooks/extractions_dads_postes/40_categorical_bi-variate.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..cf4d63eafe321b499b826354578529b89d1efdf8
--- /dev/null
+++ b/notebooks/extractions_dads_postes/40_categorical_bi-variate.ipynb
@@ -0,0 +1,213 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8e189d9b-84a8-42a9-bb05-583fde8c66bf",
+   "metadata": {},
+   "source": [
+    "# Distribution bi-variée par catégorie\n",
+    "\n",
+    "Comme les copules ne fonctionnent pas avec des catégories, nous avons sorti des centiles pour chaque catégorie."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "c7417c68-57fc-4b6e-b43c-4fdfd678716b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.core.interactiveshell import InteractiveShell\n",
+    "\n",
+    "InteractiveShell.ast_node_interactivity = \"all\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "71d0d8c8-26ef-46ac-a97c-defe097cdd3f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "year = \"2020\"\n",
+    "# year = \"2018\"\n",
+    "# C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data\\chunks\\extrait_dads_2020-chunk\n",
+    "OUT_PATH = r\"C:\\Users\\Public\\Documents\\TRAVAIL\\agregats\\data\\DADS/\"\n",
+    "ARROW_PATH = OUT_PATH + \"/../chunks/extrait_dads_\" + year + r\"-chunk/\"\n",
+    "taille_chunk = 2 * 2**20  # 2**20 = 1_048_576\n",
+    "# taille_chunk = 5000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "37a530d4-41cd-4004-aebe-284e93eb2946",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'0.0.15'"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import leximpact_prepare_data\n",
+    "\n",
+    "leximpact_prepare_data.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "bac5ed21-92fd-4826-9b65-67f65141be05",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import shutil\n",
+    "from pathlib import Path\n",
+    "from time import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "import vaex\n",
+    "import gc\n",
+    "from tqdm import tqdm\n",
+    "\n",
+    "from leximpact_prepare_data.calib_and_copules import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "8ba466b8-df04-4189-89b0-a0d70d2b929c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 328 ms\n",
+      "Wall time: 339 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "dfv = vaex.open(ARROW_PATH + \"*\")\n",
+    "tc.assertEqual(len(dfv), 61_689_822 )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "20d0c74b-7c87-41d7-b04a-8d536cb6730a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dfv.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "ec44f102-96f2-4116-9795-75f4f705441b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['a17',\n",
+       " 'a88',\n",
+       " 'contrat_travail',\n",
+       " 'cris',\n",
+       " 'eff_3112',\n",
+       " 'motifcdd',\n",
+       " 'net',\n",
+       " 'pepa',\n",
+       " 's_brut',\n",
+       " 'treffect']"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dfv.get_column_names()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "f6f107be-2315-4bce-b327-d58fd59eed52",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "categorical = [\n",
+    " 'A17',\n",
+    " 'CONTRAT_TRAVAIL',\n",
+    " 'TREFFECT'\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "faefe272-577b-4ff9-b5c8-20ed1eb0fe2b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Quantiles of `secondary` within each category; one JSON file per (column, category).\n",
+    "secondary = \"pepa\"\n",
+    "nb_quantile = 100\n",
+    "for col_maj in tqdm(['CONTRAT_TRAVAIL', 'TREFFECT']):\n",
+    "    # Category values are read as strings from count_<year>_<COLUMN>.csv in OUT_PATH.\n",
+    "    df = pd.read_csv(f\"{OUT_PATH}/count_{year}_{col_maj}.csv\", dtype={col_maj: str})\n",
+    "    col = col_maj.lower()  # dfv column names are lower-case\n",
+    "    for cat in df[col_maj].dropna().tolist():  # skip missing categories (NaN)\n",
+    "        vdf_cat = dfv[dfv[col] == str(cat)]\n",
+    "        q = Quantile(vdf_cat[secondary].tolist())\n",
+    "        q_dict = q.get_quantile(nb_quantile)\n",
+    "        # NOTE(review): keep_upper_bound_secret(q_dict) is left disabled, as before - confirm.\n",
+    "        # The last name component is the variable the quantiles describe.\n",
+    "        with open(f\"{OUT_PATH}/quantile_DADS_{col}_{cat}_{nb_quantile}_{year}_{secondary}.json\", \"w\") as f:\n",
+    "            f.write(json.dumps(q_dict))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "623109a0-c58a-4f85-9660-b64149e9edbd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "leximpact-prepare-data-kernel",
+   "language": "python",
+   "name": "leximpact-prepare-data-kernel"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/extractions_dads_postes/extraction_pepa_macron.sas b/notebooks/extractions_dads_postes/extraction_pepa_macron.sas
new file mode 100644
index 0000000000000000000000000000000000000000..642a85be278ac6fd9187726fc54634862330bf27
--- /dev/null
+++ b/notebooks/extractions_dads_postes/extraction_pepa_macron.sas
@@ -0,0 +1,5 @@
+/* Copy a subset of columns from the DADS Postes 2020 "post" table into a local SAS dataset. */
+libname dads_src "\\casd.fr\casdfs\Projets\LEXIMPA\Data\DADS_DADS Postes_2020";
+libname outputs "C:\Users\Public\Documents\TRAVAIL\agregats\sas";
+
+data outputs.extrait_dads_2020;
+    /* Only the listed variables are kept from the source table. */
+    set dads_src.post (
+        keep=A17 A88 CRIS TREFFECT EFF_3112
+             CONTRAT_TRAVAIL MOTIFCDD S_BRUT PEPA NET
+    );
+run;
diff --git a/notebooks/extractions_dads_postes/recherche_agricole.sas b/notebooks/extractions_dads_postes/recherche_agricole.sas
new file mode 100644
index 0000000000000000000000000000000000000000..8caa6bda0040c52c3e6c7096a61aa3c42b37a3c9
--- /dev/null
+++ b/notebooks/extractions_dads_postes/recherche_agricole.sas
@@ -0,0 +1,25 @@
+/* Exploration of agricultural-sector data in DADS Postes 2019 (1/12th sample). */
+
+/* Rows considered eligible for "TO=DE" (label kept as written by the author): */
+/* A88 = '01' (crop farming, livestock and hunting) */
+/* CONTRAT_TRAVAIL = '02' => fixed-term contract (CDD) */
+/* MOTIFCDD IN ('03', '04', '05') => seasonal jobs, grape-harvest contract and "contrat d'usage" */
+/* CONTRAT_TRAVAIL = '93' => subsidised contract (several kinds, including integration contracts) */
+/* CONTRAT_TRAVAIL = '95' => occasional work (seasonal, occasional) */
+/* ! LIMITATIONS: precision regarding the integration contract (CDDI or CDD CIE) and the case of a job-seeker CDI within an employers' group, all in eligible activities */
+
+proc sql;
+/* Count of A88 = '01' rows with CONTRAT_TRAVAIL = '95'. */
+SELECT count(A88) AS NB_TRAVAILLEURS_OCCASIONNELS FROM Dad12e19.Post12
+WHERE A88 = '01' AND CONTRAT_TRAVAIL = '95';
+
+/* Detail rows matching the criteria listed above (alternative: SELECT * FROM Dad12e19.Post12) */
+SELECT CONTRAT_TRAVAIL, MOTIFCDD, BRUT_F, BRUT_S, NET FROM Dad12e19.Post12
+WHERE A88 = '01'
+	AND (
+		(CONTRAT_TRAVAIL = '02' AND MOTIFCDD IN ('03', '04', '05'))
+		OR CONTRAT_TRAVAIL = '93'
+		OR CONTRAT_TRAVAIL = '95'
+	);
+quit;
+	
diff --git a/notebooks/extractions_dads_postes/recherche_pepa_macron.sas b/notebooks/extractions_dads_postes/recherche_pepa_macron.sas
new file mode 100644
index 0000000000000000000000000000000000000000..74de13e1cae618a97bd18a4d1e0c2140809d89bd
--- /dev/null
+++ b/notebooks/extractions_dads_postes/recherche_pepa_macron.sas
@@ -0,0 +1,6 @@
+/* Exploration of DADS Postes 2019 (1/12th sample) data on the PEPA, the so-called "Prime Macron". */
+
+proc sql;
+	/* Number of rows with a strictly positive PEPA. */
+	SELECT count(PEPA) AS NB_PEPA
+	FROM Dad12e19.Post12
+	WHERE PEPA > 0;
+
+	/* Total PEPA over the whole table. */
+	SELECT sum(PEPA) AS SOMME_PEPA
+	FROM Dad12e19.Post12;
+quit;