---
title: Augmentation artificielle du nombre de foyers dans la base ERFS-FPR
keywords: fastai
sidebar: home_sidebar
nb_path: "notebooks/retraitement_erfs-fpr/modules/enlarge_fake.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}
# Configuration class used to read the project settings.
from leximpact_socio_fisca_simu_etat.config import Configuration

# NOTE(review): star import. The cells below use `pd` without importing it,
# so it presumably comes from here -- confirm against toolbase.
from leximpact_prepare_data.toolbase import *

# Settings handle; the cells below read its "DATA_OUT" and "YEAR_ERFS" entries.
config = Configuration(project_folder="leximpact-prepare-data")
{% endraw %} {% raw %}
# Load the reduced ERFS table from the HDF5 file whose path is built from the
# "DATA_OUT" and "YEAR_ERFS" configuration entries.
infile_path = (
    config.get("DATA_OUT") + "01_erfs_reduced_" + config.get("YEAR_ERFS") + ".h5"
)
erfs_r = pd.read_hdf(infile_path)

# Uncomment to work on a reproducible 1000-row subsample instead:
# erfs_r = erfs_r.sample(n=1000, random_state=1)

# Report the number of rows loaded (message kept in French, as in the rest of
# the notebook output).
print("Nombre initial d'individus : ", len(erfs_r))
{% endraw %} {% raw %}

enlarge[source]

enlarge(erfs_r, N)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
# Build the enlarged table by calling `enlarge` with N=3.
# NOTE(review): N is presumably the enlargement factor -- confirm against the
# definition of `enlarge`.
erfs_enlarged = enlarge(erfs_r, 3)

# Last expression of the cell: shows the enlarged table ordered by age.
# `sort_values` returns a new frame, so `erfs_enlarged` itself is not reordered.
erfs_enlarged.sort_values("age")
{% endraw %} {% raw %}
# Destination file: "<DATA_OUT>02_erfs_enlarged_<YEAR_ERFS>_dev.h5".
outfile_path = "".join(
    (
        config.get("DATA_OUT"),
        "02_erfs_enlarged_",
        config.get("YEAR_ERFS"),
        "_dev.h5",
    )
)

# mode="w" creates the file anew, replacing any existing one; the table is
# stored under the "input" key.
erfs_enlarged.to_hdf(outfile_path, mode="w", key="input")
{% endraw %} {% raw %}
# Columns whose weighted totals are compared before and after enlargement.
var_list = [
    "age",
    "chomage_brut",
    "heures_remunerees_volume",
    "pensions_alimentaires_percues",
    "retraite_brute",
    "salaire_de_base",
    "taux_csg_remplacement",
    "f4ba",
]

# For each column, compare the "wprm"-weighted total of the original table
# with that of the enlarged table and print the relative change in percent.
for var in var_list:
    avant = (erfs_r[var] * erfs_r["wprm"]).sum()
    apres = (erfs_enlarged[var] * erfs_enlarged["wprm"]).sum()
    if avant == 0:
        # A relative change is undefined against a zero reference total;
        # report NaN rather than dividing by zero.
        variation = float("nan")
    else:
        # Signed change relative to the original total: positive when the
        # enlarged table's total is larger than the original one.
        variation = 100 * (apres - avant) / avant
    print("Variation du total de ", var, "de : ", variation, "%")
{% endraw %}