---
title: INVESTIGATIONS SUR LES CHIFFRES DU CHOMAGE
keywords: fastai
sidebar: home_sidebar
nb_path: "notebooks/retraitement_erfs-fpr/investigation_chiffres_chomage.ipynb"
---
import gc
import json
import unittest
import numpy as np
import pandas as pd
from openfisca_france import FranceTaxBenefitSystem
TBS = FranceTaxBenefitSystem()
from leximpact_socio_fisca_simu_etat.config import Configuration
from leximpact_socio_fisca_simu_etat.logger import logger as log
from matplotlib import pyplot as plt
tc = unittest.TestCase()
config = Configuration(project_folder="leximpact-prepare-data")
from leximpact_prepare_data.aging_tools import (
bruitage,
calib_initiale_ff,
inflation_economique,
inflation_foyers,
)
# from leximpact_prepare_data.calib_and_copules import reduce_bucket_number
from leximpact_prepare_data.calibration_tools import (
ajout_gens_en_haut,
calibration,
calibration_quantiles,
compare_distributions,
distrib_to_quantiles,
)
# from leximpact_prepare_data.copules_add_var import (
# convert_to_openfisca,
# get_ratios,
# integration_data_ff,
# )
from leximpact_prepare_data.enlargement import enlarge
from leximpact_prepare_data.monte_carlo import *
from leximpact_prepare_data.reduce_data import remove_useless_variables
# Import des modules de calcul spécifiques
from leximpact_prepare_data.toolbase import (
compute_var_in_ff,
create_simulation,
foyers_fiscaux_to_individus,
individus_to_foyers_fiscaux,
)
log.debug(config)
annee_erfs = config.get("YEAR_ERFS")
annee_pote = config.get("YEAR_POTE")
annee_de_calcul = config.get("YEAR_COMPUTATION")
wanted = {}
erfs_to_cal_ind = pd.read_hdf(
config.get("DATA_OUT")
+ "07_erfs_salaire_to_cal_ind"
+ config.get("YEAR_ERFS")
+ "_inflated_to_"
+ config.get("YEAR_POTE")
+ ".h5"
)
# erfs_to_cal_ind.columns
pipeline_tracker = pd.DataFrame(
columns=[
"init",
"enlarge",
"post_calib_RFR",
"post_calib",
"infl_eco19",
"infl_ff19",
"infl_eco21",
"infl_ff21",
"final",
]
)
# Mise en forme
pd.set_option("display.max_colwidth", 80)
pd.options.display.float_format = "{:,.7f}".format
# Données d'intérêt
data = ["f4ba", "chomage_brut", "salaire_de_base", "retraite_brute"]
# Initialisation
erfs = erfs_to_cal_ind.copy()
# Taille de la base
pipeline_tracker.loc["Len_ind", "init"] = round(len(erfs["wprm"]))
erfsff = individus_to_foyers_fiscaux(erfs)
pipeline_tracker.loc["Nb_foyers", "init"] = round((erfsff["wprm"]).sum())
pipeline_tracker.loc["Len_ff", "init"] = round(len(erfsff["wprm"]))
# Variables
for var in data:
pipeline_tracker.loc[var, "init"] = round((erfs["wprm"] * erfs[var]).sum())
del erfsff
del erfs
pipeline_tracker
erfs_to_cal_ff = individus_to_foyers_fiscaux(erfs_to_cal_ind)
# erfs_to_cal_ff.head()
len(erfs_to_cal_ff[erfs_to_cal_ff["chomage_brut"] >= 93738])
erfs_to_cal_ff[erfs_to_cal_ff["chomage_brut"] >= 93738][
[
"wprm",
"chomage_brut",
"salaire_de_base",
"revenus_capitaux_prelevement_bareme",
"revenus_capitaux_prelevement_liberatoire",
"revenus_capitaux_prelevement_forfaitaire_unique_ir",
"salaire_imposable",
]
]
erfs_save = erfs_to_cal_ind.copy(deep=True)
erfs_to_cal_ind = erfs_save.copy(deep=True)
# ils touchent des indemnités de la fonction publique
# Remove from the individual-level base every tax household (idfoy) that
# contains at least one individual whose chomage_brut reaches the ceiling.
seuil_max_chomage = 93_738
len_init = len(erfs_to_cal_ind)
# One idfoy per offending individual (a household may therefore appear twice).
idfoy_max_chom_ind = list(
    erfs_to_cal_ind.loc[
        erfs_to_cal_ind["chomage_brut"] >= seuil_max_chomage, "idfoy"
    ]
)
print(idfoy_max_chom_ind)
print(
    "On supprime ",
    len(idfoy_max_chom_ind),
    " individus au-dessus du seuil max de chômage : ",
    seuil_max_chomage,
    "€",
)
# A single boolean mask drops all members of the flagged households at once.
# The previous version indexed the frame by idfoy and dropped label by label,
# after zeroing a value on rows that were removed on the very next line; with
# a repeated idfoy the `.at` write re-created a row before dropping it again.
erfs_to_cal_ind = erfs_to_cal_ind[
    ~erfs_to_cal_ind["idfoy"].isin(idfoy_max_chom_ind)
].reset_index(drop=True)
# Sanity check kept from the original: at least one row must have been removed.
assert len(erfs_to_cal_ind) < len_init
erfs_to_cal_ff = individus_to_foyers_fiscaux(erfs_to_cal_ind)
print(
"Désormais, le chomage maximum est de: ",
erfs_to_cal_ff["chomage_brut"].max(),
"€ dans notre base",
)
%%capture
var_name = "chomage_brut"
# Calibrate the variable; `calibration` returns the household-level frame, the
# individual-level frame, a sequence of distributions and a plot object.
erfs_cal_ff, erfs_cal_ind, Distribs_chom, plot_chom = calibration(
    erfs_to_cal_ind, var_name, annee_erfs, annee_pote
)
# Store the reference aggregate: total of the "sum" column of distribution #2.
# NOTE(review): index 2 is bound to `Distrib_CAL` further down while index 1 is
# bound to `Distrib_POTE` — confirm which one is meant to be the reference.
wanted[var_name] = Distribs_chom[2].df["sum"].sum()
# Did the calibration add households to the base?
nb_foyers_avant = erfs_to_cal_ff["idfoy"].nunique()
nb_foyers_apres = erfs_cal_ff["idfoy"].nunique()
if nb_foyers_apres < nb_foyers_avant:
    # Losing households is treated as a calibration failure. The message is
    # now a single formatted string (it used to be a 2-tuple of arguments).
    raise Exception(f"Il y a eu une erreur dans la calibration de {var_name}")
new_ppl = nb_foyers_apres > nb_foyers_avant
# Propagate the calibrated values back to the individual-level base...
erfs_to_cal_ind = foyers_fiscaux_to_individus(
    erfs_to_cal_ind, erfs_cal_ff, [var_name], [var_name]
)
# ...then rebuild the household-level base from it.
erfs_to_cal_ff = individus_to_foyers_fiscaux(erfs_to_cal_ind)
print("On a calibré ", var_name)
plt.figure().clear()
plot_chom
plt.show()
var_list = [
"salaire_imposable",
"retraite_brute",
"chomage_brut",
]
# erfs_cal_ind.head()
for var in var_list:
pipeline_tracker.loc[var, "post_calib"] = round(
(erfs_to_cal_ind["wprm"] * erfs_to_cal_ind[var]).sum()
)
# Taille de la base
pipeline_tracker.loc["Len_ind", "post_calib"] = round(len(erfs_to_cal_ind["wprm"]))
erfsff = individus_to_foyers_fiscaux(erfs_to_cal_ind)
pipeline_tracker.loc["Nb_foyers", "post_calib"] = round((erfsff["wprm"]).sum())
pipeline_tracker.loc["Len_ff", "post_calib"] = round(len(erfsff["wprm"]))
del erfsff
pipeline_tracker
erfs_to_cal_ind.drop(columns=["primes_etc"], inplace=True)
saving_ff = erfs_to_cal_ff.copy()
saving_ind = erfs_to_cal_ind.copy()
erfs_to_cal_ff = saving_ff.copy()
erfs_to_cal_ind = saving_ind.copy()
len(erfs_to_cal_ind.index)
len(erfs_to_cal_ind)
erfs_to_cal_ind["idfoy"].nunique()
erfs_to_cal_ind["idfam"].nunique()
erfs_to_cal_ind["idmen"].nunique()
# Sources : https://fr.statista.com/statistiques/505942/repartition-depenses-assurance-chomage-par-poste-france/
print("Dans POTE, on a un total de ", f"{wanted['chomage_brut']:,}")
print(
"Alors que dans l'ERFS, on a un total de ",
f"{(erfs_to_cal_ff['chomage_brut']*erfs_to_cal_ff['wprm']).sum():,}",
)
print(
"Soit ",
100
* (
(erfs_to_cal_ff["chomage_brut"] * erfs_to_cal_ff["wprm"]).sum()
- wanted["chomage_brut"]
)
/ (wanted["chomage_brut"]),
"% d'écart",
)
Distrib_ERFS = Distribs_chom[0]
Distrib_POTE = Distribs_chom[1]
Distrib_CAL = Distribs_chom[2]
df_erfs = Distrib_ERFS.df
df_cal = Distrib_CAL.df
df_pote = Distrib_POTE.df
erfs_to_cal_ind.isnull().values.any()
erfs_to_cal_ind.isnull().sum().sum()
len(erfs_to_cal_ff[erfs_to_cal_ff["chomage_brut"] >= 93738])
# erfs_to_cal_ff[erfs_to_cal_ff['chomage_brut']>= 93738][['wprm','chomage_brut', 'salaire_de_base', 'revenus_capitaux_prelevement_bareme','revenus_capitaux_prelevement_liberatoire','revenus_capitaux_prelevement_forfaitaire_unique_ir','salaire_imposable']]
erfs_to_cal_ind.head()
erfs_to_cal_ind.columns
# Pour tests
# erfs_to_cal_ind.drop(columns=["f4ba"], inplace=True)
# erfs_to_cal_ind['activite',] = 1.0
# erfs_to_cal_ind['activite'] = 3.0
erfs_ind = erfs_to_cal_ind.copy(deep=True)
# erfs_ind = erfs_ind.sort_values(by=["chomage_brut"])
cols_declarant_principal = [
"pensions_alimentaires_percues",
"retraite_brute",
"rag",
"ric",
"rnc",
"salaire_de_base",
"f4ba",
"rfr",
"revenus_capitaux_prelevement_bareme",
"revenus_capitaux_prelevement_liberatoire",
"revenus_capitaux_prelevement_forfaitaire_unique_ir",
"salaire_imposable",
"chomage_brut",
]
erfs_ind.drop(["rfr"], axis=1, inplace=True)
my_simu, _ = create_simulation(data=erfs_ind, tbs=TBS, period=annee_de_calcul)
erfs_ind = compute_var_in_ff(
my_simu, annee_de_calcul, erfs_ind, ["rfr"], cols_declarant_principal
)
erfs_ind = compute_var_in_ff(
my_simu, annee_de_calcul, erfs_ind, ["nbptr"], cols_declarant_principal
)
erfs_ind
print(len(erfs_ind))
print(
"Somme du Chômage post calibration : "
+ f'{ (erfs_ind["wprm"] * erfs_ind["chomage_brut"]).sum() :,}'
+ " €"
)
to_calc = my_simu.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
print(len(to_calc))
erfs_ind["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()
print(
"Total de CSG imposable sur le chomage en 2021 : ",
f'{(erfs_ind["csg_imposable_chomage"]*erfs_ind["wprm"]).sum():,}',
)
to_calc = my_simu.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
erfs_ind["csg_deductible_chomage"] = to_calc
print(
"Total de CSG deductible sur le chomage en 2021 : ",
f'{(erfs_ind["csg_deductible_chomage"]*erfs_ind["wprm"]).sum():,}',
)
sum_csg_chom = (erfs_ind["csg_deductible_chomage"] * erfs_ind["wprm"]).sum() + (
erfs_ind["csg_imposable_chomage"] * erfs_ind["wprm"]
).sum()
print(
"Total de CSG Chomage calculé : ",
f"{sum_csg_chom:,}",
"€ et attendu : ",
f"{(1_037_000_000):,}",
)
erfs_ind.head()
# tc.assertLessEqual(-sum_csg_chom, 1.2 * 1_037_000_000)
# tc.assertGreaterEqual(-sum_csg_chom, 0.8 * 1_037_000_000)
erfs_ind.tail()
erfs_ind.isnull().values.any()
erfs_ind.isnull().sum().sum()
print("Soit ", 100 * (sum_csg_chom - 1_037_000_000) / (1_037_000_000), "% d'écart")
base = erfs_ind # [['fake_id','chomage_brut', 'csg_deductible_chomage', 'csg_imposable_chomage']]
base = base.sort_values(by="chomage_brut")
base[
["fake_id", "chomage_brut", "csg_deductible_chomage", "csg_imposable_chomage"]
].loc[[223125, 177174, 289306]]
base[-10:]
base.isnull().values.any()
base.isnull().sum().sum()
4 * 41136
# Effective CSG rates per row: simulated CSG amount divided by the gross
# unemployment benefit, rounded to 3 decimals. Rows whose chomage_brut is 0
# yield NaN/inf because of the division by zero.
# NOTE(review): elsewhere in this file the simulated totals are negated before
# being compared with the target, so these ratios are presumably negative —
# confirm the sign convention.
base["taux_imp"] = round(
(base["csg_imposable_chomage"] / base["chomage_brut"]), 3
) # expected magnitude 0.024, i.e. 2.4 % (not 0.024 %)
base["taux_ded"] = round(
(base["csg_deductible_chomage"] / base["chomage_brut"]), 3
) # expected magnitude 0.038, i.e. 3.8 % (not 0.038 %)
base[-10:]
# A-t'on bien un meme taux?
base["taux_ded"].nunique()
print("taux_ded", base["taux_ded"].unique())
base["taux_imp"].nunique()
print("taux_imp", base["taux_imp"].unique())
# NB: les taux nans correspondent aux divisons /0 (les gens non concernés)
len(erfs_ind)
erfs_ind = erfs_ind[erfs_ind["chomage_brut"] > 0]
len(erfs_ind)
# erfs_ind.sort_values(by=['chomage_brut'], inplace=True)
# erfs_ind = erfs_ind.loc[[4, 5695, 65040]]
erfs_ind
erfs_ind.columns
erfs_calc = erfs_ind[
[
"activite",
"age",
"categorie_salarie",
"contrat_de_travail",
"date_naissance",
"effectif_entreprise",
"heures_remunerees_volume",
"idfam",
"idfoy",
"idmen",
"noindiv",
"quifam",
"quifoy",
"quimen",
"statut_marital",
"idmen_original",
"idfoy_original",
"idfam_original",
"idmen_x",
"wprm",
"zone_apl",
"fake_id",
"quimenof",
"quifoyof",
"quifamof",
"chomage_brut",
"rfr",
"nbptr",
]
]
# seuil_exoneration = seuils.seuil_rfr1 + (nbptr - 1) * seuils.demi_part_suppl_rfr1
# seuil_reduction = seuils.seuil_rfr2 + (nbptr - 1) * seuils.demi_part_suppl_rfr2
def calc_taux(
    rfr,
    seuil_exoneration,
    seuil_reduction,
    *,
    taux_imp_plein=0.024,
    taux_ded=0.038,
):
    """Return the ``(taux_imp, taux_ded)`` CSG rates for one household.

    Args:
        rfr: Reference income compared against the two thresholds.
        seuil_exoneration: Below this value (strictly) both rates are 0.
        seuil_reduction: Below this value (strictly), and at or above
            ``seuil_exoneration``, only the deductible rate applies.
        taux_imp_plein: Non-deductible rate used at the full rate
            (defaults to the previously hard-coded 0.024).
        taux_ded: Deductible rate used at the reduced and full rates
            (defaults to the previously hard-coded 0.038).

    Returns:
        A ``(taux_imp, taux_ded)`` tuple.
    """
    # Exemption
    if rfr < seuil_exoneration:
        return 0, 0
    # Reduced rate: deductible part only
    if rfr < seuil_reduction:
        return 0, taux_ded
    # Full rate
    return taux_imp_plein, taux_ded
# En 2018
# seuil_exoneration = seuils.seuil_rfr1 + (nbptr - 1) * seuils.demi_part_suppl_rfr1
# seuil_reduction = seuils.seuil_rfr2 + (nbptr - 1) * seuils.demi_part_suppl_rfr2
# ie:
# seuil_exoneration = 11018.0 + (nbptr - 1) * 2942.0
# seuil_reduction = 14404.0 + (nbptr - 1) * 3846.0
erfs_calc["seuil_exoneration"] = erfs_calc["nbptr"].apply(
lambda row: 11018.0 + (row - 1) * 2942.0
)
erfs_calc["seuil_reduction"] = erfs_calc["nbptr"].apply(
lambda row: 14404.0 + (row - 1) * 3846.0
)
erfs_calc.head()
def func1(cols):
    """Row-wise adapter: first element (taux_imp) of ``calc_taux`` for ``cols``."""
    taux_imp, _ = calc_taux(
        cols["rfr"], cols["seuil_exoneration"], cols["seuil_reduction"]
    )
    return taux_imp


def func2(cols):
    """Row-wise adapter: second element (taux_ded) of ``calc_taux`` for ``cols``."""
    _, taux_ded = calc_taux(
        cols["rfr"], cols["seuil_exoneration"], cols["seuil_reduction"]
    )
    return taux_ded
erfs_calc["taux_imp"] = erfs_calc.apply(func1, axis=1)
erfs_calc["taux_ded"] = erfs_calc.apply(func2, axis=1)
erfs_calc
# 4 * PSS = 4 * 41136
erfs_calc["base"] = erfs_calc["chomage_brut"].apply(
lambda row: 0.9825 * row if row < 4 * 41136 else row
)
erfs_calc.head()
erfs_calc["base"].sum()
erfs_calc["montant_csg_imp"] = erfs_calc["base"] * erfs_calc["taux_imp"]
erfs_calc["montant_csg_ded"] = erfs_calc["base"] * erfs_calc["taux_ded"]
erfs_calc["cho_seuil_exo"] = (
12 * 1.0 * 9.88 * 151.67
) # erfs_calc["heures_remunerees_volume"].apply(lambda row :
erfs_calc["cho_seuil_exo"].sum() #!! ANNUEL
def _csg_apres_exoneration(cols, taux_col):
    """Amount ``base * rate`` reduced by the shortfall under ``cho_seuil_exo``.

    The amount is lowered by however much ``chomage_brut - amount`` falls
    short of ``cho_seuil_exo``, and is never allowed to go below 0.
    """
    montant = cols["base"] * cols[taux_col]
    manque = max(cols["cho_seuil_exo"] - (cols["chomage_brut"] - montant), 0)
    return max(montant - manque, 0)


def func3(cols):
    """Row-wise amount computed with the ``taux_imp`` rate of ``cols``."""
    return _csg_apres_exoneration(cols, "taux_imp")


def func4(cols):
    """Row-wise amount computed with the ``taux_ded`` rate of ``cols``."""
    return _csg_apres_exoneration(cols, "taux_ded")
erfs_calc["csg_imp"] = erfs_calc.apply(func3, axis=1)
erfs_calc["csg_ded"] = erfs_calc.apply(func4, axis=1)
erfs_ff = individus_to_foyers_fiscaux(erfs_calc)
erfs_ff.columns
sum_imp = (erfs_ff["csg_imp"] * erfs_ff["wprm"]).sum()
sum_imp
sum_ded = (erfs_ff["csg_ded"] * erfs_ff["wprm"]).sum()
sum_ded
print("Soit le total de CSG chomage:", f"{ sum_imp + sum_ded:,}")
erfs_ff[
[
"idfoy",
"wprm",
"heures_remunerees_volume",
"chomage_brut",
"rfr",
"nbptr",
"taux_imp",
"taux_ded",
"seuil_exoneration",
"seuil_reduction",
"base",
"cho_seuil_exo",
"csg_imp",
"csg_ded",
]
]
erfs_ind = erfs_calc.copy()
erfs_ind.columns
print(len(erfs_ind))
print(
"Somme du Chômage post calibration : "
+ f'{ (erfs_ind["wprm"] * erfs_ind["chomage_brut"]).sum() :,}'
+ " €"
)
annee_de_calcul = 2018
# SImulation
erfs_min = erfs_ind[
[
"activite",
"age",
"categorie_salarie",
"contrat_de_travail",
"date_naissance",
"effectif_entreprise",
"heures_remunerees_volume",
"idfam",
"idfoy",
"idmen",
"noindiv",
"quifam",
"quifoy",
"quimen",
"statut_marital",
"idmen_original",
"idfoy_original",
"idfam_original",
"idmen_x",
"wprm",
"zone_apl",
"fake_id",
"quimenof",
"quifoyof",
"quifamof",
"chomage_brut",
"rfr",
"nbptr",
]
]
my_simu, _ = create_simulation(data=erfs_min, tbs=TBS, period=annee_de_calcul)
# Switch tracing on BEFORE the first calculation: it used to be enabled after
# csg_imposable_chomage had already been computed, so the log printed below
# presumably missed that computation (confirm against the OpenFisca tracer
# documentation).
my_simu.trace = True
to_calc = my_simu.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
print(len(to_calc))
erfs_ind["csg_imposable_chomage"] = to_calc
# The messages report the actual computation year instead of a fixed "2021".
print(
    f"Total de CSG imposable sur le chomage en {annee_de_calcul} : ",
    f'{(erfs_ind["csg_imposable_chomage"]*erfs_ind["wprm"]).sum():,}',
)
to_calc = my_simu.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
erfs_ind["csg_deductible_chomage"] = to_calc
print(
    f"Total de CSG deductible sur le chomage en {annee_de_calcul} : ",
    f'{(erfs_ind["csg_deductible_chomage"]*erfs_ind["wprm"]).sum():,}',
)
# Weighted total of both CSG components.
sum_csg_chom = (erfs_ind["csg_deductible_chomage"] * erfs_ind["wprm"]).sum() + (
    erfs_ind["csg_imposable_chomage"] * erfs_ind["wprm"]
).sum()
erfs_ind.head()
# Print the calculation steps recorded by the tracer.
my_simu.tracer.print_computation_log()
# tc.assertLessEqual(-sum_csg_chom, 1.2 * 1_037_000_000)
# tc.assertGreaterEqual(-sum_csg_chom, 0.8 * 1_037_000_000)
print(
"Total de CSG Chomage calculé : ",
f"{sum_csg_chom:,}",
"€ et attendu : ",
f"{(1_037_000_000):,}",
)
erfs_ind.columns
erfs_ind[
[
"idfoy",
"quifoy",
"wprm",
"chomage_brut",
"rfr",
"nbptr",
"seuil_exoneration",
"seuil_reduction",
"taux_imp",
"taux_ded",
"base",
"cho_seuil_exo",
"csg_imp",
"csg_ded",
"montant_csg_imp",
"csg_imposable_chomage",
"csg_deductible_chomage",
]
]
erfs_ind["diff_imp"] = (
abs(-erfs_ind["csg_imposable_chomage"] - erfs_ind["csg_imp"]) / erfs_ind["csg_imp"]
)
erfs_ind["diff_imp"].sort_values()
erfs_ind["diff_imp"].max()
# Rows that are NOT exempted: RFR at or above the RFR exemption threshold,
# i.e. the same rule as `calc_taux`. The previous filter compared `rfr` with
# `cho_seuil_exo`, which is a threshold on the allowance, not on the RFR.
# `.copy()` makes the result an independent frame so that columns can be added
# to it later without touching `erfs_ind`.
erfs_payants = erfs_ind[erfs_ind["rfr"] >= erfs_ind["seuil_exoneration"]].copy()
print(
    "Sur ",
    len(erfs_ind),
    " personnes touchant du chômage, soit un total de ",
    f'{(erfs_ind["chomage_brut"]*erfs_ind["wprm"]).sum():,}',
    "€, ",
    len(erfs_payants),
    " personnes ne sont pas exonérées, soit une masse imposable de ",
    f'{(erfs_payants["chomage_brut"]*erfs_payants["wprm"]).sum():,}',
)
# Reduced rate strictly below the reduction threshold, full rate at or above
# it (rows exactly on the threshold used to belong to neither group).
erfs_red = erfs_payants[erfs_payants["rfr"] < erfs_payants["seuil_reduction"]]
erfs_plein = erfs_payants[erfs_payants["rfr"] >= erfs_payants["seuil_reduction"]]
# Weighted benefit mass of each group.
masse_red = (erfs_red["chomage_brut"] * erfs_red["wprm"]).sum()
masse_plein = (erfs_plein["chomage_brut"] * erfs_plein["wprm"]).sum()
print("Total taux reduit :", masse_red)
print("Total taux plein :", masse_plein)
# Apply the flat rates: no non-deductible part at the reduced rate.
imp_red = masse_red * 0
imp_plein = masse_plein * 0.024
ded_red = masse_red * 0.038
ded_plein = masse_plein * 0.038
print(
    "Soit le total de CSG chomage: ",
    f"{(imp_red + imp_plein + ded_plein + ded_red) :,}",
)
print(
    "Attention cela ne prend pas en compte l'exoneration faite si chomage_brut - csg_due < seuil_exo"
)
def _indicatrice_non_exonere(cols, montant_col):
    """Return 0 if ``chomage_brut - cols[montant_col] < cho_seuil_exo``, else 1."""
    reste = cols["chomage_brut"] - cols[montant_col]
    return 0 if reste < cols["cho_seuil_exo"] else 1


def exo_ind_pour_max(cols):
    """0/1 indicator based on ``montant_csg_imp`` (0 = under ``cho_seuil_exo``)."""
    return _indicatrice_non_exonere(cols, "montant_csg_imp")


def exo_ded_pour_max(cols):
    """0/1 indicator based on ``montant_csg_ded`` (0 = under ``cho_seuil_exo``)."""
    return _indicatrice_non_exonere(cols, "montant_csg_ded")
# Add the two 0/1 indicators. `assign` returns a new frame, so nothing is
# written into a filtered slice of another DataFrame.
erfs_payants = erfs_payants.assign(
    exo_ded_pour_max=erfs_payants.apply(exo_ded_pour_max, axis=1),
    exo_imp_pour_max=erfs_payants.apply(exo_ind_pour_max, axis=1),
)
erfs_payants
# Reduced rate strictly below the reduction threshold, full rate at or above
# it, so that rows exactly on the threshold are not lost.
erfs_red = erfs_payants[(erfs_payants["rfr"] < erfs_payants["seuil_reduction"])]
erfs_plein = erfs_payants[erfs_payants["rfr"] >= erfs_payants["seuil_reduction"]]
print(
    "Total taux reduit :",
    f'{(erfs_red["chomage_brut"]*erfs_red["wprm"]).sum() :,}',
    "€",
)
print(
    "Total taux plein :",
    f'{(erfs_plein["chomage_brut"]*erfs_plein["wprm"]).sum() :,}',
    "€",
)
# Each subset is multiplied by ITS OWN indicator column. The previous code
# multiplied by the columns of `erfs_payants` and relied on index alignment
# (and on NaN-skipping sums) to discard the rows of the other subset.
imp_red = (
    erfs_red["chomage_brut"] * erfs_red["wprm"] * erfs_red["exo_imp_pour_max"]
).sum() * 0
imp_plein = (
    erfs_plein["chomage_brut"] * erfs_plein["wprm"] * erfs_plein["exo_imp_pour_max"]
).sum() * 0.024
ded_red = (
    erfs_red["chomage_brut"] * erfs_red["wprm"] * erfs_red["exo_ded_pour_max"]
).sum() * 0.038
ded_plein = (
    erfs_plein["chomage_brut"] * erfs_plein["wprm"] * erfs_plein["exo_ded_pour_max"]
).sum() * 0.038
print(
    "Soit le total de CSG chomage: ",
    f"{(imp_red + imp_plein + ded_plein + ded_red) :,}",
)
1 / 0
erfs_to_cal_ind = saving_ind.copy()
sample = erfs_to_cal_ind.loc[[223125, 177174, 289306]]
sample
erfs_to_cal_ind[erfs_to_cal_ind["idfoy"] == 113256]
erfs_to_cal_ind[erfs_to_cal_ind["idfoy"] == 89943]
erfs_to_cal_ind[erfs_to_cal_ind["idfoy"] == 146878]
# Work on an explicit copy of the three households under investigation: the
# simulated columns are added below, and writing into a filtered slice either
# triggers SettingWithCopyWarning or is not guaranteed to take effect.
sample = erfs_to_cal_ind[
    erfs_to_cal_ind["idfoy"].isin([113256, 89943, 146878])
].copy()
# sample['activite']=4
sample
# to_calc
# to_calc
simulation, _ = create_simulation(data=sample, tbs=TBS, period=annee_de_calcul)
# activate the trace
# simulation.trace = True
to_calc = simulation.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
sample["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()
to_calc = simulation.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
sample["csg_deductible_chomage"] = to_calc
# Side-by-side view of the inputs and of the two simulated CSG components.
x = sample[
    [
        "rfr",
        "fake_id",
        "chomage_brut",
        "csg_deductible_chomage",
        "csg_imposable_chomage",
    ]
]
print(x)
# print calculation steps
# simulation.tracer.print_computation_log()
sample1 = sample[
[
"activite",
"age",
"categorie_salarie",
"contrat_de_travail",
"date_naissance",
"effectif_entreprise",
"heures_remunerees_volume",
"idfam",
"idfoy",
"idmen",
"noindiv",
"quifam",
"quifoy",
"quimen",
"chomage_brut",
"csg_imposable_chomage",
"csg_deductible_chomage",
]
]
sample2 = sample[
[
"statut_marital",
"idmen_original",
"idfoy_original",
"idfam_original",
"idmen_x",
"wprm",
"zone_apl",
"fake_id",
"quimenof",
"quifoyof",
"quifamof",
"pensions_alimentaires_percues",
"retraite_brute",
"rag",
"ric",
"rnc",
"chomage_brut",
"csg_imposable_chomage",
"csg_deductible_chomage",
]
]
sample3 = sample[
[
"salaire_de_base",
"f4ba",
"rfr",
"revenus_capitaux_prelevement_bareme",
"revenus_capitaux_prelevement_liberatoire",
"revenus_capitaux_prelevement_forfaitaire_unique_ir",
"salaire_imposable",
"chomage_brut",
"csg_imposable_chomage",
"csg_deductible_chomage",
]
]
sample1
sample2
sample3
seuil = 11408 + 3046
seuil
seuil_reduc = 14914 + 3982
seuil_reduc
# First experiment: drop the stored rfr and let the simulation recompute it.
# `drop(columns=...)` returns a new frame, so the filtered slice of
# `erfs_to_cal_ind` is never modified in place.
sample = erfs_to_cal_ind[
    erfs_to_cal_ind["idfoy"].isin([113256, 89943, 146878])
].drop(columns=["rfr"])
simulation, _ = create_simulation(data=sample, tbs=TBS, period=annee_de_calcul)
# activate the trace
# simulation.trace = True
sample = compute_var_in_ff(
    simulation,
    annee_de_calcul,
    sample,
    ["rfr"],
    ["rfr", "chomage_brut"],
)
to_calc = simulation.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
sample["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()
to_calc = simulation.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
sample["csg_deductible_chomage"] = to_calc
print(sample[["idfoy", "quifoy", "chomage_brut", "rfr", "csg_deductible_chomage"]])
# print calculation steps
# simulation.tracer.print_computation_log()
# Second experiment: simulate from a minimal set of columns, keeping the
# stored rfr. The column selection is copied because columns are added below.
sample = erfs_to_cal_ind[erfs_to_cal_ind["idfoy"].isin([113256, 89943, 146878])]
sample1 = sample[["idfoy", "quifoy", "chomage_brut", "rfr"]].copy()
sample1
simulation, _ = create_simulation(data=sample1, tbs=TBS, period=annee_de_calcul)
# activate the trace
# simulation.trace = True
to_calc = simulation.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
sample1["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()
to_calc = simulation.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
sample1["csg_deductible_chomage"] = to_calc
print(sample1)
# print calculation steps
# simulation.tracer.print_computation_log()
1 / 0
# Per-bucket comparison table: calibrated vs POTE totals, and simulated CSG.
df_check = pd.DataFrame(
    columns=[
        "total_cal",
        "total_pote",
        "error_chomage",
        "total_csg",
        "total_csg_imp",
        "total_csg_ded",
    ],
    index=range(len(Distrib_CAL.bucket_list)),
)
for nb in Distrib_CAL.bucket_list:
    bucket = Distrib_CAL.bucket_list[nb]
    idx = int(nb)
    sample_ff = bucket.sample
    # Bucket totals. Every write goes through `df_check.at[...]`: the former
    # chained form `df_check[col].loc[idx] = ...` assigns to an intermediate
    # Series and is not guaranteed to reach `df_check`.
    total_cal = Distrib_CAL.df["sum"][idx]
    total_pote = Distrib_POTE.df["sum"][idx]
    df_check.at[idx, "total_cal"] = total_cal
    df_check.at[idx, "total_pote"] = total_pote
    # Relative error in percent; bucket 0 is forced to 0 as before.
    if idx > 0:
        df_check.at[idx, "error_chomage"] = 100 * (total_cal - total_pote) / total_pote
    else:
        df_check.at[idx, "error_chomage"] = 0
    # Back to individuals: every person belonging to a household of the bucket.
    # The selection is copied because simulated columns are added to it.
    lst_idfoy = list(sample_ff["idfoy"])
    sample = erfs_ind[erfs_ind["idfoy"].isin(lst_idfoy)].copy()
    tc.assertGreaterEqual(len(sample), len(sample_ff))
    # Simulated CSG on unemployment benefits for this bucket.
    my_simux, _ = create_simulation(data=sample, tbs=TBS, period=annee_de_calcul)
    to_calcIMP = my_simux.calculate_add(
        "csg_imposable_chomage", period=annee_de_calcul
    )
    sample["csg_imposable_chomage"] = to_calcIMP
    to_calcDED = my_simux.calculate_add(
        "csg_deductible_chomage", period=annee_de_calcul
    )
    sample["csg_deductible_chomage"] = to_calcDED
    total_imp = (sample["csg_imposable_chomage"] * sample["wprm"]).sum()
    total_ded = (sample["csg_deductible_chomage"] * sample["wprm"]).sum()
    df_check.at[idx, "total_csg_imp"] = round(total_imp, 0)
    df_check.at[idx, "total_csg_ded"] = round(total_ded, 0)
    df_check.at[idx, "total_csg"] = round(total_imp + total_ded, 0)
    # Drop the reference to the simulation before the next iteration.
    del my_simux
    # break
# df_check = df_check.astype(float).round(1)  # reported as not working
df_check
erfs_cal_ind = erfs_to_cal_ind.copy()
erfs_cal_ff = individus_to_foyers_fiscaux(erfs_cal_ind)
# On vérifie qu'on ne perd personne en route
tc.assertEqual(
len(erfs_cal_ff["idfoy"].unique()), len(erfs_to_cal_ind["idfoy"].unique())
)
# Année de départ: année de la base vieillie
year_start = int(config.get("YEAR_POTE"))
# Année de fin : année de production de la base pour calculs sur l'API
year_end = int(config.get("YEAR_COMPUTATION"))
print("On passe la base de ", year_start, " à ", year_end)
erfs_to_cal_ind.columns
cols_to_inflate = [
"chomage_brut",
"pensions_alimentaires_percues",
"rag",
"ric",
"rnc",
"salaire_de_base",
"f4ba",
"retraite_brute",
"rfr",
"revenus_capitaux_prelevement_bareme",
"revenus_capitaux_prelevement_liberatoire",
"revenus_capitaux_prelevement_forfaitaire_unique_ir",
"salaire_imposable",
]
erfs_inflated_ff = inflation_economique(
erfs_cal_ff, cols_to_inflate, year_start, year_end
)
# On ajoute les valeurs dans la base individus
cols_declarant_principal = cols_to_inflate
to_update = cols_to_inflate
erfs_inflated_ind = foyers_fiscaux_to_individus(
erfs_cal_ind, erfs_inflated_ff, to_update, cols_declarant_principal, new_ppl=False
)
for var in data:
pipeline_tracker.loc[var, "infl_eco21"] = round(
(erfs_inflated_ind["wprm"] * erfs_inflated_ind[var]).sum()
)
# Taille de la base
pipeline_tracker.loc["Len_ind", "infl_eco21"] = round(len(erfs_inflated_ind["wprm"]))
erfsff = individus_to_foyers_fiscaux(erfs_inflated_ind)
pipeline_tracker.loc["Nb_foyers", "infl_eco21"] = round((erfsff["wprm"]).sum())
pipeline_tracker.loc["Len_ff", "infl_eco21"] = round(len(erfsff["wprm"]))
del erfsff
pipeline_tracker
erfs_inflated_ff, erfs_inflate_foyers_ind = inflation_foyers(
erfs_inflated_ind, year_start, year_end
)
for var in data:
pipeline_tracker.loc[var, "infl_ff21"] = round(
(erfs_inflate_foyers_ind["wprm"] * erfs_inflate_foyers_ind[var]).sum()
)
# Taille de la base
pipeline_tracker.loc["Len_ind", "infl_ff21"] = round(
len(erfs_inflate_foyers_ind["wprm"])
)
erfsff = individus_to_foyers_fiscaux(erfs_inflate_foyers_ind)
pipeline_tracker.loc["Nb_foyers", "infl_ff21"] = round((erfsff["wprm"]).sum())
pipeline_tracker.loc["Len_ff", "infl_ff21"] = round(len(erfsff["wprm"]))
del erfsff
pipeline_tracker
# On bruite la base par souci d'anonymat, en accord avec nos engagements auprès des fournisseurs de données (la DGFiP, pour POTE).
erfs_ind = bruitage(erfs_inflate_foyers_ind)
erfs_ind.columns
erfs_ind.columns
erfs_ind.describe()
# Drop the columns considered empty: maximum equal to 0, or maximum missing
# (which is what `max()` yields for an all-NaN column).
# `erfs_ind` is rebound inside the loop, while the iteration runs over the
# column index captured when the loop started.
for col in erfs_ind.columns:
    col_max = erfs_ind[col].max()
    # NaN never compares equal to anything, itself included, so the former
    # `== np.nan` test could not match; `pd.isna` is the reliable check.
    if col_max == 0 or pd.isna(col_max):
        print("On supprime cette colonne qui est vide :", col)
        erfs_ind = erfs_ind.drop(col, axis=1)
# Ajout des valeurs
for var in data:
pipeline_tracker.loc[var, "final"] = round((erfs_ind["wprm"] * erfs_ind[var]).sum())
# Taille de la base
pipeline_tracker.loc["Len_ind", "final"] = round(len(erfs_ind["wprm"]))
erfs_ff = individus_to_foyers_fiscaux(erfs_ind)
pipeline_tracker.loc["Nb_foyers", "final"] = round((erfs_ff["wprm"]).sum())
pipeline_tracker.loc["Len_ff", "final"] = round(len(erfs_ff["wprm"]))
pipeline_tracker
# NB: on prend toujours la base locale
N = 3
erfs_ind = pd.read_hdf(
config.get("DATA_OUT")
+ "erfs_final_ind_"
+ config.get("YEAR_ERFS")
+ "_aged_to_"
+ config.get("YEAR_COMPUTATION")
+ "_N_"
+ str(N)
+ ".h5"
)
# Weighted number of households in the final base.
nb_foyers = erfs_ind["wprm"].sum()
print("Total de foyers: ", nb_foyers, " agrégat POTE 2019 : 39_331_689 ")
tc.assertGreater(
    abs(nb_foyers), 39_331_689
)  # 39_331_689 is the number of households in 2019, the last known value
# Weighted total of gross pensions (the message used to call it "RFR").
sum_retraite = (erfs_ind["retraite_brute"] * erfs_ind["wprm"]).sum()
# `wanted` is only filled for the variables calibrated earlier in the session;
# `.get` avoids a KeyError when no pension reference was stored.
ref_retraite = wanted.get("retraite_brute")
print("Total de retraites: ", sum_retraite, " agrégat POTE : ", ref_retraite)
if ref_retraite is not None:
    tc.assertGreater(abs(sum_retraite), 0.8 * ref_retraite)
else:
    print("Pas d'agrégat de référence pour retraite_brute : vérification ignorée")
my_simu, _ = create_simulation(data=erfs_ind, tbs=TBS, period=annee_de_calcul)
print(
"Somme des retraites post calibration : "
+ f'{ (erfs_ind["wprm"] * erfs_ind["retraite_brute"]).sum() :,}'
+ " €"
)
to_calc = my_simu.calculate_add("csg_imposable_retraite", period=annee_de_calcul)
erfs_ind["csg_imposable_retraite"] = to_calc
# my_simu.tracer.print_computation_log()
print(
"Total de CSG imposable sur les retraites en 2021 : ",
f'{(erfs_ind["csg_imposable_retraite"]*erfs_ind["wprm"]).sum():,}',
)
to_calc = my_simu.calculate_add("csg_deductible_retraite", period=annee_de_calcul)
erfs_ind["csg_deductible_retraite"] = to_calc
print(
"Total de CSG deductible sur les retraites en 2021 : ",
f'{(erfs_ind["csg_deductible_retraite"]*erfs_ind["wprm"]).sum():,}',
)
sum_csg_ret = (erfs_ind["csg_deductible_retraite"] * erfs_ind["wprm"]).sum() + (
erfs_ind["csg_imposable_retraite"] * erfs_ind["wprm"]
).sum()
print(
"Total de CSG Retraites calculé : ",
f"{sum_csg_ret:,}",
"€ et attendu : ",
(21_291_000_000),
)
tc.assertLessEqual(-sum_csg_ret, 1.2 * 21_291_000_000)
tc.assertGreaterEqual(-sum_csg_ret, 0.8 * 21_291_000_000)
print(
"Somme du Chômage post calibration : "
+ f'{ (erfs_ind["wprm"] * erfs_ind["chomage_brut"]).sum() :,}'
+ " €"
)
to_calc = my_simu.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
erfs_ind["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()
print(
"Total de CSG imposable sur le chomage en 2021 : ",
f'{(erfs_ind["csg_imposable_chomage"]*erfs_ind["wprm"]).sum():,}',
)
to_calc = my_simu.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
erfs_ind["csg_deductible_chomage"] = to_calc
print(
"Total de CSG deductible sur le chomage en 2021 : ",
f'{(erfs_ind["csg_deductible_chomage"]*erfs_ind["wprm"]).sum():,}',
)
sum_csg_chom = (erfs_ind["csg_deductible_chomage"] * erfs_ind["wprm"]).sum() + (
erfs_ind["csg_imposable_chomage"] * erfs_ind["wprm"]
).sum()
print(
"Total de CSG Chomage calculé : ",
f"{sum_csg_chom:,}",
"€ et attendu : ",
f"{1_037_000_000:,}",
)
tc.assertLessEqual(-sum_csg_chom, 1.2 * 1_037_000_000)
tc.assertGreaterEqual(-sum_csg_chom, 0.8 * 1_037_000_000)
to_calc = my_simu.calculate_add("crds_retraite", period=annee_de_calcul)
erfs_ind["crds_retraite"] = to_calc
sum_crds_ret = (erfs_ind["crds_retraite"] * erfs_ind["wprm"]).sum()
print(
"Somme pondérée de CRDS Retraite en ",
annee_de_calcul,
" : " + f"{sum_crds_ret:,}",
)
print(
"Total de CRDS sur les Retraites calculé : ",
f"{sum_crds_ret:,}",
"€ et attendu : ",
(1000_000),
)
# tc.assertLessEqual(sum_crds_ret, 1.2 * 000_000)
# tc.assertGreaterEqual(sum_crds_ret, 0.8 * 000_000)
to_calc = my_simu.calculate_add("crds_chomage", period=annee_de_calcul)
erfs_ind["crds_chomage"] = to_calc
sum_crds_chom = (erfs_ind["crds_chomage"] * erfs_ind["wprm"]).sum()
print(
"Somme pondérée de CRDS Chômage en ",
annee_de_calcul,
" : " + f"{sum_crds_chom:,}",
)
print(
"Total de CRDS sur le Chômage calculé : ",
f"{sum_crds_chom:,}",
"€ et attendu : ",
(1000_000),
)
# tc.assertLessEqual(sum_crds_chom, 1.2 * 000_000)
# tc.assertGreaterEqual(sum_crds_chom, 0.8 * 000_000)