---
title: INVESTIGATIONS SUR LES CHIFFRES DU CHOMAGE
keywords: fastai
sidebar: home_sidebar
nb_path: "notebooks/retraitement_erfs-fpr/investigation_chiffres_chomage.ipynb"
---
{% raw %}
{% endraw %}

Imports

{% raw %}
import gc
import json
import unittest

import numpy as np
import pandas as pd
from openfisca_france import FranceTaxBenefitSystem

TBS = FranceTaxBenefitSystem()

from leximpact_socio_fisca_simu_etat.config import Configuration
from leximpact_socio_fisca_simu_etat.logger import logger as log
from matplotlib import pyplot as plt

tc = unittest.TestCase()
config = Configuration(project_folder="leximpact-prepare-data")
{% endraw %} {% raw %}
from leximpact_prepare_data.aging_tools import (
    bruitage,
    calib_initiale_ff,
    inflation_economique,
    inflation_foyers,
)

# from leximpact_prepare_data.calib_and_copules import reduce_bucket_number
from leximpact_prepare_data.calibration_tools import (
    ajout_gens_en_haut,
    calibration,
    calibration_quantiles,
    compare_distributions,
    distrib_to_quantiles,
)

# from leximpact_prepare_data.copules_add_var import (
#    convert_to_openfisca,
#    get_ratios,
#    integration_data_ff,
# )
from leximpact_prepare_data.enlargement import enlarge
from leximpact_prepare_data.monte_carlo import *
from leximpact_prepare_data.reduce_data import remove_useless_variables

# Import des modules de calcul spécifiques
from leximpact_prepare_data.toolbase import (
    compute_var_in_ff,
    create_simulation,
    foyers_fiscaux_to_individus,
    individus_to_foyers_fiscaux,
)
{% endraw %} {% raw %}
log.debug(config)
{% endraw %} {% raw %}
annee_erfs = config.get("YEAR_ERFS")
annee_pote = config.get("YEAR_POTE")
annee_de_calcul = config.get("YEAR_COMPUTATION")
{% endraw %} {% raw %}
wanted = {}
{% endraw %}

8 - Calibration des variables 2019 (ind to ff)

Import

{% raw %}
# Load the individual-level ERFS base; the file name is assembled from the
# configured output folder and the ERFS / POTE years.
# (presumably the output of pipeline step 07 — confirm against the producer)
erfs_to_cal_ind = pd.read_hdf(
    "".join(
        [
            config.get("DATA_OUT"),
            "07_erfs_salaire_to_cal_ind",
            config.get("YEAR_ERFS"),
            "_inflated_to_",
            config.get("YEAR_POTE"),
            ".h5",
        ]
    )
)
# erfs_to_cal_ind.columns
{% endraw %} {% raw %}
# Tracking table: one column per pipeline stage. Rows (one per tracked
# quantity) are created on first assignment through ``.loc``.
pipeline_tracker = pd.DataFrame(
    columns=[
        "init",
        "enlarge",
        "post_calib_RFR",
        "post_calib",
        "infl_eco19",
        "infl_ff19",
        "infl_eco21",
        "infl_ff21",
        "final",
    ]
)

# Display formatting
pd.options.display.max_colwidth = 80
pd.options.display.float_format = "{:,.7f}".format

# Variables of interest
data = ["f4ba", "chomage_brut", "salaire_de_base", "retraite_brute"]

# Initial state, measured on a working copy of the individual base
erfs = erfs_to_cal_ind.copy()

# Size of the base: number of individual rows, weighted number of foyers,
# number of foyer rows
pipeline_tracker.loc["Len_ind", "init"] = round(len(erfs["wprm"]))
erfsff = individus_to_foyers_fiscaux(erfs)
pipeline_tracker.loc["Nb_foyers", "init"] = round(erfsff["wprm"].sum())
pipeline_tracker.loc["Len_ff", "init"] = round(len(erfsff["wprm"]))

# Weighted total of each variable of interest
for var in data:
    pipeline_tracker.loc[var, "init"] = round(erfs[var].mul(erfs["wprm"]).sum())

# The working frames are no longer needed
del erfsff, erfs
pipeline_tracker
{% endraw %} {% raw %}
erfs_to_cal_ff = individus_to_foyers_fiscaux(erfs_to_cal_ind)
# erfs_to_cal_ff.head()
{% endraw %}

Calibration

Superscript de calibration

{% raw %}
len(erfs_to_cal_ff[erfs_to_cal_ff["chomage_brut"] >= 93738])
erfs_to_cal_ff[erfs_to_cal_ff["chomage_brut"] >= 93738][
    [
        "wprm",
        "chomage_brut",
        "salaire_de_base",
        "revenus_capitaux_prelevement_bareme",
        "revenus_capitaux_prelevement_liberatoire",
        "revenus_capitaux_prelevement_forfaitaire_unique_ir",
        "salaire_imposable",
    ]
]
{% endraw %} {% raw %}
erfs_save = erfs_to_cal_ind.copy(deep=True)
{% endraw %}

Calibration du chomage

Application du seuil max de chomage

{% raw %}
erfs_to_cal_ind = erfs_save.copy(deep=True)
{% endraw %} {% raw %}
# Foyers holding an individual above the unemployment-benefit cap are taken out
# of the calibration (original note: they receive civil-service allowances).
SEUIL_MAX_CHOMAGE = 93_738  # cap applied to chomage_brut, in euros

len_init = len(erfs_to_cal_ind)

# idfoy of every individual at or above the cap. A foyer is listed once per
# such individual, so the list may contain the same idfoy several times.
idfoy_max_chom_ind = list(
    erfs_to_cal_ind.loc[
        erfs_to_cal_ind["chomage_brut"] >= SEUIL_MAX_CHOMAGE, "idfoy"
    ]
)
print(idfoy_max_chom_ind)

print(
    "On supprime ",
    len(idfoy_max_chom_ind),
    " individus au-dessus du seuil max de chômage : ",
    SEUIL_MAX_CHOMAGE,
    "€",
)

# Remove every member of the affected foyers in a single pass. A boolean mask
# is insensitive to duplicated idfoy values in the list, unlike dropping the
# labels one by one, and there is no point zeroing a value that is dropped
# right after.
erfs_to_cal_ind = erfs_to_cal_ind[
    ~erfs_to_cal_ind["idfoy"].isin(idfoy_max_chom_ind)
].reset_index(drop=True)

# Sanity check kept from the original cell: at least one row must have gone
assert len(erfs_to_cal_ind) < len_init
erfs_to_cal_ff = individus_to_foyers_fiscaux(erfs_to_cal_ind)
print(
    "Désormais, le chomage maximum est de: ",
    erfs_to_cal_ff["chomage_brut"].max(),
    "€ dans notre base",
)
{% endraw %}

Calibration

{% raw %}
%%capture
var_name = "chomage_brut"

# Calibration
erfs_cal_ff, erfs_cal_ind, Distribs_chom, plot_chom = calibration(
    erfs_to_cal_ind, var_name, annee_erfs, annee_pote
)

# Sauvegarde de l'agrégat de référence
wanted[var_name] = Distribs_chom[2].df["sum"].sum()

# A-t'on ajouté des gens dans la base?
if erfs_cal_ff["idfoy"].nunique() > erfs_to_cal_ff["idfoy"].nunique():
    new_ppl = True
elif erfs_cal_ff["idfoy"].nunique() == erfs_to_cal_ff["idfoy"].nunique():
    new_ppl = False
else:
    erfs_cal_ff["idfoy"].nunique() < erfs_to_cal_ff["idfoy"].nunique()
    raise Exception("Il y a eu une erreur dans la calibration de ", var_name)

# On repasse en individus
erfs_to_cal_ind = foyers_fiscaux_to_individus(
    erfs_to_cal_ind, erfs_cal_ff, [var_name], [var_name]
)
# On repasse en foyers fiscaux
erfs_to_cal_ff = individus_to_foyers_fiscaux(erfs_to_cal_ind)
print("On a calibré ", var_name)
{% endraw %} {% raw %}
plt.figure().clear()
plot_chom
plt.show()
{% endraw %}

Tracking

{% raw %}
var_list = [
    "salaire_imposable",
    "retraite_brute",
    "chomage_brut",
]
# erfs_cal_ind.head()
{% endraw %} {% raw %}
# Weighted total of each tracked variable once the calibration is done
for var in var_list:
    total_pondere = erfs_to_cal_ind[var].mul(erfs_to_cal_ind["wprm"]).sum()
    pipeline_tracker.loc[var, "post_calib"] = round(total_pondere)
del total_pondere

# Size of the base: individual rows, weighted foyers, foyer rows
pipeline_tracker.loc["Len_ind", "post_calib"] = round(len(erfs_to_cal_ind["wprm"]))

erfsff = individus_to_foyers_fiscaux(erfs_to_cal_ind)
pipeline_tracker.loc["Nb_foyers", "post_calib"] = round(erfsff["wprm"].sum())
pipeline_tracker.loc["Len_ff", "post_calib"] = round(len(erfsff["wprm"]))
del erfsff

pipeline_tracker
{% endraw %} {% raw %}
erfs_to_cal_ind.drop(columns=["primes_etc"], inplace=True)
saving_ff = erfs_to_cal_ff.copy()
saving_ind = erfs_to_cal_ind.copy()
{% endraw %}

PARTIE DEBUGGING - 1

{% raw %}
erfs_to_cal_ff = saving_ff.copy()
erfs_to_cal_ind = saving_ind.copy()
{% endraw %} {% raw %}
len(erfs_to_cal_ind.index)
len(erfs_to_cal_ind)
{% endraw %} {% raw %}
erfs_to_cal_ind["idfoy"].nunique()
erfs_to_cal_ind["idfam"].nunique()
erfs_to_cal_ind["idmen"].nunique()
{% endraw %}

Calcul de l'erreur

{% raw %}
# Sources : https://fr.statista.com/statistiques/505942/repartition-depenses-assurance-chomage-par-poste-france/
print("Dans POTE, on a un total de ", f"{wanted['chomage_brut']:,}")
print(
    "Alors que dans l'ERFS, on a un total de ",
    f"{(erfs_to_cal_ff['chomage_brut']*erfs_to_cal_ff['wprm']).sum():,}",
)
print(
    "Soit ",
    100
    * (
        (erfs_to_cal_ff["chomage_brut"] * erfs_to_cal_ff["wprm"]).sum()
        - wanted["chomage_brut"]
    )
    / (wanted["chomage_brut"]),
    "% d'écart",
)
{% endraw %}

Analyse de la base avant calcul

{% raw %}
Distrib_ERFS = Distribs_chom[0]
Distrib_POTE = Distribs_chom[1]
Distrib_CAL = Distribs_chom[2]

df_erfs = Distrib_ERFS.df
df_cal = Distrib_CAL.df
df_pote = Distrib_POTE.df
{% endraw %} {% raw %}
erfs_to_cal_ind.isnull().values.any()
erfs_to_cal_ind.isnull().sum().sum()
{% endraw %} {% raw %}
len(erfs_to_cal_ff[erfs_to_cal_ff["chomage_brut"] >= 93738])
# erfs_to_cal_ff[erfs_to_cal_ff['chomage_brut']>= 93738][['wprm','chomage_brut', 'salaire_de_base', 'revenus_capitaux_prelevement_bareme','revenus_capitaux_prelevement_liberatoire','revenus_capitaux_prelevement_forfaitaire_unique_ir','salaire_imposable']]
{% endraw %}

Calcul de la CSG sur base pleine

{% raw %}
erfs_to_cal_ind.head()
erfs_to_cal_ind.columns
# Pour tests
# erfs_to_cal_ind.drop(columns=["f4ba"], inplace=True)
# erfs_to_cal_ind['activite',] = 1.0
# erfs_to_cal_ind['activite'] = 3.0
{% endraw %}

Recalcul du RFR et du nombre de parts

{% raw %}
erfs_ind = erfs_to_cal_ind.copy(deep=True)
# erfs_ind = erfs_ind.sort_values(by=["chomage_brut"])
{% endraw %} {% raw %}
cols_declarant_principal = [
    "pensions_alimentaires_percues",
    "retraite_brute",
    "rag",
    "ric",
    "rnc",
    "salaire_de_base",
    "f4ba",
    "rfr",
    "revenus_capitaux_prelevement_bareme",
    "revenus_capitaux_prelevement_liberatoire",
    "revenus_capitaux_prelevement_forfaitaire_unique_ir",
    "salaire_imposable",
    "chomage_brut",
]
{% endraw %} {% raw %}
erfs_ind.drop(["rfr"], axis=1, inplace=True)

my_simu, _ = create_simulation(data=erfs_ind, tbs=TBS, period=annee_de_calcul)

erfs_ind = compute_var_in_ff(
    my_simu, annee_de_calcul, erfs_ind, ["rfr"], cols_declarant_principal
)

erfs_ind = compute_var_in_ff(
    my_simu, annee_de_calcul, erfs_ind, ["nbptr"], cols_declarant_principal
)

erfs_ind
{% endraw %}

Calcul de la CSG Chomage

{% raw %}
print(len(erfs_ind))
print(
    "Somme du Chômage post calibration : "
    + f'{ (erfs_ind["wprm"] * erfs_ind["chomage_brut"]).sum() :,}'
    + " €"
)

to_calc = my_simu.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
print(len(to_calc))
erfs_ind["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()
print(
    "Total de CSG imposable sur le chomage en 2021 : ",
    f'{(erfs_ind["csg_imposable_chomage"]*erfs_ind["wprm"]).sum():,}',
)

to_calc = my_simu.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
erfs_ind["csg_deductible_chomage"] = to_calc
print(
    "Total de CSG deductible sur le chomage en 2021 : ",
    f'{(erfs_ind["csg_deductible_chomage"]*erfs_ind["wprm"]).sum():,}',
)

sum_csg_chom = (erfs_ind["csg_deductible_chomage"] * erfs_ind["wprm"]).sum() + (
    erfs_ind["csg_imposable_chomage"] * erfs_ind["wprm"]
).sum()
print(
    "Total de CSG Chomage calculé : ",
    f"{sum_csg_chom:,}",
    "€ et attendu : ",
    f"{(1_037_000_000):,}",
)
erfs_ind.head()
# tc.assertLessEqual(-sum_csg_chom, 1.2 * 1_037_000_000)
# tc.assertGreaterEqual(-sum_csg_chom, 0.8 * 1_037_000_000)
{% endraw %} {% raw %}
erfs_ind.tail()
{% endraw %} {% raw %}
erfs_ind.isnull().values.any()
erfs_ind.isnull().sum().sum()
{% endraw %} {% raw %}
# The simulated CSG is negated before every other comparison with a positive
# reference aggregate in this notebook, so it is negated here as well;
# without it the relative gap is meaningless.
print("Soit ", 100 * (-sum_csg_chom - 1_037_000_000) / (1_037_000_000), "% d'écart")
{% endraw %}

PARTIE DEBUGGING - 2

Analyse du resultat par personne

{% raw %}
base = erfs_ind  # [['fake_id','chomage_brut', 'csg_deductible_chomage', 'csg_imposable_chomage']]
base = base.sort_values(by="chomage_brut")
base[
    ["fake_id", "chomage_brut", "csg_deductible_chomage", "csg_imposable_chomage"]
].loc[[223125, 177174, 289306]]
base[-10:]
{% endraw %}

Verification du taux et du seuil

{% raw %}
base.isnull().values.any()
base.isnull().sum().sum()
{% endraw %} {% raw %}
4 * 41136
{% endraw %} {% raw %}
# Effective rates actually applied by the simulation: CSG amount divided by
# the gross unemployment benefit, rounded to 3 decimals.
base["taux_imp"] = round(
    (base["csg_imposable_chomage"] / base["chomage_brut"]), 3
)  # expected magnitude 0.024, i.e. 2.4 % (a ratio, not "0.024 %"); sign depends on the simulation's convention
base["taux_ded"] = round(
    (base["csg_deductible_chomage"] / base["chomage_brut"]), 3
)  # expected magnitude 0.038, i.e. 3.8 % (a ratio, not "0.038 %")
base[-10:]

# Do we really get a single rate?
base["taux_ded"].nunique()
print("taux_ded", base["taux_ded"].unique())
base["taux_imp"].nunique()
print("taux_imp", base["taux_imp"].unique())
# NB: NaN rates come from divisions by zero (people with no unemployment benefit)
{% endraw %}

Calcul de la CSG « à la main » pour vérifier les ordres de grandeur

{% raw %}
len(erfs_ind)
erfs_ind = erfs_ind[erfs_ind["chomage_brut"] > 0]
len(erfs_ind)
{% endraw %} {% raw %}
# erfs_ind.sort_values(by=['chomage_brut'], inplace=True)
# erfs_ind = erfs_ind.loc[[4, 5695, 65040]]
erfs_ind
{% endraw %} {% raw %}
erfs_ind.columns
{% endraw %} {% raw %}
erfs_calc = erfs_ind[
    [
        "activite",
        "age",
        "categorie_salarie",
        "contrat_de_travail",
        "date_naissance",
        "effectif_entreprise",
        "heures_remunerees_volume",
        "idfam",
        "idfoy",
        "idmen",
        "noindiv",
        "quifam",
        "quifoy",
        "quimen",
        "statut_marital",
        "idmen_original",
        "idfoy_original",
        "idfam_original",
        "idmen_x",
        "wprm",
        "zone_apl",
        "fake_id",
        "quimenof",
        "quifoyof",
        "quifamof",
        "chomage_brut",
        "rfr",
        "nbptr",
    ]
]
{% endraw %} {% raw %}
# seuil_exoneration = seuils.seuil_rfr1 + (nbptr - 1) * seuils.demi_part_suppl_rfr1
# seuil_reduction = seuils.seuil_rfr2 + (nbptr - 1) * seuils.demi_part_suppl_rfr2


def calc_taux(rfr, seuil_exoneration, seuil_reduction):
    """Return the CSG rates applied to unemployment benefits.

    Args:
        rfr: reference taxable income of the foyer.
        seuil_exoneration: income threshold under which no CSG is due.
        seuil_reduction: income threshold under which only the reduced
            (deductible-only) rate applies.

    Returns:
        A ``(taux_imp, taux_ded)`` tuple: the taxable and the deductible
        CSG rate.
    """
    # Exemption
    if rfr < seuil_exoneration:
        return 0, 0
    # Reduced rate: deductible part only
    if rfr < seuil_reduction:
        return 0, 0.038
    # Full rate
    return 0.024, 0.038
{% endraw %} {% raw %}
# En 2018
# seuil_exoneration = seuils.seuil_rfr1 + (nbptr - 1) * seuils.demi_part_suppl_rfr1
# seuil_reduction = seuils.seuil_rfr2 + (nbptr - 1) * seuils.demi_part_suppl_rfr2

# ie:
# seuil_exoneration = 11018.0 + (nbptr - 1) * 2942.0
# seuil_reduction = 14404.0 + (nbptr - 1) * 3846.0

# Work on an explicit copy: erfs_calc was obtained by selecting columns from a
# filtered frame, and adding columns to such a selection triggers pandas'
# SettingWithCopyWarning.
erfs_calc = erfs_calc.copy()

# Income thresholds as a function of the number of tax shares (nbptr),
# computed column-wise rather than row by row. The cast keeps the arithmetic
# in float64 whatever dtype the simulation produced.
nb_parts_suppl = erfs_calc["nbptr"].astype("float64") - 1
erfs_calc["seuil_exoneration"] = 11018.0 + nb_parts_suppl * 2942.0
erfs_calc["seuil_reduction"] = 14404.0 + nb_parts_suppl * 3846.0
del nb_parts_suppl

erfs_calc.head()
{% endraw %} {% raw %}
def func1(cols):
    """Row-wise helper: taxable CSG rate (first item returned by ``calc_taux``)."""
    taux_imp, _ = calc_taux(
        cols["rfr"], cols["seuil_exoneration"], cols["seuil_reduction"]
    )
    return taux_imp


def func2(cols):
    """Row-wise helper: deductible CSG rate (second item returned by ``calc_taux``)."""
    _, taux_ded = calc_taux(
        cols["rfr"], cols["seuil_exoneration"], cols["seuil_reduction"]
    )
    return taux_ded


erfs_calc["taux_imp"] = erfs_calc.apply(func1, axis=1)
erfs_calc["taux_ded"] = erfs_calc.apply(func2, axis=1)

erfs_calc
{% endraw %} {% raw %}
# 4 * PSS = 4 * 41136
erfs_calc["base"] = erfs_calc["chomage_brut"].apply(
    lambda row: 0.9825 * row if row < 4 * 41136 else row
)
erfs_calc.head()

erfs_calc["base"].sum()
{% endraw %} {% raw %}
erfs_calc["montant_csg_imp"] = erfs_calc["base"] * erfs_calc["taux_imp"]
erfs_calc["montant_csg_ded"] = erfs_calc["base"] * erfs_calc["taux_ded"]
{% endraw %} {% raw %}
erfs_calc["cho_seuil_exo"] = (
    12 * 1.0 * 9.88 * 151.67
)  # erfs_calc["heures_remunerees_volume"].apply(lambda row :
erfs_calc["cho_seuil_exo"].sum()  #!! ANNUEL
{% endraw %} {% raw %}
def func3(cols):
    """Taxable CSG due on one row, after the low-benefit floor.

    ``cols`` is a mapping (e.g. a DataFrame row) with keys ``base``,
    ``taux_imp``, ``chomage_brut`` and ``cho_seuil_exo``. The raw CSG is
    reduced by whatever amount the benefit net of CSG falls short of
    ``cho_seuil_exo``; the result is never negative.
    """
    csg_brute = cols["base"] * cols["taux_imp"]
    net_de_csg = cols["chomage_brut"] - csg_brute
    manque = max(cols["cho_seuil_exo"] - net_de_csg, 0)
    return max(csg_brute - manque, 0)


def func4(cols):
    """Deductible CSG due on one row, after the low-benefit floor.

    ``cols`` is a mapping (e.g. a DataFrame row) with keys ``base``,
    ``taux_ded``, ``chomage_brut`` and ``cho_seuil_exo``. The raw CSG is
    reduced by whatever amount the benefit net of CSG falls short of
    ``cho_seuil_exo``; the result is never negative.
    """
    csg_brute = cols["base"] * cols["taux_ded"]
    net_de_csg = cols["chomage_brut"] - csg_brute
    manque = max(cols["cho_seuil_exo"] - net_de_csg, 0)
    return max(csg_brute - manque, 0)


erfs_calc["csg_imp"] = erfs_calc.apply(func3, axis=1)
erfs_calc["csg_ded"] = erfs_calc.apply(func4, axis=1)
{% endraw %} {% raw %}
erfs_ff = individus_to_foyers_fiscaux(erfs_calc)
erfs_ff.columns
{% endraw %} {% raw %}
sum_imp = (erfs_ff["csg_imp"] * erfs_ff["wprm"]).sum()
sum_imp
sum_ded = (erfs_ff["csg_ded"] * erfs_ff["wprm"]).sum()
sum_ded
{% endraw %} {% raw %}
print("Soit le total de CSG chomage:", f"{ sum_imp + sum_ded:,}")
{% endraw %} {% raw %}
erfs_ff[
    [
        "idfoy",
        "wprm",
        "heures_remunerees_volume",
        "chomage_brut",
        "rfr",
        "nbptr",
        "taux_imp",
        "taux_ded",
        "seuil_exoneration",
        "seuil_reduction",
        "base",
        "cho_seuil_exo",
        "csg_imp",
        "csg_ded",
    ]
]
{% endraw %}

Comparer avec OF

{% raw %}
erfs_ind = erfs_calc.copy()
erfs_ind.columns
{% endraw %} {% raw %}
print(len(erfs_ind))
print(
    "Somme du Chômage post calibration : "
    + f'{ (erfs_ind["wprm"] * erfs_ind["chomage_brut"]).sum() :,}'
    + " €"
)
# NOTE(review): this rebinds the notebook-wide ``annee_de_calcul`` (read from
# the configuration at the top of the notebook); every later cell using it
# will also run on 2018 — confirm this is intended.
annee_de_calcul = 2018

# Simulation restricted to the structural columns plus chomage_brut, rfr, nbptr
erfs_min = erfs_ind[
    [
        "activite",
        "age",
        "categorie_salarie",
        "contrat_de_travail",
        "date_naissance",
        "effectif_entreprise",
        "heures_remunerees_volume",
        "idfam",
        "idfoy",
        "idmen",
        "noindiv",
        "quifam",
        "quifoy",
        "quimen",
        "statut_marital",
        "idmen_original",
        "idfoy_original",
        "idfam_original",
        "idmen_x",
        "wprm",
        "zone_apl",
        "fake_id",
        "quimenof",
        "quifoyof",
        "quifamof",
        "chomage_brut",
        "rfr",
        "nbptr",
    ]
]
my_simu, _ = create_simulation(data=erfs_min, tbs=TBS, period=annee_de_calcul)

# Activate the trace BEFORE the first calculation, so that the computation log
# printed at the end of the cell covers both CSG variables.
my_simu.trace = True

to_calc = my_simu.calculate_add("csg_imposable_chomage", period=annee_de_calcul)


print(len(to_calc))
erfs_ind["csg_imposable_chomage"] = to_calc

# The labels report the period actually simulated instead of a hard-coded year
print(
    f"Total de CSG imposable sur le chomage en {annee_de_calcul} : ",
    f'{(erfs_ind["csg_imposable_chomage"]*erfs_ind["wprm"]).sum():,}',
)

to_calc = my_simu.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
erfs_ind["csg_deductible_chomage"] = to_calc
print(
    f"Total de CSG deductible sur le chomage en {annee_de_calcul} : ",
    f'{(erfs_ind["csg_deductible_chomage"]*erfs_ind["wprm"]).sum():,}',
)

# Weighted total of both CSG components
sum_csg_chom = (erfs_ind["csg_deductible_chomage"] * erfs_ind["wprm"]).sum() + (
    erfs_ind["csg_imposable_chomage"] * erfs_ind["wprm"]
).sum()

erfs_ind.head()

# print calculation steps
my_simu.tracer.print_computation_log()
# tc.assertLessEqual(-sum_csg_chom, 1.2 * 1_037_000_000)
# tc.assertGreaterEqual(-sum_csg_chom, 0.8 * 1_037_000_000)
{% endraw %} {% raw %}
print(
    "Total de CSG Chomage calculé : ",
    f"{sum_csg_chom:,}",
    "€ et attendu : ",
    f"{(1_037_000_000):,}",
)
{% endraw %}

Comparer les 2 calculs

{% raw %}
erfs_ind.columns
erfs_ind[
    [
        "idfoy",
        "quifoy",
        "wprm",
        "chomage_brut",
        "rfr",
        "nbptr",
        "seuil_exoneration",
        "seuil_reduction",
        "taux_imp",
        "taux_ded",
        "base",
        "cho_seuil_exo",
        "csg_imp",
        "csg_ded",
        "montant_csg_imp",
        "csg_imposable_chomage",
        "csg_deductible_chomage",
    ]
]
{% endraw %} {% raw %}
erfs_ind["diff_imp"] = (
    abs(-erfs_ind["csg_imposable_chomage"] - erfs_ind["csg_imp"]) / erfs_ind["csg_imp"]
)
erfs_ind["diff_imp"].sort_values()
erfs_ind["diff_imp"].max()
{% endraw %}

Analyse de la distribution

{% raw %}
# Subset of people considered as not exempted.
# NOTE(review): the filter compares ``rfr`` with ``cho_seuil_exo`` (the
# constant 12 * 9.88 * 151.67 set on every row), whereas ``calc_taux`` decides
# the exemption from ``rfr < seuil_exoneration`` — confirm which threshold is
# meant here.
erfs_payants = erfs_ind[erfs_ind["rfr"] > erfs_ind["cho_seuil_exo"]]

# Head counts are unweighted row counts; amounts are weighted by wprm
print(
    "Sur ",
    len(erfs_ind),
    " personnes touchant du chômage, soit un total de ",
    f'{(erfs_ind["chomage_brut"]*erfs_ind["wprm"]).sum():,}',
    "€, ",
    len(erfs_payants),
    " personnes ne sont pas exonérées, soit une masse imposable de ",
    f'{(erfs_payants["chomage_brut"]*erfs_payants["wprm"]).sum():,}',
)
{% endraw %} {% raw %}
# Split between reduced and full rate with the same rule as ``calc_taux``:
# reduced rate strictly below the threshold, full rate from the threshold
# upwards. A strict ">" would silently drop rows where rfr equals the
# threshold.
erfs_red = erfs_payants[erfs_payants["rfr"] < erfs_payants["seuil_reduction"]]
erfs_plein = erfs_payants[erfs_payants["rfr"] >= erfs_payants["seuil_reduction"]]

# Weighted unemployment mass of each group
total_red = (erfs_red["chomage_brut"] * erfs_red["wprm"]).sum()
total_plein = (erfs_plein["chomage_brut"] * erfs_plein["wprm"]).sum()

print("Total taux reduit :", total_red)
print("Total taux plein :", total_plein)

# Taxable part: 0 at the reduced rate, 2.4 % at the full rate
imp_red = total_red * 0
imp_plein = total_plein * 0.024

# Deductible part: 3.8 % in both cases
ded_red = total_red * 0.038
ded_plein = total_plein * 0.038

print(
    "Soit le total de CSG chomage: ",
    f"{(imp_red + imp_plein + ded_plein + ded_red) :,}",
)
print(
    "Attention cela ne prend pas en compte l'exoneration faite si chomage_brut - csg_due < seuil_exo"
)
{% endraw %} {% raw %}
def exo_ind_pour_max(cols):
    """Flag a row for the taxable CSG: 0 when the benefit net of
    ``montant_csg_imp`` is below ``cho_seuil_exo``, 1 otherwise."""
    net_de_csg = cols["chomage_brut"] - cols["montant_csg_imp"]
    return 0 if net_de_csg < cols["cho_seuil_exo"] else 1


def exo_ded_pour_max(cols):
    """Flag a row for the deductible CSG: 0 when the benefit net of
    ``montant_csg_ded`` is below ``cho_seuil_exo``, 1 otherwise."""
    net_de_csg = cols["chomage_brut"] - cols["montant_csg_ded"]
    return 0 if net_de_csg < cols["cho_seuil_exo"] else 1


erfs_payants["exo_ded_pour_max"] = erfs_payants.apply(exo_ded_pour_max, axis=1)
erfs_payants["exo_imp_pour_max"] = erfs_payants.apply(exo_ind_pour_max, axis=1)

erfs_payants
{% endraw %} {% raw %}
# Same split as ``calc_taux``: reduced rate strictly below the threshold, full
# rate from the threshold upwards (a strict ">" would drop rows where rfr
# equals the threshold).
erfs_red = erfs_payants[(erfs_payants["rfr"] < erfs_payants["seuil_reduction"])]
erfs_plein = erfs_payants[erfs_payants["rfr"] >= erfs_payants["seuil_reduction"]]

print(
    "Total taux reduit :",
    f'{(erfs_red["chomage_brut"]*erfs_red["wprm"]).sum() :,}',
    "€",
)
print(
    "Total taux plein :",
    f'{(erfs_plein["chomage_brut"]*erfs_plein["wprm"]).sum() :,}',
    "€",
)

# Each subset carries its own 0/1 flag columns (it is cut from erfs_payants),
# so the flags are read from the subset itself rather than relying on index
# alignment with the larger erfs_payants frame.
imp_red = (
    erfs_red["chomage_brut"] * erfs_red["wprm"] * erfs_red["exo_imp_pour_max"]
).sum() * 0
imp_plein = (
    erfs_plein["chomage_brut"] * erfs_plein["wprm"] * erfs_plein["exo_imp_pour_max"]
).sum() * 0.024

ded_red = (
    erfs_red["chomage_brut"] * erfs_red["wprm"] * erfs_red["exo_ded_pour_max"]
).sum() * 0.038
ded_plein = (
    erfs_plein["chomage_brut"] * erfs_plein["wprm"] * erfs_plein["exo_ded_pour_max"]
).sum() * 0.038

print(
    "Soit le total de CSG chomage: ",
    f"{(imp_red + imp_plein + ded_plein + ded_red) :,}",
)
{% endraw %} {% raw %}
# NOTE(review): presumably a deliberate ZeroDivisionError used to halt a
# "Run all" at this point — confirm before removing.
1 / 0
{% endraw %}

Calcul de la CSG sur base minimale

{% raw %}
erfs_to_cal_ind = saving_ind.copy()
sample = erfs_to_cal_ind.loc[[223125, 177174, 289306]]
sample
{% endraw %} {% raw %}
erfs_to_cal_ind[erfs_to_cal_ind["idfoy"] == 113256]
{% endraw %} {% raw %}
erfs_to_cal_ind[erfs_to_cal_ind["idfoy"] == 89943]
{% endraw %} {% raw %}
erfs_to_cal_ind[erfs_to_cal_ind["idfoy"] == 146878]
{% endraw %} {% raw %}
sample = erfs_to_cal_ind[erfs_to_cal_ind["idfoy"].isin([113256, 89943, 146878])]

# sample['activite']=4
sample
{% endraw %} {% raw %}
# to_calc
{% endraw %} {% raw %}
# to_calc
{% endraw %} {% raw %}
simulation, _ = create_simulation(data=sample, tbs=TBS, period=annee_de_calcul)
# activate the trace
# simulation.trace = True

to_calc = simulation.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
sample["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()

to_calc = simulation.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
sample["csg_deductible_chomage"] = to_calc

x = sample[
    [
        "rfr",
        "fake_id",
        "chomage_brut",
        "csg_deductible_chomage",
        "csg_imposable_chomage",
    ]
]
print(x)


# print calculation steps
# simulation.tracer.print_computation_log()
{% endraw %} {% raw %}
sample1 = sample[
    [
        "activite",
        "age",
        "categorie_salarie",
        "contrat_de_travail",
        "date_naissance",
        "effectif_entreprise",
        "heures_remunerees_volume",
        "idfam",
        "idfoy",
        "idmen",
        "noindiv",
        "quifam",
        "quifoy",
        "quimen",
        "chomage_brut",
        "csg_imposable_chomage",
        "csg_deductible_chomage",
    ]
]

sample2 = sample[
    [
        "statut_marital",
        "idmen_original",
        "idfoy_original",
        "idfam_original",
        "idmen_x",
        "wprm",
        "zone_apl",
        "fake_id",
        "quimenof",
        "quifoyof",
        "quifamof",
        "pensions_alimentaires_percues",
        "retraite_brute",
        "rag",
        "ric",
        "rnc",
        "chomage_brut",
        "csg_imposable_chomage",
        "csg_deductible_chomage",
    ]
]

sample3 = sample[
    [
        "salaire_de_base",
        "f4ba",
        "rfr",
        "revenus_capitaux_prelevement_bareme",
        "revenus_capitaux_prelevement_liberatoire",
        "revenus_capitaux_prelevement_forfaitaire_unique_ir",
        "salaire_imposable",
        "chomage_brut",
        "csg_imposable_chomage",
        "csg_deductible_chomage",
    ]
]

sample1
sample2
sample3
{% endraw %} {% raw %}
seuil = 11408 + 3046
seuil
{% endraw %} {% raw %}
seuil_reduc = 14914 + 3982
seuil_reduc
{% endraw %}

Recalcul du RFR ?

{% raw %}
# Build the sample as a new frame without "rfr" rather than dropping the
# column in place on a filtered selection (which triggers pandas'
# SettingWithCopyWarning).
sample = erfs_to_cal_ind[
    erfs_to_cal_ind["idfoy"].isin([113256, 89943, 146878])
].drop(columns=["rfr"])

simulation, _ = create_simulation(data=sample, tbs=TBS, period=annee_de_calcul)
# activate the trace
# simulation.trace = True

# Recompute rfr through the simulation and bring it back into the sample
sample = compute_var_in_ff(
    simulation,
    annee_de_calcul,
    sample,
    ["rfr"],
    ["rfr", "chomage_brut"],
)

to_calc = simulation.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
sample["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()

to_calc = simulation.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
sample["csg_deductible_chomage"] = to_calc

print(sample[["idfoy", "quifoy", "chomage_brut", "rfr", "csg_deductible_chomage"]])

# print calculation steps
# simulation.tracer.print_computation_log()
{% endraw %}

Essai sur la base minimale

{% raw %}
sample = erfs_to_cal_ind[erfs_to_cal_ind["idfoy"].isin([113256, 89943, 146878])]
{% endraw %} {% raw %}
sample1 = sample[["idfoy", "quifoy", "chomage_brut", "rfr"]]
sample1
{% endraw %} {% raw %}
simulation, _ = create_simulation(data=sample1, tbs=TBS, period=annee_de_calcul)
# activate the trace
# simulation.trace = True

to_calc = simulation.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
sample1["csg_imposable_chomage"] = to_calc
# my_simu.tracer.print_computation_log()

to_calc = simulation.calculate_add("csg_deductible_chomage", period=annee_de_calcul)
sample1["csg_deductible_chomage"] = to_calc

print(sample1)

# print calculation steps
# simulation.tracer.print_computation_log()
{% endraw %} {% raw %}
# NOTE(review): presumably a deliberate ZeroDivisionError used to halt a
# "Run all" at this point — confirm before removing.
1 / 0
{% endraw %}

Analyse par quantile

{% raw %}
df_check = pd.DataFrame(
    columns=[
        "total_cal",
        "total_pote",
        "error_chomage",
        "total_csg",
        "total_csg_imp",
        "total_csg_ded",
    ],
    index=range(len(Distrib_CAL.bucket_list)),
)
{% endraw %} {% raw %}
# For every bucket of the calibrated distribution: compare its unemployment
# total with the POTE one, then simulate the CSG on the individuals belonging
# to the bucket's foyers.
for nb in Distrib_CAL.bucket_list:
    bucket = Distrib_CAL.bucket_list[nb]
    idx = int(nb)
    sample_ff = bucket.sample
    # sample_ff.set_index("idfoy", drop=False, inplace=True)

    # Unemployment totals of the bucket
    total_cal = Distrib_CAL.df["sum"][idx]
    total_pote = Distrib_POTE.df["sum"][idx]
    error = (total_cal - total_pote) / total_pote

    # Write through ``df_check.at``: the chained form
    # ``df_check[col].loc[idx] = value`` assigns to an intermediate Series and
    # is not guaranteed to update df_check (it never does under pandas'
    # Copy-on-Write mode).
    df_check.at[idx, "total_cal"] = total_cal
    df_check.at[idx, "total_pote"] = total_pote

    if idx > 0:
        df_check.at[idx, "error_chomage"] = 100 * error
        df_check.at[0, "error_chomage"] = 0

    # Back to the individual level
    # sample = erfs_ind [erfs_ind['idfoy'] in list(sample_ff['idfoy'])]
    lst_idfoy = list(sample_ff["idfoy"])
    # Explicit copy: columns are added below and must not be written onto a
    # filtered selection of erfs_ind.
    sample = erfs_ind[erfs_ind["idfoy"].isin(lst_idfoy)].copy()
    tc.assertGreaterEqual(len(sample), len(sample_ff))
    # print(sample)

    # Total CSG on unemployment benefits for this bucket
    my_simux, _ = create_simulation(data=sample, tbs=TBS, period=annee_de_calcul)
    to_calcIMP = my_simux.calculate_add("csg_imposable_chomage", period=annee_de_calcul)
    sample["csg_imposable_chomage"] = to_calcIMP
    to_calcDED = my_simux.calculate_add(
        "csg_deductible_chomage", period=annee_de_calcul
    )
    sample["csg_deductible_chomage"] = to_calcDED

    total_imp = (sample["csg_imposable_chomage"] * sample["wprm"]).sum()
    total_ded = (sample["csg_deductible_chomage"] * sample["wprm"]).sum()

    df_check.at[idx, "total_csg_imp"] = round(total_imp, 0)
    df_check.at[idx, "total_csg_ded"] = round(total_ded, 0)
    df_check.at[idx, "total_csg"] = round(total_imp + total_ded, 0)

    # Release the simulation before building the next one
    del my_simux
    # break

# df_check = df_check.astype(float).round(1)  # reported as not working by the original author
df_check
{% endraw %}

10. Vieillissement vers 2021 (ind)

{% raw %}
erfs_cal_ind = erfs_to_cal_ind.copy()
erfs_cal_ff = individus_to_foyers_fiscaux(erfs_cal_ind)

# On vérifie qu'on ne perd personne en route
tc.assertEqual(
    len(erfs_cal_ff["idfoy"].unique()), len(erfs_to_cal_ind["idfoy"].unique())
)
{% endraw %}

Calculs

{% raw %}
# Année de départ: année de la base vieillie
year_start = int(config.get("YEAR_POTE"))
# Année de fin : année de production de la base pour calculs sur l'API
year_end = int(config.get("YEAR_COMPUTATION"))
print("On passe la base de ", year_start, " à ", year_end)

erfs_to_cal_ind.columns
{% endraw %}

10.1 - Inflation économique 2019 -> 2021

{% raw %}
cols_to_inflate = [
    "chomage_brut",
    "pensions_alimentaires_percues",
    "rag",
    "ric",
    "rnc",
    "salaire_de_base",
    "f4ba",
    "retraite_brute",
    "rfr",
    "revenus_capitaux_prelevement_bareme",
    "revenus_capitaux_prelevement_liberatoire",
    "revenus_capitaux_prelevement_forfaitaire_unique_ir",
    "salaire_imposable",
]

erfs_inflated_ff = inflation_economique(
    erfs_cal_ff, cols_to_inflate, year_start, year_end
)
{% endraw %}

Tracking

{% raw %}
# On ajoute les valeurs dans la base individus
cols_declarant_principal = cols_to_inflate
to_update = cols_to_inflate

erfs_inflated_ind = foyers_fiscaux_to_individus(
    erfs_cal_ind, erfs_inflated_ff, to_update, cols_declarant_principal, new_ppl=False
)
{% endraw %} {% raw %}
for var in data:
    pipeline_tracker.loc[var, "infl_eco21"] = round(
        (erfs_inflated_ind["wprm"] * erfs_inflated_ind[var]).sum()
    )

# Taille de la base
pipeline_tracker.loc["Len_ind", "infl_eco21"] = round(len(erfs_inflated_ind["wprm"]))

erfsff = individus_to_foyers_fiscaux(erfs_inflated_ind)
pipeline_tracker.loc["Nb_foyers", "infl_eco21"] = round((erfsff["wprm"]).sum())
pipeline_tracker.loc["Len_ff", "infl_eco21"] = round(len(erfsff["wprm"]))
del erfsff
pipeline_tracker
{% endraw %}

10.2 - Inflation du nombre de foyers fiscaux

{% raw %}
erfs_inflated_ff, erfs_inflate_foyers_ind = inflation_foyers(
    erfs_inflated_ind, year_start, year_end
)
{% endraw %}

Tracking

{% raw %}
for var in data:
    pipeline_tracker.loc[var, "infl_ff21"] = round(
        (erfs_inflate_foyers_ind["wprm"] * erfs_inflate_foyers_ind[var]).sum()
    )

# Taille de la base
pipeline_tracker.loc["Len_ind", "infl_ff21"] = round(
    len(erfs_inflate_foyers_ind["wprm"])
)

erfsff = individus_to_foyers_fiscaux(erfs_inflate_foyers_ind)
pipeline_tracker.loc["Nb_foyers", "infl_ff21"] = round((erfsff["wprm"]).sum())
pipeline_tracker.loc["Len_ff", "infl_ff21"] = round(len(erfsff["wprm"]))
del erfsff
pipeline_tracker
{% endraw %}

11. Bruitage de la base

On bruite la base par souci d'anonymat, conformément à nos engagements auprès des fournisseurs de données (la DGFiP, pour POTE).

Calculs

{% raw %}
erfs_ind = bruitage(erfs_inflate_foyers_ind)
erfs_ind.columns
{% endraw %}

12 - Mise en forme de la base

{% raw %}
erfs_ind.columns
erfs_ind.describe()
{% endraw %} {% raw %}
# Drop the columns that carry no information: either entirely missing, or
# holding nothing but zeros.
for col in erfs_ind.columns:
    col_min = erfs_ind[col].min()
    col_max = erfs_ind[col].max()
    # min()/max() skip missing values, so they are NaN only when the whole
    # column is missing. NaN never compares equal to anything (itself
    # included), hence ``pd.isna`` rather than ``== np.nan``.
    entierement_manquante = pd.isna(col_max)
    # Checking both bounds avoids discarding a column of negative amounts
    # whose maximum merely happens to be 0.
    que_des_zeros = col_min == 0 and col_max == 0
    if entierement_manquante or que_des_zeros:
        print("On supprime cette colonne qui est vide :", col)
        erfs_ind = erfs_ind.drop(col, axis=1)
{% endraw %}

TRACKING FINAL

{% raw %}
# Ajout des valeurs
for var in data:
    pipeline_tracker.loc[var, "final"] = round((erfs_ind["wprm"] * erfs_ind[var]).sum())

# Taille de la base
pipeline_tracker.loc["Len_ind", "final"] = round(len(erfs_ind["wprm"]))

erfs_ff = individus_to_foyers_fiscaux(erfs_ind)
pipeline_tracker.loc["Nb_foyers", "final"] = round((erfs_ff["wprm"]).sum())
pipeline_tracker.loc["Len_ff", "final"] = round(len(erfs_ff["wprm"]))

pipeline_tracker
{% endraw %}

ANNEXES

Import

{% raw %}
# NB: the local copy of the base is always the one loaded here.
N = 3
# File name pattern: erfs_final_ind_<YEAR_ERFS>_aged_to_<YEAR_COMPUTATION>_N_<N>.h5
chemin_base_finale = "".join(
    (
        config.get("DATA_OUT"),
        "erfs_final_ind_",
        config.get("YEAR_ERFS"),
        "_aged_to_",
        config.get("YEAR_COMPUTATION"),
        "_N_",
        str(N),
        ".h5",
    )
)
erfs_ind = pd.read_hdf(chemin_base_finale)
{% endraw %}

A - Check des agrégats d'intérêt

{% raw %}
# Sum of the "wprm" weights of the loaded base, printed next to the POTE 2019
# reference figure.
# NOTE(review): the file loaded above is named "erfs_final_ind_...", so this
# sum presumably counts individuals rather than tax households; the tracking
# cells go through individus_to_foyers_fiscaux() first — confirm which is meant.
nb_foyers = erfs_ind["wprm"].sum()
print("Total de foyers: ", nb_foyers, " agrégat POTE 2019 : 39_331_689 ")
{% endraw %} {% raw %}
# Sanity check: the weighted total must exceed the reference count.
tc.assertGreater(
    abs(nb_foyers), 39_331_689
)  # 39_331_689 is the number of tax households in 2019, the latest known value
{% endraw %}

A.3 - Retraites

{% raw %}
# Weighted total of gross pensions ("retraite_brute"), printed next to the
# target value stored under the same key in `wanted`.
sum_retraite = (erfs_ind["retraite_brute"] * erfs_ind["wprm"]).sum()
# The label names the pension aggregate, since that is what is summed here
# (not the RFR).
print(
    "Total de retraites: ",
    sum_retraite,
    " agrégat POTE : ",
    wanted["retraite_brute"],
)
{% endraw %} {% raw %}
# Sanity check: the weighted pension total (absolute value) must be above 80 %
# of the target stored in `wanted["retraite_brute"]`.
tc.assertGreater(abs(sum_retraite), 0.8 * wanted["retraite_brute"])
{% endraw %}

B - Calculs de CSG

Les agrégats 2021 sont obtenus à la p.47 des Comptes de la Sécurité Sociale

https://www.securite-sociale.fr/files/live/sites/SSFR/files/medias/CCSS/2021/RAPPORT%20CCSS%20JUIN%202021.pdf

{% raw %}
# Build the simulation from the individual base, the tax-benefit system TBS and
# the computation year. Only the first returned value is kept; the second one
# is discarded.
my_simu, _ = create_simulation(data=erfs_ind, tbs=TBS, period=annee_de_calcul)
{% endraw %}

B.2 - CSG Retraites

{% raw %}
# Reference aggregate the simulated CSG on pensions is compared with.
csg_ret_attendue = 21_291_000_000

retraites_ponderees = (erfs_ind["wprm"] * erfs_ind["retraite_brute"]).sum()
print(f"Somme des retraites post calibration : {retraites_ponderees:,} €")

# Taxable part of the CSG on pensions, computed by the simulation and stored
# on the individual base.
erfs_ind["csg_imposable_retraite"] = my_simu.calculate_add(
    "csg_imposable_retraite", period=annee_de_calcul
)
# my_simu.tracer.print_computation_log()
csg_ret_imposable = (erfs_ind["csg_imposable_retraite"] * erfs_ind["wprm"]).sum()
print(
    "Total de CSG imposable sur les retraites en 2021 : ",
    f"{csg_ret_imposable:,}",
)

# Deductible part of the CSG on pensions.
erfs_ind["csg_deductible_retraite"] = my_simu.calculate_add(
    "csg_deductible_retraite", period=annee_de_calcul
)
csg_ret_deductible = (erfs_ind["csg_deductible_retraite"] * erfs_ind["wprm"]).sum()
print(
    "Total de CSG deductible sur les retraites en 2021 : ",
    f"{csg_ret_deductible:,}",
)

# Total CSG on pensions = deductible part + taxable part.
sum_csg_ret = csg_ret_deductible + csg_ret_imposable
print(
    "Total de CSG Retraites calculé : ",
    f"{sum_csg_ret:,}",
    "€ et attendu : ",
    csg_ret_attendue,
)
# The total is negated before being compared with the (positive) reference,
# presumably because the simulation returns contributions as negative amounts.
# It must fall within ±20 % of the reference.
tc.assertLessEqual(-sum_csg_ret, 1.2 * csg_ret_attendue)
tc.assertGreaterEqual(-sum_csg_ret, 0.8 * csg_ret_attendue)
{% endraw %}

B.3 - CSG Chômage

{% raw %}
# Reference aggregate the simulated CSG on unemployment benefits is compared with.
csg_chom_attendue = 1_037_000_000

chomage_pondere = (erfs_ind["wprm"] * erfs_ind["chomage_brut"]).sum()
print(f"Somme du Chômage post calibration : {chomage_pondere:,} €")

# Taxable part of the CSG on unemployment benefits, computed by the simulation
# and stored on the individual base.
erfs_ind["csg_imposable_chomage"] = my_simu.calculate_add(
    "csg_imposable_chomage", period=annee_de_calcul
)
# my_simu.tracer.print_computation_log()
csg_chom_imposable = (erfs_ind["csg_imposable_chomage"] * erfs_ind["wprm"]).sum()
print(
    "Total de CSG imposable sur le chomage en 2021 : ",
    f"{csg_chom_imposable:,}",
)

# Deductible part of the CSG on unemployment benefits.
erfs_ind["csg_deductible_chomage"] = my_simu.calculate_add(
    "csg_deductible_chomage", period=annee_de_calcul
)
csg_chom_deductible = (erfs_ind["csg_deductible_chomage"] * erfs_ind["wprm"]).sum()
print(
    "Total de CSG deductible sur le chomage en 2021 : ",
    f"{csg_chom_deductible:,}",
)

# Total CSG on unemployment benefits = deductible part + taxable part.
sum_csg_chom = csg_chom_deductible + csg_chom_imposable
print(
    "Total de CSG Chomage calculé : ",
    f"{sum_csg_chom:,}",
    "€ et attendu : ",
    f"{csg_chom_attendue:,}",
)
# The total is negated before being compared with the (positive) reference,
# presumably because the simulation returns contributions as negative amounts.
# It must fall within ±20 % of the reference.
tc.assertLessEqual(-sum_csg_chom, 1.2 * csg_chom_attendue)
tc.assertGreaterEqual(-sum_csg_chom, 0.8 * csg_chom_attendue)
{% endraw %}

C - Calculs de CRDS

C.2 - CRDS Retraites

{% raw %}
# CRDS on pensions, computed by the simulation and stored on the individual base.
erfs_ind["crds_retraite"] = my_simu.calculate_add(
    "crds_retraite", period=annee_de_calcul
)

# Weighted total, formatted once for the two messages below.
sum_crds_ret = (erfs_ind["crds_retraite"] * erfs_ind["wprm"]).sum()
crds_ret_affichee = f"{sum_crds_ret:,}"

print("Somme pondérée de CRDS Retraite en ", annee_de_calcul, " : " + crds_ret_affichee)

print(
    "Total de CRDS sur les Retraites calculé : ",
    crds_ret_affichee,
    "€ et attendu : ",
    1_000_000,
)
# TODO: no ±20 % check is active here; the expected value printed above looks
# like a placeholder — add the assertions once a reference aggregate is known.
# tc.assertLessEqual(sum_crds_ret, 1.2 * 000_000)
# tc.assertGreaterEqual(sum_crds_ret, 0.8 * 000_000)
{% endraw %}

C.3 - CRDS Chômage

{% raw %}
# CRDS on unemployment benefits, computed by the simulation and stored on the
# individual base.
to_calc = my_simu.calculate_add("crds_chomage", period=annee_de_calcul)
erfs_ind["crds_chomage"] = to_calc

# Weighted total, formatted once for the two messages below.
sum_crds_chom = (erfs_ind["crds_chomage"] * erfs_ind["wprm"]).sum()
crds_chom_affichee = f"{sum_crds_chom:,}"

print("Somme pondérée de CRDS Chômage en ", annee_de_calcul, " : " + crds_chom_affichee)

print(
    "Total de CRDS sur le Chômage calculé : ",
    crds_chom_affichee,
    "€ et attendu : ",
    1_000_000,
)
# TODO: no ±20 % check is active here; the expected value printed above looks
# like a placeholder — add the assertions once a reference aggregate is known.
# tc.assertLessEqual(sum_crds_chom, 1.2 * 000_000)
# tc.assertGreaterEqual(sum_crds_chom, 0.8 * 000_000)
{% endraw %}