--- title: Simulation CSG keywords: fastai sidebar: home_sidebar nb_path: "notebooks/analyses/lex_academy.ipynb" ---
import pandas as pd
# List the HDF5 files present in the working directory (IPython shell escape,
# only valid when run inside a notebook / IPython session).
!ls *.h5
# Load the table that is later passed to simulation() as its `data` argument.
df = pd.read_hdf("dummy_data_final.h5")
# Notebook displays: the frame itself, its summary statistics, then its
# column dtypes / non-null counts.
df
df.describe()
df.info()
from openfisca_core.simulation_builder import SimulationBuilder # type: ignore
def simulation(period, data, tbs):
    """Build an OpenFisca simulation from a flat table with one row per person.

    Args:
        period: Period handed to ``set_input`` for every input column.
        data: pandas DataFrame indexed by person. It must provide the entity
            identifiers ``idfoy``, ``idmen`` and ``idfam`` and the role codes
            ``quifoy`` and ``quifam``. Every column that is not listed in
            ``columns_not_OF_variables`` is loaded as an input variable, so it
            must be a variable known to ``tbs``.
            The frame is modified in place: the columns ``quimenof``,
            ``quifoyof`` and ``quifamof`` are added (or overwritten).
        tbs: Tax-benefit system used to create the entities and to look up
            the entity each variable belongs to.

    Returns:
        A tuple ``(simulation, dictionnaire_datagrouped)`` where the second
        item maps ``"individu"`` to ``data`` itself and each group entity key
        (``"foyer_fiscal"``, ``"menage"``, ``"famille"``) to a frame holding
        one row per entity, sorted by the entity identifier.
    """
    # Translate the numeric role codes into the role names used by OpenFisca.
    # NOTE(review): the menage roles are derived from "quifoy", not "quimen";
    # kept as in the original — confirm this is intended.
    data["quimenof"] = "enfant"
    data.loc[data["quifoy"] == 1, "quimenof"] = "conjoint"
    data.loc[data["quifoy"] == 0, "quimenof"] = "personne_de_reference"
    data["quifoyof"] = "personne_a_charge"
    data.loc[data["quifoy"] == 1, "quifoyof"] = "conjoint"
    data.loc[data["quifoy"] == 0, "quifoyof"] = "declarant_principal"
    data["quifamof"] = "enfant"
    data.loc[data["quifam"] == 1, "quifamof"] = "conjoint"
    data.loc[data["quifam"] == 0, "quifamof"] = "demandeur"

    sb = SimulationBuilder()
    sb.create_entities(tbs)
    sb.declare_person_entity("individu", data.index)

    # Create the OpenFisca group entities and build one grouped table each.
    listentities = {"foy": "foyer_fiscal", "men": "menage", "fam": "famille"}
    dictionnaire_datagrouped = {"individu": data}
    for ent, ofent in listentities.items():
        id_column = "id" + ent
        # The following assumes data defined for an entity are the same for all
        # rows in the same entity. Or at least that the first non null value
        # found for an entity will always be the total value for an entity
        # (which is the case for f4ba).
        grouped = (
            data.groupby(id_column, as_index=False).first().sort_values(by=id_column)
        )
        dictionnaire_datagrouped[ofent] = grouped
        # Declare the entity ids in the same order as the grouped rows. The
        # grouped table is sorted by id whereas `unique()` on the person table
        # follows order of first appearance; using the latter would attach the
        # grouped inputs to the wrong entities whenever `data` is not already
        # sorted by this identifier.
        ent_ids = grouped[id_column].values
        entity_instance = sb.declare_entity(ofent, ent_ids)
        sb.join_with_persons(
            entity_instance,
            data[id_column].values,
            roles=data["qui" + ent + "of"].values,
        )

    # These variables should not be attributed to any OpenFisca Entity
    columns_not_OF_variables = {
        "idmen",
        "idfoy",
        "idfam",
        "noindiv",
        "level_0",
        "quifam",
        "quifoy",
        "quimen",
        "idmen_x",
        "idmen_y",
        "wprm",
        "index",
        "idmen_original",
        "idfoy_original",
        "idfam_original",
        "quifamof",
        "quifoyof",
        "quimenof",
    }

    simu = sb.build(tbs)
    # `MemoryConfig` and `non_cached_variables` are module-level names; they
    # must have been imported before this function is called.
    memory_config = MemoryConfig(
        max_memory_occupation=0.95,  # When 95% of the virtual memory is full, switch to disk storage
        priority_variables=["salary", "age"],  # Always store these variables in memory
        variables_to_drop=non_cached_variables,
    )
    simu.memory_config = memory_config

    # Feed every remaining column to the entity that owns the variable.
    for colonne in data.columns:
        if colonne in columns_not_OF_variables:
            continue
        entity_key = tbs.get_variable(colonne).entity.key
        simu.set_input(
            colonne,
            period,
            dictionnaire_datagrouped[entity_key][colonne],
        )
    return simu, dictionnaire_datagrouped
from openfisca_core.memory_config import MemoryConfig
from openfisca_france import FranceTaxBenefitSystem # type: ignore
from Simulation_engine.non_cached_variables import non_cached_variables
# Build the tax-benefit system and run the simulation on the loaded table.
# simulation() returns a tuple: ma_simu[0] is the OpenFisca simulation,
# ma_simu[1] the dict of per-entity tables.
tbs = FranceTaxBenefitSystem()
ma_simu = simulation("2020", df, tbs)
# Income tax ("irpp") computed for 2020.
pour_2020 = ma_simu[0].calculate("irpp", 2020)
# Notebook displays: per-foyer_fiscal `wprm` column (presumably the survey
# weight — TODO confirm), the irpp vector and its length.
ma_simu[1]["foyer_fiscal"].wprm
pour_2020
len(pour_2020)
# Weighted irpp total divided by 1e9. Uses the foyer_fiscal table's weights,
# which assumes irpp has one value per foyer fiscal — TODO confirm.
(ma_simu[1]["foyer_fiscal"].wprm * pour_2020).sum() / 1e9
# Same exercise for "csg", weighted with the person-level table; this assumes
# csg has one value per row of df — TODO confirm.
csg = ma_simu[0].calculate("csg", 2020)
csg
len(csg)
(df.wprm * csg).sum() / 1e9
Cas individuel correct ?
82 milliards, est-ce réaliste ?
A-t-on les ressources pour la bonne période ?
Aller voir les administrateurs des commissions concernées pour valider les chiffres.
from openfisca_france.model.base import ADD
# Query "salaire_imposable" on the individu population for period "2020-12"
# with the ADD option.
# NOTE(review): "2020-12" is a single month while the other cells use the
# year 2020 — confirm which period was intended.
sal = ma_simu[0].individu("salaire_imposable", "2020-12", options=[ADD])
# Notebook displays: the resulting vector and its length.
sal
len(sal)
Somme des revenus des Français
from openfisca_core.memory_config import MemoryConfig
from openfisca_france import FranceTaxBenefitSystem # type: ignore
from Simulation_engine.non_cached_variables import non_cached_variables
# Rebuild the tax-benefit system and the simulation from scratch, then
# recompute irpp and csg for 2020 (same steps as the earlier run).
tbs = FranceTaxBenefitSystem()
ma_simu = simulation("2020", df, tbs)
pour_2020 = ma_simu[0].calculate("irpp", 2020)
csg = ma_simu[0].calculate("csg", 2020)
# Weighted csg total divided by 1e9.
(df.wprm * csg).sum() / 1e9
# Columns of df; simulation() has added quimenof / quifoyof / quifamof to it.
df.columns
# Weighted totals of two raw input columns, divided by 1e9.
(df.wprm * df.salaire_de_base).sum() / 1e9
(df.wprm * df.retraite_brute).sum() / 1e9
# Input columns whose weighted totals are printed below.
champs = [
    "chomage_brut",
    "pensions_alimentaires_percues",
    "retraite_brute",
    "salaire_de_base",
    "f4ba",
]
# For each column: sum of value x wprm over all rows of df, divided by 1e9
# (printed with the G€ unit).
for ch in champs:
    montant = (df.wprm * df[ch]).sum() / 1e9
    print(f"Montant pour {ch} : {montant} G€")
# Number of rows in the foyer_fiscal table (to compare with len(pour_2020)).
len(ma_simu[1]["foyer_fiscal"]["wprm"])
# Weighted irpp total divided by 1e9, using the foyer_fiscal table's weights.
montant_impot_total = (
ma_simu[1]["foyer_fiscal"]["wprm"] * pour_2020
).sum() / 1e9 # NOTE(review): original remark was "don't work : individu vs foyer"; weights here come from the foyer_fiscal table — confirm lengths match
print(f"{montant_impot_total=} sans credit d'impôts")
# Keys of the per-entity tables dict returned by simulation().
ma_simu[1].keys()
# Same value rendered with different f-string format specs.
f"{montant_impot_total}"
f"{montant_impot_total:.2f}"
f"{montant_impot_total=:.2f} G€"
# "rni" for 2020 on the current simulation, weighted with the foyer_fiscal
# table's wprm and printed divided by 1e9.
rni = ma_simu[0].calculate("rni", 2020)
print(
f"Somme de tous les revenus nets imposables, y compris les retraites {(ma_simu[1]['foyer_fiscal']['wprm'] * rni).sum() / 1e9:.2f} G€"
)
# Rebuild the simulation from the unmodified df and print the same total again.
ma_simu = simulation("2020", df, tbs)
rni = ma_simu[0].calculate("rni", 2020)
print(
f"Somme de tous les revenus nets imposables, y compris les retraites {(ma_simu[1]['foyer_fiscal']['wprm'] * rni).sum() / 1e9:.2f} G€"
)
# Weighted csg total on the unmodified data; csg_de_base keeps the undivided
# value and is the reference used by the later "Sans <column>" loop.
csg = ma_simu[0].calculate("csg", 2020)
print(f"Montant de la CSG : {(df.wprm * csg).sum() / 1e9} G€")
csg_de_base = (df.wprm * csg).sum()
# Sensitivity analysis: set one input column to zero at a time, rerun the
# simulation, and record how far the weighted CSG total moves away from
# csg_de_base (difference divided by 1e9).
montants = {}
champs = [
    "chomage_brut",
    "pensions_alimentaires_percues",
    "retraite_brute",
    "salaire_de_base",
    "f4ba",
    "ric",
]
for ch in champs:
    print(f"Sans {ch}: ")
    # Work on a copy so df keeps its original amounts for the next column.
    df2 = df.copy()
    df2[ch] = 0
    ma_simu = simulation("2020", df2, tbs)

    rni = ma_simu[0].calculate("rni", 2020)
    rni_total = (ma_simu[1]["foyer_fiscal"]["wprm"] * rni).sum() / 1e9
    print(
        f"\tSomme de tous les revenus nets imposables, y compris les retraites {rni_total:.2f} G€"
    )

    csg = ma_simu[0].calculate("csg", 2020)
    # Computed once, used for both the printout and the recorded difference.
    csg_total = (df.wprm * csg).sum()
    print(f"\tMontant de la CSG : {csg_total / 1e9} G€")
    montants[ch] = (csg_total - csg_de_base) / 1e9
montants
Madhinette 2020 : ''' {'chomage_brut': 0.24275506332902527, 'pensions_alimentaires_percues': 0.0, 'retraite_brute': 23.638260595473465, 'salaire_de_base': 54.64542825501262, 'f4ba': 3.562409495149399, 'ric': 0.0} '''
Après calibration Leximpact : ''' {'chomage_brut': 0.3620467490209198, 'pensions_alimentaires_percues': 0.0, 'retraite_brute': 29.32082822751544, 'salaire_de_base': 76.13060511864713, 'f4ba': 5.449210381833191, 'ric': 0.0} '''
# Baseline weighted CSG total (all input columns untouched), divided by 1e9.
csg_de_base / 1e9
Il nous manque les 12 milliards de CSG sur les revenus du capital.
Dans les déclarations d'impôt il y a 75 G€ de revenus du capital. cf
# Weighted total of the f4ba input column, divided by 1e9.
(df.wprm * df.f4ba).sum() / 1e9
Sortie Madhinette : 36 G€ pour f4ba, ce qui est cohérent avec Bercy.
Mais après la calibration LexImpact on est à 55 G€
=> Nous avons probablement grossi les revenus du capital, ce qui était le but.
Il nous faut :