---
title: Verif des extractions CASD
keywords: fastai
sidebar: home_sidebar
nb_path: "notebooks/analyses/csg_40_verif_extractions_casd.ipynb"
---
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import seaborn as sns
from leximpact_socio_fisca_simu_etat.config import Configuration
# Project configuration object; the cells below query it for the "DATA_IN"
# and "COPULE_FOLDER" entries, which are used as path prefixes.
config = Configuration(project_name="leximpact-prepare-data")
!ls ./csg/data
# Load the "CalibPOTE_2019.csv" extract and preview its last three rows.
# NOTE(review): the DATA_IN value is concatenated directly with the file name,
# so it presumably ends with a path separator -- confirm in the configuration.
calib_rfr = pd.read_csv(config.get("DATA_IN") + "CalibPOTE_2019.csv")
calib_rfr.tail(3)
# Load the per-variable CSG summary file and show its column names.
liste_des_variables_csg = pd.read_csv(
config.get("DATA_IN") + "liste_des_variables_csg-POTE_2019.csv"
)
liste_des_variables_csg.columns
# Keep only the rows whose "nom_variable" is longer than 6 characters.
liste_des_variables_csg = liste_des_variables_csg.loc[
liste_des_variables_csg["nom_variable"].str.len() > 6
]
# Display formatting: amounts with thousands separators, two decimals and a
# euro sign; "pct_zero" as a whole-number percentage.
liste_des_variables_csg_style = liste_des_variables_csg.style.format(
{"somme_en_euros": "{:,.2f} €", "pct_zero": "{:.0f} %"}
)
# Draw an in-cell bar on the amount column of the styled table.
liste_des_variables_csg_style.bar(subset=["somme_en_euros"], color="#d65f5f")
def read_copule(filepath):
    """Load the RFR / Rk distribution stored in the file at ``filepath``.

    The whole file is read as text, evaluated as a single Python expression
    and the resulting object is returned unchanged.

    NOTE(review): ``eval`` runs whatever the file contains, so this must only
    be pointed at trusted exports. If the export is a pure literal,
    ``ast.literal_eval`` would be a safer parser -- confirm before switching.
    """
    with open(filepath) as export_file:
        return eval(export_file.read())
# Parse the copula export located under the configured "COPULE_FOLDER" prefix.
dictionnaire_fichier = read_copule(
config.get("COPULE_FOLDER") + "20210610ExportCopule-rev_capital_partiel.txt"
)
# Number of top-level entries (buckets) in the export.
# NOTE(review): despite its name, the object is indexed with an integer below
# and iterated bucket by bucket later on, so it is presumably a list -- confirm.
len(dictionnaire_fichier)
# Number of sub-buckets stored under "nonzerobuckets" for the entry at index 50.
len(dictionnaire_fichier[50]["nonzerobuckets"])
def copule_to_df(copule, sub_bucket_id):
    """Flatten a raw copula export into a DataFrame with one row per bucket.

    Args:
        copule: Iterable of bucket dicts. Each bucket provides
            ``lower_bound``, ``upper_bound``, ``nb_people`` (a dict with
            ``zero`` and ``nonzero`` entries) and ``nonzerobuckets`` (a
            sequence of indexable sub-buckets).
        sub_bucket_id: Which sub-bucket value to extract. A non-negative
            value is used as a position inside each sub-bucket (0: number of
            people, 1: sum of the variable). Any negative value yields the
            mean ``sub_bucket[1] / sub_bucket[0]``.

    Returns:
        pandas.DataFrame holding the four bucket-level columns
        (``lower_bound``, ``upper_bound``, ``nb_people_zero``,
        ``nb_people_nonzero``) plus one integer-labelled column per
        sub-bucket position.
    """
    rows = []
    for bucket in copule:
        row = {
            "lower_bound": bucket["lower_bound"],
            "upper_bound": bucket["upper_bound"],
            "nb_people_zero": bucket["nb_people"]["zero"],
            "nb_people_nonzero": bucket["nb_people"]["nonzero"],
        }
        for position, sub_bucket in enumerate(bucket["nonzerobuckets"]):
            if sub_bucket_id < 0:
                count = sub_bucket[0]
                # An empty sub-bucket has no defined mean: record NaN instead
                # of letting ZeroDivisionError abort the whole conversion.
                if count == 0:
                    row[position] = float("nan")
                else:
                    row[position] = sub_bucket[1] / count
            else:
                row[position] = sub_bucket[sub_bucket_id]
        rows.append(row)
    return pd.DataFrame(rows)
# Three views of the same export, one row per bucket:
#   position 0 of each sub-bucket (number of people),
#   position 1 of each sub-bucket (sum of the variable),
#   and the per-sub-bucket mean (negative id).
df_foyer = copule_to_df(dictionnaire_fichier, 0)
df_rk = copule_to_df(dictionnaire_fichier, 1)
df_rk_moyen = copule_to_df(dictionnaire_fichier, -1)
# Smallest count found in sub-bucket column 56 across all buckets.
df_foyer[56].min()
def copule_to_df_mean_rk(copule):
    """Build a long-format table with one row per (bucket, sub-bucket) pair.

    Args:
        copule: Iterable of bucket dicts, each providing ``lower_bound`` and
            ``nonzerobuckets`` (a sequence of indexable sub-buckets).

    Returns:
        pandas.DataFrame with two columns: ``lower_rfr`` (the bucket's
        ``lower_bound``) and ``mean_rk`` (``sub_bucket[1] / sub_bucket[0]``,
        or NaN when ``sub_bucket[0]`` is zero).
    """
    rows = []
    for bucket in copule:
        for sub_bucket in bucket["nonzerobuckets"]:
            count = sub_bucket[0]
            # An empty sub-bucket has no defined mean: record NaN instead of
            # letting ZeroDivisionError abort the whole conversion.
            if count == 0:
                mean_rk = float("nan")
            else:
                mean_rk = sub_bucket[1] / count
            rows.append({"lower_rfr": bucket["lower_bound"], "mean_rk": mean_rk})
    return pd.DataFrame(rows)
# Long-format (lower_rfr, mean_rk) table used for the scatter plot further down;
# preview its first three rows.
df_rfr_rk_moyen_yo_plot = copule_to_df_mean_rk(dictionnaire_fichier)
df_rfr_rk_moyen_yo_plot.head(3)
def sum_copule_col(row, nb_columns=100):
    """Add up the sub-bucket columns of one bucket row.

    Args:
        row: Object indexable by the integer labels ``0 .. nb_columns - 1``,
            e.g. a row handed over by ``DataFrame.apply(..., axis=1)``.
        nb_columns: Number of sub-bucket columns to add, starting at label 0.
            Defaults to 100, the value that used to be hard-coded.

    Returns:
        ``row[0] + row[1] + ... + row[nb_columns - 1]`` (``0`` when
        ``nb_columns`` is 0).
    """
    # Plain left-to-right accumulation is kept on purpose: the notebook
    # compares the resulting grand total to an exact reference constant.
    total = 0
    for label in range(nb_columns):
        total += row[label]
    return total
# Add a per-bucket total: sum_copule_col is applied to every row (axis=1).
df_rk["sum_rk"] = df_rk.apply(sum_copule_col, axis=1)
print(f"Sommes des revenus du capital {df_rk['sum_rk'].sum():,} €")
# Regression check: the grand total must match this exact reference amount.
# NOTE(review): exact equality presumably relies on the summed values being
# integers (or summing exactly) -- confirm against the export.
assert 33836323865 == df_rk["sum_rk"].sum()
import plotly.express as px
# Scatter of the mean value per sub-bucket against the bucket's lower bound,
# with an OLS trend line.
fig = px.scatter(df_rfr_rk_moyen_yo_plot, x="lower_rfr", y="mean_rk", trendline="ols")
fig.show()
# From here on, display floats with thousands separators and two decimals.
pd.options.display.float_format = "{:,.2f}".format
# df_foyer
Dans la tranche des personnes ayant un RFR entre 0 et 8 euros, il y a 2 595 993 foyers qui n'ont pas de revenus du capital et 148 595 qui en ont.
# Heat-map styling of the per-sub-bucket means: keep only the sub-bucket
# columns by removing the four bucket-level ones, then shade the cells with
# a light-green gradient.
cm = sns.light_palette("green", as_cmap=True)
df = df_rk_moyen.drop(
    columns=["lower_bound", "upper_bound", "nb_people_zero", "nb_people_nonzero"]
)
s = df.style.background_gradient(cmap=cm)
# s
# Scratch calculation left in the notebook.
1486 * 100
#!poetry run python -m ipykernel install --name leximpact-socio-fiscal-simu-etat-kernel --user
# Heat-map styling of the per-sub-bucket counts: keep only the sub-bucket
# columns by removing the four bucket-level ones, then shade the cells with
# a light-green gradient.
cm = sns.light_palette("green", as_cmap=True)
df = df_foyer.drop(
    columns=["lower_bound", "upper_bound", "nb_people_zero", "nb_people_nonzero"]
)
s = df.style.background_gradient(cmap=cm)
# s
# Display floats with thousands separators and two decimals, then show the
# table of sums.
pd.options.display.float_format = "{:,.2f}".format
df_rk
# Heat-map styling of the sums: remove the four bucket-level columns and
# shade what remains with a light-green gradient. Only those four columns
# are removed, so any other column present on df_rk is styled as well.
cm = sns.light_palette("green", as_cmap=True)
df = df_rk.drop(
    columns=["lower_bound", "upper_bound", "nb_people_zero", "nb_people_nonzero"]
)
s = df.style.background_gradient(cmap=cm)
# s