Skip to content
Snippets Groups Projects
Commit 839688e6 authored by Benoît Courty
Browse files

fix logements

parent 81e222dd
No related branches found
No related tags found
1 merge request: !54 Mise à jour de DataCirco
Pipeline #20083 failed
......@@ -15,6 +15,7 @@ MODULE_FOLDER=$DATA/`echo $MODULE_NAME | sed 's/-.*//'`
echo "MODULE_FOLDER: $MODULE_FOLDER"
cd $MODULE_FOLDER
# if the module name is 'entreprises', then we have to run 'entreprises-01.py' and 'entreprises-02.py'
if [ $MODULE_NAME == "entreprises" ]; then
echo "Running python3 $RACINE/populate_db/entreprises-01.py"
......@@ -22,6 +23,12 @@ if [ $MODULE_NAME == "entreprises" ]; then
echo "Running python3 $RACINE/populate_db/entreprises-02.py"
python3 $RACINE/populate_db/entreprises-02.py
echo "Done script at `date` for $MODULE_NAME"
elif [ $MODULE_NAME == "logements" ]; then
echo "Running python3 $RACINE/populate_db/logements-01.py"
python3 $RACINE/populate_db/logements-01.py
echo "Running python3 $RACINE/populate_db/logements-02.py"
python3 $RACINE/populate_db/logements-02.py
echo "Done script at `date` for $MODULE_NAME"
elif [ $MODULE_NAME == "meteo" ]; then
echo "Running python3 $RACINE/populate_db/meteo_convert.py"
python3 $RACINE/populate_db/meteo_convert.py
......
......@@ -32,7 +32,7 @@ class GetRevenu:
self.data.revenu.sources = utilitaires.get_sources("revenu")
def get_carreaux_revenu(self):
"""A partir de la table insee_pop_2020, récupérer la valeur Men_pauv du
"""A partir de la table insee_pop_2021, récupérer la valeur Men_pauv du
nombre de ménages pauvres et en obtenir la proportion en la divisant
par le nombre de ménages."""
# On vérifie d'abord le rapport entre valeurs imputées et nombres de carreaux total de la circonscription
......@@ -226,8 +226,8 @@ class GetRevenu:
self.db.mogrify(
f"""WITH somme_totale AS (
SELECT SUM(st_area(st_intersection(iris2.wkb_geometry, circo2.wkb_geometry)::geography)*popi2.p20_pop::numeric) as pond_revenu
FROM insee_pop_2020 as popi2
JOIN insee_revenu_pauvrete_2020 as reveni2 ON popi2.iris = reveni2.iris
FROM insee_pop_2021 as popi2
JOIN insee_revenu_pauvrete_2021 as reveni2 ON popi2.iris = reveni2.iris
JOIN iris_ge AS iris2 ON iris2.code_iris = popi2.iris
JOIN zone_circo AS circo2 ON st_intersects(iris2.wkb_geometry, circo2.wkb_geometry)
WHERE circo2.ref = '{self.circo}'
......@@ -245,8 +245,8 @@ class GetRevenu:
SUM(cast(t.DISP_D920 as float)*t.pop*t.surface/pond_revenu)
from somme_totale, (
SELECT reveni.*, st_area(st_intersection(iris.wkb_geometry, circo.wkb_geometry)::geography) as surface, popi.p20_pop::numeric as pop
FROM insee_pop_2020 as popi
JOIN insee_revenu_pauvrete_2020 as reveni ON popi.iris = reveni.iris
FROM insee_pop_2021 as popi
JOIN insee_revenu_pauvrete_2021 as reveni ON popi.iris = reveni.iris
JOIN iris_ge AS iris ON iris.code_iris = popi.iris
JOIN zone_circo AS circo ON st_intersects(iris.wkb_geometry, circo.wkb_geometry)
WHERE circo.ref = '{self.circo}'
......@@ -257,17 +257,17 @@ class GetRevenu:
)
dict_deciles = dict(liste_circo=list(self.db.fetchone()))
# On remplace par les valeurs de l'INSEE en 2020
# On remplace par les valeurs de l'INSEE en 2021
# Source : https://www.insee.fr/fr/statistiques/2417897#tableau-figure1
dict_deciles["liste_nationale"] = [
9980,
15070,
18140,
20700,
23140,
25800,
28640,
32550,
38550,
9_280,
14_740,
17_780,
20_610,
23_120,
25_740,
28_730,
32_630,
38_990,
]
self.data.revenu.dict_deciles = dict_deciles
from datacirco import utilitaires
from tqdm import tqdm
from datacirco.connexion_db import db, run
# Import script for the "logements" module.
#
# Steps, in order:
#   1. load the Insee "base-ic-logement" CSV files into insee_logement_<year>;
#   2. load the per-department dpe_XX.csv files into ademe_dpe_logements;
#   3. add a point geometry and indexes to ademe_dpe_logements;
#   4. build the ademe_dpe_logements_stats materialized view;
#   5. repair invalid/unknown Insee commune codes using laposte_cp.
#
# All file names are relative: the script expects to be run from the folder
# that holds the downloaded data files.
utilitaires.start("logement-01.py")

print(" ------------- Logements (Insee)")
annees = ["2018", "2021"]
for annee in annees:
    print(f"\tBase logement {annee}")
    utilitaires.unzip_file(f"base-ic-logement-{annee}_csv.zip")
    # `fields` is interpolated as the column list of CREATE TABLE below;
    # presumably built from the CSV header -- confirm in utilitaires.get_fields.
    fields, _ = utilitaires.get_fields(f"base-ic-logement-{annee}.CSV", ";")
    # Recreate the table from scratch so a re-run never appends duplicates.
    run(
        f"""
DROP TABLE IF EXISTS insee_logement_{annee} CASCADE;
CREATE TABLE insee_logement_{annee} ({fields});
"""
    )
    with open(f"base-ic-logement-{annee}.CSV", "r") as f:
        db.copy_expert(
            f"COPY insee_logement_{annee} FROM STDIN WITH (FORMAT CSV, HEADER TRUE, DELIMITER ';')",
            f,
        )
    run(
        f"CREATE INDEX IF NOT EXISTS insee_logement_{annee}_iris ON insee_logement_{annee} (iris);"
    )

print("Import de ademe_dpe_logements_pre_2021")
# The table layout is taken from the first department file only.
# NOTE(review): assumes every dpe_XX.csv shares the same columns -- confirm.
fields, _ = utilitaires.get_fields("dpe_01.csv", ",")
run(
    f"""
DROP TABLE IF EXISTS ademe_dpe_logements CASCADE;
CREATE TABLE IF NOT EXISTS ademe_dpe_logements ({fields});
TRUNCATE ademe_dpe_logements;
"""
)
# One CSV per department: 01..19, then "2a"/"2b" in place of 20, then 21..95.
# zfill(2) pads single-digit numbers ("1" -> "01") and leaves "2a"/"2b" as is.
for d in tqdm([*range(1, 20), "2a", "2b", *range(21, 96)]):
    with open(f"dpe_{str(d).zfill(2)}.csv", "r") as f:
        db.copy_expert(
            "COPY ademe_dpe_logements FROM STDIN WITH (FORMAT CSV, HEADER TRUE)", f
        )

table_name = "ademe_dpe_logements"
print("Création des géométries : c'est long...")
# Build a WGS84 (SRID 4326) point from longitude/latitude, index it with GIST
# and physically cluster the table on that index; then add the filter indexes.
# NOTE(review): idx_dpe_code_insee (here) and ademe_dpe_logements_depcom (next
# statement) index the same column -- one of them looks redundant.
run(
    f"""
ALTER TABLE {table_name} ADD geom geometry;
UPDATE {table_name} SET geom=ST_SetSRID(ST_Makepoint(longitude::numeric, latitude::numeric),4326);
CREATE INDEX {table_name}_geom ON {table_name} USING GIST (geom);
CLUSTER {table_name} USING {table_name}_geom;
-- Pour les filtres principaux
CREATE INDEX idx_dpe_code_insee ON ademe_dpe_logements(code_insee_commune_actualise);
CREATE INDEX idx_dpe_date ON ademe_dpe_logements((LEFT(date_etablissement_dpe, 4)));
CREATE INDEX idx_dpe_classe_ges ON ademe_dpe_logements(classe_estimation_ges);
"""
)

# Pre-aggregated counts per (energy class, year, commune code).
# NOTE(review): this view is materialized BEFORE the commune-code corrections
# below, so it is computed from the uncorrected codes unless it is refreshed
# later -- confirm whether that is intended.
run(
    """
CREATE INDEX IF NOT EXISTS ademe_dpe_logements_depcom ON ademe_dpe_logements (code_insee_commune_actualise);
DROP MATERIALIZED VIEW IF EXISTS ademe_dpe_logements_stats;
CREATE MATERIALIZED VIEW ademe_dpe_logements_stats AS
SELECT
classe_consommation_energie,
left(date_etablissement_dpe,4) as annee,
code_insee_commune_actualise,
count(*) as nb
FROM ademe_dpe_logements
GROUP BY 1,2,3;
"""
)

print("-- correction code Insee incorrects")
# Pass 1: rows whose commune code is not shaped like an Insee code (digit,
# digit or A/B, three digits) get the code found in laposte_cp for the same
# postal code and normalised commune name.
# Raw string: the SQL regex contains `\d`, which is an invalid escape sequence
# in a regular Python string literal (SyntaxWarning on recent Python versions).
run(
    r"""
with u as (
select d.code_postal, commune, code_insee_commune_actualise, cp.code_commune_insee
from ademe_dpe_logements d
join laposte_cp cp
on (
cp.code_postal=trim(d.code_postal)
and libelle_d_acheminement=trim(regexp_replace(upper(unaccent(commune)),'[^A-Z]+',' ','g'))
)
where code_insee_commune_actualise !~ '^\d(\d|[AB])\d\d\d$' group by 1,2,3,4
)
update ademe_dpe_logements o
set code_insee_commune_actualise=u.code_commune_insee
from u
where o.code_insee_commune_actualise=u.code_insee_commune_actualise
and o.code_postal=u.code_postal
and o.commune=u.commune;
"""
)

# Pass 2: same repair for codes that match neither ign_commune.insee_com nor
# ign_arrondissement_municipal.insee_arm (both left joins come back NULL).
run(
    """
with u as (
select d.code_postal, commune, code_insee_commune_actualise,cp.code_commune_insee
from ademe_dpe_logements d
join laposte_cp cp on (cp.code_postal=trim(d.code_postal)
and libelle_d_acheminement=trim(regexp_replace(upper(unaccent(commune)),'[^A-Z]+',' ','g')))
left join ign_commune c on (c.insee_com = d.code_insee_commune_actualise)
left join ign_arrondissement_municipal a on (insee_arm = d.code_insee_commune_actualise)
where c.insee_com is null
and a.insee_arm is null
group by 1,2,3,4
)
update ademe_dpe_logements o
set code_insee_commune_actualise=u.code_commune_insee
from u
where o.code_insee_commune_actualise=u.code_insee_commune_actualise
and o.code_postal=u.code_postal
and o.commune=u.commune;
"""
)

utilitaires.end("logement-01.py")
......@@ -6,121 +6,7 @@ from datacirco.connexion_db import db, run, engine
import glob
import os
utilitaires.start("logement.py")
print(" ------------- Logements (Insee)")
annees = ["2018", "2021"]
for annee in annees:
print(f"\tBase logement {annee}")
utilitaires.unzip_file(f"base-ic-logement-{annee}_csv.zip")
fields, _ = utilitaires.get_fields(f"base-ic-logement-{annee}.CSV", ";")
run(
f"""
DROP TABLE IF EXISTS insee_logement_{annee} CASCADE;
CREATE TABLE insee_logement_{annee} ({fields});
"""
)
with open(f"base-ic-logement-{annee}.CSV", "r") as f:
db.copy_expert(
f"COPY insee_logement_{annee} FROM STDIN WITH (FORMAT CSV, HEADER TRUE, DELIMITER ';')",
f,
)
run(
f"CREATE INDEX IF NOT EXISTS insee_logement_{annee}_iris ON insee_logement_{annee} (iris);"
)
print("Import de ademe_dpe_logements_pre_2021")
fields, _ = utilitaires.get_fields("dpe_01.csv", ",")
run(
f"""
DROP TABLE IF EXISTS ademe_dpe_logements CASCADE;
CREATE TABLE IF NOT EXISTS ademe_dpe_logements ({fields});
TRUNCATE ademe_dpe_logements;
"""
)
for d in tqdm([*range(1, 20), "2a", "2b", *range(21, 96)]):
with open(f"dpe_{str(d).zfill(2)}.csv", "r") as f:
db.copy_expert(
"COPY ademe_dpe_logements FROM STDIN WITH (FORMAT CSV, HEADER TRUE)", f
)
table_name = "ademe_dpe_logements"
print("Création des géométries : c'est long...")
run(
f"""
ALTER TABLE {table_name} ADD geom geometry;
UPDATE {table_name} SET geom=ST_SetSRID(ST_Makepoint(longitude::numeric, latitude::numeric),4326);
CREATE INDEX {table_name}_geom ON {table_name} USING GIST (geom);
CLUSTER {table_name} USING {table_name}_geom;
-- Pour les filtres principaux
CREATE INDEX idx_dpe_code_insee ON ademe_dpe_logements(code_insee_commune_actualise);
CREATE INDEX idx_dpe_date ON ademe_dpe_logements((LEFT(date_etablissement_dpe, 4)));
CREATE INDEX idx_dpe_classe_ges ON ademe_dpe_logements(classe_estimation_ges);
"""
)
run(
"""
CREATE INDEX IF NOT EXISTS ademe_dpe_logements_depcom ON ademe_dpe_logements (code_insee_commune_actualise);
DROP MATERIALIZED VIEW IF EXISTS ademe_dpe_logements_stats;
CREATE MATERIALIZED VIEW ademe_dpe_logements_stats AS
SELECT
classe_consommation_energie,
left(date_etablissement_dpe,4) as annee,
code_insee_commune_actualise,
count(*) as nb
FROM ademe_dpe_logements
GROUP BY 1,2,3;
"""
)
print("-- correction code Insee incorrects")
run(
"""
with u as (
select d.code_postal, commune, code_insee_commune_actualise, cp.code_commune_insee
from ademe_dpe_logements d
join laposte_cp cp
on (
cp.code_postal=trim(d.code_postal)
and libelle_d_acheminement=trim(regexp_replace(upper(unaccent(commune)),'[^A-Z]+',' ','g'))
)
where code_insee_commune_actualise !~ '^\d(\d|[AB])\d\d\d$' group by 1,2,3,4
)
update ademe_dpe_logements o
set code_insee_commune_actualise=u.code_commune_insee
from u
where o.code_insee_commune_actualise=u.code_insee_commune_actualise
and o.code_postal=u.code_postal
and o.commune=u.commune;
"""
)
run(
"""
with u as (
select d.code_postal, commune, code_insee_commune_actualise,cp.code_commune_insee
from ademe_dpe_logements d
join laposte_cp cp on (cp.code_postal=trim(d.code_postal)
and libelle_d_acheminement=trim(regexp_replace(upper(unaccent(commune)),'[^A-Z]+',' ','g')))
left join ign_commune c on (c.insee_com = d.code_insee_commune_actualise)
left join ign_arrondissement_municipal a on (insee_arm = d.code_insee_commune_actualise)
where c.insee_com is null
and a.insee_arm is null
group by 1,2,3,4
)
update ademe_dpe_logements o
set code_insee_commune_actualise=u.code_commune_insee
from u
where o.code_insee_commune_actualise=u.code_insee_commune_actualise
and o.code_postal=u.code_postal
and o.commune=u.commune;
"""
)
utilitaires.start("logement-02.py")
print(
"Creation d'une MATERIALIZED VIEW pour les DPE 2013 à 2021, cela va être long... 14 minutes"
......@@ -128,6 +14,7 @@ print(
run(
"""
-- Création d'une table de liaison circonscription/communes
DROP MATERIALIZED VIEW IF EXISTS mv_circo_communes;
CREATE MATERIALIZED VIEW mv_circo_communes AS
SELECT ref, unnest(communes) AS code_insee
FROM zone_circo;
......@@ -136,6 +23,7 @@ FROM zone_circo;
CREATE INDEX idx_mv_circo_communes ON mv_circo_communes(ref, code_insee);
-- Vue matérialisée principale
DROP MATERIALIZED VIEW IF EXISTS mv_dpe_stats_2013_2021;
CREATE MATERIALIZED VIEW mv_dpe_stats_2013_2021 AS
SELECT
COALESCE(c1.ref, c2.ref) AS circonscription,
......@@ -359,4 +247,4 @@ run(
"""
)
utilitaires.end("logement.py")
utilitaires.end("logement-02.py")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment