From 3129e51fe2bafa716de8e9cc5952815393ad9ee8 Mon Sep 17 00:00:00 2001
From: benoit-cty <benoit-cty@leximpact.dev>
Date: Mon, 24 Mar 2025 17:38:29 +0100
Subject: [PATCH] wip: fix CI by running poetry install in before_script

---
 .gitlab-ci.yml       |  8 ++++----
 deploy/Dockerfile-ci |  1 +
 deploy/README.md     | 13 ++++++++++---
 pyproject.toml       | 36 +++++++++++++++++-------------------
 4 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index be72c2d8..d5f27252 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -39,14 +39,16 @@ cache:
     - ${PRE_COMMIT_HOME}
 
 before_script:
-  # git is needed for pre-commit
-  - pip install --find-links=${PIP_DOWNLOAD_DIR} poetry # STEP 2
   - cp deploy/.env-ci notebooks/.env
   - cp deploy/.env-ci leximpact_prepare_data/.env
   - cp deploy/.env-ci .env 
   - sed -i "s/BRANCH_NAME/$OUT_FOLDER/" notebooks/.env
   - sed -i "s/BRANCH_NAME/$OUT_FOLDER/" leximpact_prepare_data/.env
   - sed -i "s/BRANCH_NAME/$OUT_FOLDER/" .env
+  # Run the install in every job so that the environment is always up to date; the cache will be used.
+  - poetry config virtualenvs.in-project true
+  - poetry install --extras "pipeline"
+  - poetry run pre-commit install --install-hooks
 
 build docker image:
   stage: docker
@@ -67,8 +69,6 @@ build docker image:
 install:
   stage: init
   script:
-#     # Allow caching by only downloading first:
-#     - pip download --dest=${PIP_DOWNLOAD_DIR} poetry # STEP 1
     - poetry config virtualenvs.in-project true
     - poetry install --extras "pipeline"
     - poetry run pre-commit install --install-hooks
diff --git a/deploy/Dockerfile-ci b/deploy/Dockerfile-ci
index 8cb8a33b..94013817 100644
--- a/deploy/Dockerfile-ci
+++ b/deploy/Dockerfile-ci
@@ -4,6 +4,7 @@ FROM python:3.10-slim
 RUN apt-get update && apt-get install -y pandoc texlive-latex-base texlive-fonts-recommended texlive-latex-extra git make curl
 
 RUN pip install poetry==2.1.1
+RUN poetry config virtualenvs.in-project true
 
 # Install Leximpact prepare data
 WORKDIR /src/leximpact-prepare-data
diff --git a/deploy/README.md b/deploy/README.md
index 60ed7f94..bb717ea0 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -21,7 +21,7 @@ cd ..
 
 Depuis la racine du projet `leximpact-prepare-data`:
 ```bash
-docker build -t leximpact/prepare-data:0.0.1 -f deploy/Dockerfile-Debian11-Python39 .
+docker build -t leximpact/prepare-data:0.0.1 -f deploy/Dockerfile-ci .
 ## Construction de l'image
 
 Il faut récupérer le projet Git :
@@ -43,7 +43,7 @@ cd ..
 
 Depuis la racine du projet `leximpact-prepare-data`:
 ```bash
-docker build -t leximpact-prepare-data -f deploy/Dockerfile-Debian11-Python39 .
+docker build -t leximpact-prepare-data -f deploy/Dockerfile-ci .
 ```
 
 ## Copier les fichiers de données
@@ -68,7 +68,14 @@ docker run -v $PWD/data-in/:/mnt/data-in/ -v $PWD/data-out/:/mnt/data-out/ lexim
 
 ## Instancier l'image et obtenir un shell dans le conteneur
 
-`docker run -it -v $PWD/data-in/:/mnt/data-in/ -v $PWD/data-out/:/mnt/data-out/ leximpact/prepare-data:0.0.1 bash`
+```bash
+docker run -it -v $PWD/data-in/:/mnt/data-in/ -v $PWD/data-out/:/mnt/data-out/ leximpact/prepare-data:latest bash
+git fetch
+git switch relance_data
+git pull
+make install
+poetry run pre-commit run --all-files --show-diff-on-failure
+```
 
 ## Publication de l'image
 
diff --git a/pyproject.toml b/pyproject.toml
index ec3c054a..51c379a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,6 +4,23 @@ pipeline = "leximpact_prepare_data.run_pipeline:run_pipeline"
 
 [tool.poetry.group.dev.dependencies]
 pydocstyle = "^6.3.0"
+seaborn = "^0.13.2"
+jupyter = "^1.0.0"
+jupyterlab = "^3.2.8"
+ipykernel = "^6.2.0"
+lab = "^6.4"
+# pytest = "^5.2" Tiré par OpenFisca-France-Data
+papermill = "^2.3.3"
+plotly = "^5.4.0"
+xlrd = "^2.0.1"  # For Excel
+openpyxl = "^3.0.9"  # For Excel
+nbdev = "^2.0.0"
+# markupsafe = "2.0.1" # For NBDev : https://github.com/pallets/markupsafe/issues/284
+pre-commit = "*"
+nbqa = ">=1.2.3"
+flake8 = ">=3.9.0"
+black = ">=21.5b1"
+toml = "^0.10.2"
 
 [tool.poetry]
 name = "leximpact_prepare_data"
@@ -43,25 +60,6 @@ pip = "^23.2"
 pypandoc = "^1.11"
 dtale = "^3.3.0"
 
-[tool.poetry.dev-dependencies]
-seaborn = "^0.13.2"
-jupyter = "^1.0.0"
-jupyterlab = "^3.2.8"
-ipykernel = "^6.2.0"
-lab = "^6.4"
-# pytest = "^5.2" Tiré par OpenFisca-France-Data
-papermill = "^2.3.3"
-plotly = "^5.4.0"
-xlrd = "^2.0.1"  # For Excel
-openpyxl = "^3.0.9"  # For Excel
-nbdev = "^2.0.0"
-# markupsafe = "2.0.1" # For NBDev : https://github.com/pallets/markupsafe/issues/284
-pre-commit = "*"
-nbqa = ">=1.2.3"
-flake8 = ">=3.9.0"
-black = ">=21.5b1"
-toml = "^0.10.2"
-
 [tool.poetry_bumpversion.file."leximpact_prepare_data/__init__.py"]
 [tool.poetry_bumpversion.file."settings.ini"]
 [tool.poetry_bumpversion.file.".conda/meta.yaml"]
-- 
GitLab