Skip to content
Snippets Groups Projects
Commit 39c0e240 authored by Emmanuel Raviart's avatar Emmanuel Raviart
Browse files

fix: ignore first row of HTML table when it contains only a single column

parent 2d5ffe11
No related branches found
No related tags found
No related merge requests found
......@@ -1003,11 +1003,17 @@ async function normalizeEcheancier(
} else {
// 2024 and later syntax
let skipNumeroOrdreColumn = false
let expectingLabels = true
for (const [trIndex, trElement] of document
.querySelectorAll("table tbody tr")
.entries()) {
const tdElements = [...trElement.querySelectorAll("td")]
if (trIndex === 0) {
if (expectingLabels) {
if (tdElements.length === 1) {
// The row contains a title of the table. Ignore it.
// See for example:
// https://www.legifrance.gouv.fr/dossierlegislatif/JORFDOLE000050427907/?detailType=CONTENU&detailId=1
} else {
const texts = tdElements.map((tdElement) =>
tdElement?.textContent?.trim(),
)
......@@ -1025,6 +1031,8 @@ async function normalizeEcheancier(
`Dans le dossier ${dossier.META.META_COMMUN.ID}, les entêtes de l'échéancier (${texts.join(", ")}) ne correspondent pas à ceux attendus`,
)
}
expectingLabels = false
}
} else if (tdElements.length <= 2) {
// This is a followup line: same article but different decret.
const { ARTICLE, BASE_LEGALE, OBJET } = lignes.at(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment