Skip to content
Snippets Groups Projects
Commit 6476fb6c authored by Emmanuel Raviart's avatar Emmanuel Raviart
Browse files

Divise les lignes qui ont plusieurs décrets

parent 5e59c179
Branches
No related tags found
No related merge requests found
......@@ -75,6 +75,38 @@ const yesterday = new Date()
yesterday.setDate(yesterday.getDate() - 1)
const yesterdayString = yesterday.toISOString().split("T")[0]
/**
 * Walks `element` depth-first, in document order, and yields every `<a>`
 * element and every text node found.
 *
 * An `<a>` element is yielded as a whole: its children are not visited.
 * Any other element is descended into recursively.
 *
 * @param dossier Dossier being processed; only its
 *   `META.META_COMMUN.ID` is read, to build the error message.
 * @param element Root element of the walk.
 * @param trIndex Index of the table row being processed; only used in the
 *   error message.
 * @throws Error when a child node is neither an element (nodeType 1) nor a
 *   text node (nodeType 3).
 */
function* walkAElementsOrTexts(
  dossier: DossierLegislatif,
  element: Element,
  trIndex: number,
): Generator<HTMLAnchorElement | Text, void, unknown> {
  // A link is a leaf of the walk: hand it over without looking inside it.
  if (element.tagName === "A") {
    yield element as HTMLAnchorElement
    return
  }

  for (const node of element.childNodes) {
    const kind = node.nodeType
    if (kind === 1) {
      // Node.ELEMENT_NODE: recurse into the nested element.
      yield* walkAElementsOrTexts(dossier, node as Element, trIndex)
    } else if (kind === 3) {
      // Node.TEXT_NODE
      yield node as Text
    } else {
      throw new Error(
        `Dans le dossier ${dossier.META.META_COMMUN.ID}, à la ligne ${trIndex}, nœud de type inattendu ${node.nodeType} dans :\n${element.innerHTML}`,
      )
    }
  }
}
async function addLignes({
ARTICLE,
BASE_LEGALE,
......@@ -96,19 +128,45 @@ async function addLignes({
trIndex: number
withHtml: boolean
}): Promise<void> {
const aElements = objectifTdElement.querySelectorAll("a")
let cidLoiCible: string | undefined = undefined
if (aElements.length > 1) {
const lignesInfos: Array<{
aElementOrText: HTMLAnchorElement | Text
cidLoiCible?: string
}> = []
for (const aElementOrText of walkAElementsOrTexts(
dossier,
objectifTdElement,
trIndex,
)) {
switch (aElementOrText.nodeType) {
case 1: {
// Node.ELEMENT_NODE with tagName === "A"
const aElement = aElementOrText as HTMLAnchorElement
const decret = aElement.textContent?.trim() || undefined
if (decret !== undefined && decretRegexp.test(decret)) {
const aElementHref = aElement.getAttribute("href")
const match = aElementHref?.match(
/^(https:\/\/www\.legifrance\.gouv.fr)?\/(jorf|loda)\/id\/(JORFTEXT\d{12})\/?(\?|$)/,
)
if (match == null) {
throw new Error(
`Dans le dossier ${dossier.META.META_COMMUN.ID}, à la ligne ${trIndex} et la colonne Objectif de l'échéancier, se trouvent plusieurs liens au lieu d'un`,
`Dans le dossier ${dossier.META.META_COMMUN.ID}, à la ligne ${trIndex} et la colonne Objectif/Décret de l'échéancier, le lien ${aElementHref} ne pointe pas vers un JORFTEXT`,
)
}
if (aElements.length === 0) {
const decret = objectifTdElement.textContent?.trim() || undefined
lignesInfos.push({
aElementOrText,
cidLoiCible: match[3],
})
}
break
}
case 3: {
// Node.TEXT_NODE
const decret = aElementOrText.nodeValue?.trim() || undefined
if (decret !== undefined) {
const match = decret.match(decretRegexp)
if (match !== null) {
cidLoiCible = (
const cidLoiCible = (
await legiDb<{ id: string }[]>`
SELECT id
FROM texte_version
......@@ -117,28 +175,80 @@ async function addLignes({
AND data -> 'META' -> 'META_COMMUN' ->> 'ORIGINE' = 'JORF'
`
)[0]?.id
lignesInfos.push({
aElementOrText,
cidLoiCible,
})
}
}
} else {
const aElement = aElements[0]
const decret = aElement.textContent?.trim() || undefined
if (decret !== undefined && decretRegexp.test(decret)) {
const aElementHref = aElement.getAttribute("href")
const match = aElementHref?.match(
/^(https:\/\/www\.legifrance\.gouv.fr)?\/(jorf|loda)\/id\/(JORFTEXT\d{12})\/?(\?|$)/,
)
if (match == null) {
break
}
default: {
throw new Error(
`Dans le dossier ${dossier.META.META_COMMUN.ID}, à la ligne ${trIndex} et la colonne Objectif de l'échéancier, le lien ${aElementHref} ne pointe pas vers un JORFTEXT`,
`Dans le dossier ${dossier.META.META_COMMUN.ID}, à la ligne ${trIndex}, nœud de type inattendu ${aElementOrText.nodeType} dans :\n${objectifTdElement.innerHTML}`,
)
}
cidLoiCible = match[3]
}
}
if (lignesInfos.length === 0) {
const decretText = objectifTdElement.textContent?.trim() || undefined
const decretHtml = withHtml
? cleanDecretTdElement(objectifTdElement)?.innerHTML.trim() || undefined
? cleanDecretElement(objectifTdElement).innerHTML.trim() || undefined
: undefined
lignes.push({
ARTICLE,
BASE_LEGALE,
CID_LOI_CIBLE: undefined,
DATE_PREVUE,
DECRET: decretText,
DECRET_HTML: decretHtml === decretText ? undefined : decretHtml,
NUMERO_ORDRE: (lignes.length + 1).toString(),
OBJET,
})
} else {
for (const [
index,
{ aElementOrText, cidLoiCible },
] of lignesInfos.entries()) {
// Look for largest HTML element containing only this decree.
let containingNode: HTMLElement | Text = objectifTdElement
iterOthers: for (const [
otherIndex,
{ aElementOrText: otherAElementOrText },
] of lignesInfos.entries()) {
if (otherIndex == index) {
continue
}
for (
let ancestor: HTMLElement | Text | null = aElementOrText;
ancestor !== null && ancestor !== containingNode;
ancestor = ancestor.parentElement
) {
for (
let otherAncestor: HTMLElement | Text | null = otherAElementOrText;
otherAncestor !== null;
otherAncestor = otherAncestor.parentElement
) {
if (ancestor.parentElement === otherAncestor) {
containingNode = ancestor
continue iterOthers
}
if (otherAncestor === objectifTdElement) {
break
}
}
}
}
const decretText = containingNode.textContent?.trim() || undefined
const decretHtml = withHtml
? containingNode.nodeType === 1 /* Node.Element */
? cleanDecretElement(containingNode as HTMLElement)[
containingNode === objectifTdElement ? "innerHTML" : "outerHTML"
].trim() || undefined
: decretText
: undefined
lignes.push({
ARTICLE,
......@@ -147,20 +257,19 @@ async function addLignes({
DATE_PREVUE,
DECRET: decretText,
DECRET_HTML: decretHtml === decretText ? undefined : decretHtml,
NUMERO_ORDRE: trIndex.toString(),
NUMERO_ORDRE: (lignes.length + 1).toString(),
OBJET,
})
}
}
}
/**
 * Exhaustiveness guard: call it in a branch that should be unreachable once
 * every variant of a union has been handled. The `never` parameter makes the
 * compiler reject the call if a variant was forgotten.
 *
 * @param name Label of the value being checked, used in the error message.
 * @param value The value that should be impossible at this point.
 * @throws Error always. A real `Error` (rather than a bare string) is thrown
 *   so that the failure carries a stack trace and is matched by
 *   `instanceof Error` handlers.
 */
function assertNever(name: string, value: never): never {
  throw new Error(`Unexpected value for "${name}" : ${value}`)
}
function cleanDecretTdElement(
tdElement?: HTMLTableCellElement,
): HTMLTableCellElement | undefined {
if (tdElement !== undefined) {
tdElement.querySelectorAll("a").forEach((aElement) => {
function cleanDecretElement(containingElement: HTMLElement): HTMLElement {
containingElement.querySelectorAll("a").forEach((aElement) => {
const url = aElement.getAttribute("href")
if (url !== null) {
aElement.setAttribute(
......@@ -192,8 +301,7 @@ function cleanDecretTdElement(
aElement.append(svg)
}
})
}
return tdElement
return containingElement
}
async function generateEcheanciersJson({
......@@ -287,6 +395,9 @@ async function generateEcheanciersJson({
}),
)
for (const [loiJorfId, dossier] of Object.entries(dossierByLoiJorfId)) {
// Replace dossier.CONTENU.ECHEANCIER with its normalized version.
// Normalize new version of échéancier.
const echeancierLegifranceHtmlFilePath = path.join(
doleEcheanciersHtmlDir,
`${dossier.META.META_COMMUN.ID}_echeancier_table.html`,
......@@ -302,21 +413,18 @@ async function generateEcheanciersJson({
dossier,
echeancierLegifranceHtmlText,
)
if (
echeancier !== undefined &&
(dossier.CONTENU.ECHEANCIER === undefined ||
hashEcheancier(echeancier) !==
hashEcheancier(dossier.CONTENU.ECHEANCIER))
) {
// Échéancier has been improved. Change it in dossier.CONTENU.ECHEANCIER,
// but if latest échéancier in history is the same as the improved one,
// use the latest échéancier in history, to preserve @derniere_maj.
if (echeancier !== undefined) {
// Retrieve latest normalized version of échéancier.
const echeancierFilename = `${loiJorfId}.json`
const echeancierFilePath = path.join(echeanciersDir, echeancierFilename)
const echeanciers = (await fs.pathExists(echeancierFilePath))
? ((await fs.readJson(echeancierFilePath)) as LoiEtMesures).echeanciers
: []
const latestEcheancier = echeanciers[0]
// if the latest échéancier in history is the same as the new one,
// use the latest échéancier in history, to preserve @derniere_maj.
dossier.CONTENU.ECHEANCIER =
latestEcheancier !== undefined &&
hashEcheancier(latestEcheancier) === hashEcheancier(echeancier)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment