Skip to content

Commit 263be2f

Browse files
committed
kumottujenpoisto-endpoint
1 parent db86355 commit 263be2f

File tree

2 files changed

+201
-2
lines changed

2 files changed

+201
-2
lines changed

backend/src/app.ts

Lines changed: 143 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@ import statuteRouter from './controllers/statute.js';
77
import judgmentRouter from './controllers/judgment.js';
88
import keywordRouter from './controllers/keyword.js';
99
import judgmentKeywordRouter from './controllers/judgmentKeyword.js';
10+
import axios from 'axios';
1011
import { fileURLToPath } from 'url';
1112
import { runSetup } from './dbSetup.js';
1213
import { getLatestStatusEntry, getAllStatusEntries, clearAllStatusEntries } from './db/models/status.js';
1314
import { addStatusRow, createTables, dropTables, dropJudgmentsTables, createJudgmentsTables, deleteStatutesByYear, normalizeStatuteTitlesByYear } from './db/db.js';
1415
import { VALID_LANGUAGES, yearFrom, yearTo } from './util/config.js';
15-
import { buildFinlexUrl, buildJudgmentUrl, listStatutesByYear, setSingleJudgment, setSingleStatute } from './db/load.js';
16+
import { buildFinlexUrl, buildJudgmentUrl, listStatutesByYear, setSingleJudgment, setSingleStatute, parseIsInForceFromXml, finlexLimiter, fetchWithBackoff, startFinlexLimiterLogging, stopFinlexLimiterLogging } from './db/load.js';
1617
import type { JudgmentKey } from './types/judgment.js';
1718
import type { StatuteKey } from './types/statute.js';
1819
import { getRecentLogs, pushLog } from './util/logBuffer.js';
@@ -494,6 +495,147 @@ app.get('/api/admin/check-title-issues', verifyAdminToken, async (req: express.R
494495
}
495496
});
496497

498+
/**
 * Admin maintenance endpoint: deletes statutes whose Finlex XML says `isInForce=false`.
 *
 * Query parameters:
 * - `dryRun=true`: check and count, but do not issue any DELETE.
 * - `startYear` / `endYear`: optional inclusive year bounds. Each must be a plain
 *   non-negative integer; anything else is rejected with HTTP 400 instead of being
 *   parsed to NaN (which would silently disable the filter and purge every year).
 *
 * Every row of `statutes` (ordered by year, number, language) inside the bounds is
 * checked by fetching its XML from the Finlex open data API, trying several URL
 * shapes in order. Rows whose XML cannot be fetched are counted in `fetchFailed`
 * and left untouched; rows where `parseIsInForceFromXml` returns `null` are also
 * left untouched.
 *
 * Responds with 200 and the counters on success, 500 on an unexpected error.
 * In dry-run mode `deletedTotal` is the number of rows that would have been deleted.
 */
app.post('/api/admin/purge-not-in-force-statutes', verifyAdminToken, async (req: express.Request, res: express.Response): Promise<void> => {
  // Absent/empty parameter -> null (no bound); non-integer -> 'invalid'.
  const parseYearParam = (raw: unknown): number | null | 'invalid' => {
    if (raw === undefined || raw === null || raw === '') return null;
    const text = String(raw).trim();
    return /^\d+$/.test(text) ? Number.parseInt(text, 10) : 'invalid';
  };

  const dryRun = String(req.query?.dryRun ?? 'false') === 'true';
  const startYear = parseYearParam(req.query.startYear);
  const endYear = parseYearParam(req.query.endYear);

  // Refuse to run a destructive job with bounds we could not understand.
  if (startYear === 'invalid' || endYear === 'invalid') {
    res.status(400).json({ ok: false, error: 'startYear and endYear must be integers' });
    return;
  }

  console.log(`[maintenance] purge-not-in-force-statutes started dryRun=${dryRun} startYear=${startYear ?? '-'} endYear=${endYear ?? '-'}`);
  startFinlexLimiterLogging();

  // Loop-invariant request settings.
  const baseUrl = 'https://opendata.finlex.fi/finlex/avoindata/v1/akn/fi/act';
  const headers = { Accept: 'application/xml', 'Accept-Encoding': 'gzip' };

  try {
    const { rows } = await query(
      `SELECT uuid, year, number, language, version
       FROM statutes
       ORDER BY year ASC, number ASC, language ASC`
    );

    let checkedTotal = 0;
    let deletedTotal = 0;
    let fetchFailed = 0;

    // Per-year counters, used only for progress logging.
    let currentYear: number | null = null;
    let checkedThisYear = 0;
    let deletedThisYear = 0;

    for (const row of rows) {
      const year = Number(row.year);

      if (startYear !== null && year < startYear) continue;
      if (endYear !== null && year > endYear) continue;

      // Rows arrive ordered by year, so a change of year closes the previous one.
      if (year !== currentYear) {
        if (currentYear !== null) {
          console.log(`[maintenance] Year ${currentYear}: done checked=${checkedThisYear} deleted=${deletedThisYear}`);
        }
        currentYear = year;
        checkedThisYear = 0;
        deletedThisYear = 0;
        console.log(`[maintenance] Year ${currentYear}: starting`);
      }

      checkedTotal += 1;
      checkedThisYear += 1;

      // Candidate URLs, tried in this exact order.
      const candidates: string[] = [
        // 1) canonical / work-level endpoint (often best)
        `${baseUrl}/statute-consolidated/${row.year}/${row.number}/`,
        // 2) expression-level, language but no version
        `${baseUrl}/statute-consolidated/${row.year}/${row.number}/${row.language}@`,
      ];
      if (row.version) {
        // 3) expression-level, language + version
        candidates.push(`${baseUrl}/statute-consolidated/${row.year}/${row.number}/${row.language}@${row.version}`);
      }

      let xml: string | null = null;
      let lastError = '';
      const attemptedUris: string[] = [];

      for (const candidate of candidates) {
        attemptedUris.push(candidate);
        try {
          xml = (await fetchWithBackoff<string>(candidate, { headers })).data;
          break; // success
        } catch (error: unknown) {
          // Remember why this candidate failed, then fall through to the next one.
          lastError = error instanceof Error ? error.message : String(error);
        }
      }

      if (!xml) {
        fetchFailed += 1;
        // Throttle the warning: the first 20 failures, then every 200th.
        if (fetchFailed <= 20 || fetchFailed % 200 === 0) {
          console.warn(
            `[maintenance] Finlex fetch failed for ${row.year}/${row.number}/${row.language}@${row.version ?? ''} ` +
            `tried=${attemptedUris.join(' | ')} lastError=${lastError}`
          );
        }
        continue;
      }

      const isInForce = await parseIsInForceFromXml(xml);

      // Only an explicit `false` triggers deletion; `null` (unknown) keeps the row.
      if (isInForce === false) {
        if (!dryRun) {
          await query('DELETE FROM statutes WHERE uuid = $1', [row.uuid]);
        }

        deletedTotal += 1;
        deletedThisYear += 1;

        console.log(
          `[maintenance] ${dryRun ? 'would delete' : 'deleted'} (isInForce=false): ` +
          `${row.year}/${row.number}/${row.language}@${row.version ?? ''} ` +
          `uuid=${row.uuid} deletedThisYear=${deletedThisYear} deletedTotal=${deletedTotal}`
        );
      }

      if (checkedTotal % 250 === 0) {
        console.log(
          `[maintenance] progress year=${currentYear} checkedThisYear=${checkedThisYear} deletedThisYear=${deletedThisYear} ` +
          `checkedTotal=${checkedTotal} deletedTotal=${deletedTotal} fetchFailed=${fetchFailed}`
        );
      }
    }

    if (currentYear !== null) {
      console.log(`[maintenance] Year ${currentYear}: done checked=${checkedThisYear} deleted=${deletedThisYear}`);
    }

    console.log(
      `[maintenance] purge-not-in-force-statutes finished dryRun=${dryRun} ` +
      `checkedTotal=${checkedTotal} deletedTotal=${deletedTotal} fetchFailed=${fetchFailed}`
    );

    res.status(200).json({
      ok: true,
      dryRun,
      checkedTotal,
      deletedTotal,
      fetchFailed,
      startYear,
      endYear,
    });
  } catch (error) {
    console.error('[maintenance] purge-not-in-force-statutes error:', error);
    Sentry.captureException(error);
    res.status(500).json({ ok: false, error: 'Failed to purge statutes' });
  } finally {
    // Always pair with startFinlexLimiterLogging(), including on failure.
    stopFinlexLimiterLogging();
  }
});
638+
497639
// Answer favicon requests with an empty 204 (No Content) response.
app.get('/favicon.ico', (_req: express.Request, res: express.Response): void => {
  res.status(204).end();
});

backend/src/db/load.ts

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -913,4 +913,61 @@ async function listJudgmentsByYear(year: number, language: string, level: string
913913
return Array.from(judgmentURLsSet);
914914
}
915915

916-
export { listStatutesByYear, setSingleStatute, listJudgmentNumbersByYear, listJudgmentsByYear, parseURLfromJudgmentID, setSingleJudgment, parseAkomafromURL, parseFinlexUrl, parseJudgmentUrl, buildFinlexUrl, buildJudgmentUrl }
916+
/**
 * Extracts the `isInForce` flag from a Finlex Akoma Ntoso XML document.
 *
 * The flag is read from the `value` attribute of
 * `akomaNtoso > act > meta > proprietary > isInForce` (namespace prefixes are
 * stripped by the parser). The `akomaNtoso` element is searched under several
 * wrapper shapes, and both it and `isInForce` may be a single node or a list;
 * the first value that can be interpreted as a boolean wins.
 *
 * @param xmlString Raw XML text.
 * @returns `true` / `false` when a flag was found, `null` when it is missing,
 *          unparseable, or the XML could not be parsed (a warning is logged).
 */
export async function parseIsInForceFromXml(xmlString: string): Promise<boolean | null> {
  try {
    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      removeNSPrefix: true, // finlex:isInForce => isInForce
    });

    const parsed: unknown = parser.parse(xmlString);

    // Property lookup that is safe on anything the parser may hand back.
    const child = (value: unknown, key: string): unknown =>
      typeof value === 'object' && value !== null ? (value as Record<string, unknown>)[key] : undefined;

    // Follow a chain of keys, yielding undefined as soon as one is missing.
    const dig = (root: unknown, ...path: string[]): unknown =>
      path.reduce<unknown>((node, key) => child(node, key), root);

    // The parser returns repeated elements as arrays and single ones as objects.
    const toList = (value: unknown): unknown[] => {
      if (value === undefined || value === null) return [];
      return Array.isArray(value) ? value : [value];
    };

    // Accept real booleans and "true"/"false" text regardless of case or padding.
    const coerce = (value: unknown): boolean | null => {
      if (typeof value === 'boolean') return value;
      if (typeof value !== 'string') return null;
      const normalized = value.trim().toLowerCase();
      if (normalized === 'true') return true;
      if (normalized === 'false') return false;
      return null;
    };

    const extractFromAkoma = (akoma: unknown): boolean | null => {
      for (const node of toList(dig(akoma, 'act', 'meta', 'proprietary', 'isInForce'))) {
        const flag = coerce(child(node, '@_value'));
        if (flag !== null) return flag;
      }
      return null;
    };

    // Finlex responses can come in a couple of wrappers depending on endpoint.
    const candidates: unknown[] = [
      dig(parsed, 'AknXmlList', 'Results', 'akomaNtoso'),
      dig(parsed, 'AknXmlList', 'Results', 'Results', 'akomaNtoso'), // defensive
      dig(parsed, 'Results', 'akomaNtoso'),
      dig(parsed, 'akomaNtoso'),
    ];

    for (const candidate of candidates) {
      for (const akoma of toList(candidate)) {
        const flag = extractFromAkoma(akoma);
        if (flag !== null) return flag;
      }
    }

    return null;
  } catch (e: unknown) {
    console.warn('Failed to parse isInForce from XML:', e);
    return null;
  }
}
972+
973+
export { listStatutesByYear, setSingleStatute, listJudgmentNumbersByYear, listJudgmentsByYear, parseURLfromJudgmentID, setSingleJudgment, parseAkomafromURL, parseFinlexUrl, parseJudgmentUrl, buildFinlexUrl, buildJudgmentUrl, finlexLimiter, fetchWithBackoff }

0 commit comments

Comments
 (0)