Skip to content

Commit 8415c31

Browse files
committed
rikkinäisten otsikkojen korjaamisen korjaaminen lol
1 parent a59fb48 commit 8415c31

File tree

3 files changed

+90
-25
lines changed

3 files changed

+90
-25
lines changed

backend/src/app.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,19 @@ app.post('/api/statute/normalize-titles/:year', verifyAdminToken, async (req: ex
336336
return;
337337
}
338338

339+
console.log(`[normalize-titles] Starting normalization for year ${yearNum}`);
339340
const updatedCount = await normalizeStatuteTitlesByYear(yearNum);
341+
console.log(`[normalize-titles] Normalized ${updatedCount} statutes, now syncing to Typesense...`);
342+
340343
await syncStatutes('fin', { startYear: yearNum, endYear: yearNum });
341344
await syncStatutes('swe', { startYear: yearNum, endYear: yearNum });
342-
res.status(200).json({ message: 'Statute titles normalized', year: yearNum, updatedCount });
345+
346+
console.log(`[normalize-titles] Successfully completed normalization and sync for year ${yearNum}`);
347+
res.status(200).json({
348+
message: 'Statute titles normalized and synced',
349+
year: yearNum,
350+
updatedCount
351+
});
343352
} catch (error) {
344353
console.error('Normalize statute titles endpoint error:', error);
345354
Sentry.captureException(error);

backend/src/db/db.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ async function normalizeStatuteTitlesByYear(year: number): Promise<number> {
435435
try {
436436
const client = await pool.connect();
437437
const result = await client.query(
438-
'SELECT uuid, content::text AS content FROM statutes WHERE year = $1',
438+
'SELECT uuid, content::text AS content, number, language FROM statutes WHERE year = $1',
439439
[year]
440440
);
441441
let updatedCount = 0;
@@ -445,13 +445,15 @@ async function normalizeStatuteTitlesByYear(year: number): Promise<number> {
445445
const title = await parseTitleFromXmlString(row.content);
446446
await client.query('UPDATE statutes SET title = $1 WHERE uuid = $2', [title, row.uuid]);
447447
updatedCount += 1;
448+
console.log(`[normalize] Updated title for statute ${row.number}/${year} (${row.language}): "${title}"`);
448449
} catch (error) {
449-
console.error('Failed to normalize title for statute', row.uuid, error);
450+
console.error(`Failed to normalize title for statute ${row.uuid} (${row.number}/${year}, ${row.language}):`, error);
450451
Sentry.captureException(error);
451452
}
452453
}
453454

454455
client.release();
456+
console.log(`[normalize] Successfully updated ${updatedCount} out of ${result.rows.length} statutes for year ${year}`);
455457
return updatedCount;
456458
} catch (error) {
457459
console.error('Error normalizing statute titles by year:', error);

backend/src/db/load.ts

Lines changed: 76 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ finlexLimiter.on('executing', () => {
3636

3737
export function startFinlexLimiterLogging() {
3838
if (finlexLogInterval) {
39-
return;
39+
return; // Already started
4040
}
4141

4242
console.log('[finlexLimiter] Starting rate limiter logging...');
@@ -166,19 +166,40 @@ function buildJudgmentUrl(judgment: JudgmentKey): string {
166166
return `${baseUrl}/${casestatute}/${prefix}/${path}`;
167167
}
168168

169-
170-
169+
/**
 * Extracts the title from an XML document by reading the text of its first
 * `docTitle` element.
 *
 * The XML is parsed with JSDOM in `text/xml` mode and the element's
 * `textContent` is used, so text held in nested child elements is included in
 * document order. Every run of whitespace (including newlines) is collapsed to
 * a single space and the result is trimmed.
 *
 * @param xmlString - Raw XML source of the document.
 * @returns The whitespace-normalized title text.
 * @throws Error with message `docTitle not found in XML` when the document has
 *   no `docTitle` element. Errors thrown by the JSDOM constructor propagate
 *   unchanged.
 */
function parseTitleFromXMLDOM(xmlString: string): string {
  const dom = new JSDOM(xmlString, { contentType: 'text/xml' });

  try {
    const docTitleElement = dom.window.document.querySelector('docTitle');

    if (!docTitleElement) {
      throw new Error('docTitle not found in XML');
    }

    // textContent concatenates all descendant text nodes in document order.
    const title = docTitleElement.textContent ?? '';

    // Collapse line breaks and indentation left over from the XML layout.
    return title.replace(/\s+/g, ' ').trim();
  } finally {
    // Always close the window so the JSDOM instance can be released, on both
    // the success and the error path; a new instance is created per call.
    dom.window.close();
  }
}
171186

172187
/**
 * Extracts the document title from a fetched XML response.
 *
 * The response body is treated as the raw XML text and handed to
 * `parseTitleFromXmlString`, which tries the DOM-based parser first and falls
 * back to xml2js. Delegating keeps the two entry points from drifting apart.
 *
 * @param result - HTTP response whose `data` holds the XML source.
 * @returns The parsed title.
 * @throws Whatever `parseTitleFromXmlString` throws when neither parser can
 *   produce a title.
 */
async function parseTitlefromXML(result: AxiosResponse<unknown>): Promise<string> {
  // `data` is typed as unknown by the caller's response type; it is assumed to
  // be the XML text, exactly as the previous inline implementation assumed.
  return parseTitleFromXmlString(result.data as string);
}
183204

184205
export function parseTitleFromXmlObject(resultNode: any): string {
@@ -188,23 +209,56 @@ export function parseTitleFromXmlObject(resultNode: any): string {
188209
throw new Error('docTitle not found')
189210
}
190211

191-
if (typeof docTitleRaw === 'string') {
192-
return docTitleRaw.trim()
193-
}
194-
if (typeof docTitleRaw === 'object' && typeof docTitleRaw._ === 'string') {
195-
return docTitleRaw._.trim()
212+
// Helper function to recursively extract text from nested objects
213+
/**
 * Recursively collects the text held in a parsed (xml2js-style) node.
 *
 * - `null` / `undefined` contribute nothing (previously they were stringified
 *   to "null" / "undefined" and ended up in the title).
 * - Strings are returned as-is.
 * - Arrays are flattened by concatenating the text of each item.
 * - Objects contribute their own `_` text first, followed by the text of every
 *   child property, not only `ref`. The `$` property is skipped; with xml2js
 *   defaults it holds attributes rather than text (confirm if the parser
 *   options change).
 * - Any other primitive is converted with `String`.
 *
 * NOTE: child text is appended in property order, which may differ from the
 * original document order for mixed content.
 *
 * @param obj - Parsed node, string, array of nodes, or nullish value.
 * @returns The concatenated text, without whitespace normalization.
 */
function extractText(obj: unknown): string {
  if (obj === null || obj === undefined) {
    return '';
  }

  if (typeof obj === 'string') {
    return obj;
  }

  if (Array.isArray(obj)) {
    return obj.map((item) => extractText(item)).join('');
  }

  if (typeof obj === 'object') {
    const node = obj as Record<string, unknown>;

    // The element's own text always comes first.
    let text = typeof node._ === 'string' ? node._ : '';

    for (const [key, value] of Object.entries(node)) {
      if (key === '_' || key === '$') {
        continue;
      }
      text += extractText(value);
    }

    return text;
  }

  return String(obj);
}
197241

198-
return String(docTitleRaw).trim()
242+
const title = extractText(docTitleRaw);
243+
244+
// Clean up extra whitespace and newlines
245+
return title.replace(/\s+/g, ' ').trim();
199246
}
200247

201248
/**
 * Parses the document title out of raw XML text.
 *
 * The DOM-based parser is tried first. If it throws for any reason, a warning
 * is logged and the XML is parsed again with xml2js, reading the title from
 * the `akomaNtoso` root node.
 *
 * @param xml - Raw XML source of the document.
 * @returns The parsed title.
 * @throws Error with message `Result node not found in XML` when the fallback
 *   parse yields no `akomaNtoso` node; errors from the fallback parser or from
 *   `parseTitleFromXmlObject` propagate unchanged.
 */
export async function parseTitleFromXmlString(xml: string): Promise<string> {
  let title: string;

  try {
    // Primary path: DOM parsing keeps the title text in document order.
    title = parseTitleFromXMLDOM(xml);
  } catch (domError) {
    // Secondary path: object-based parsing via xml2js.
    console.warn('DOM parsing failed, falling back to xml2js:', domError);

    const parsedDocument = await parseStringPromise(xml, { explicitArray: false });
    const rootNode = parsedDocument?.akomaNtoso;
    if (!rootNode) {
      throw new Error('Result node not found in XML');
    }

    title = parseTitleFromXmlObject(rootNode);
  }

  return title;
}
209263

210264
async function parseImagesfromXML(result: AxiosResponse<unknown>): Promise<string[]> {

0 commit comments

Comments
 (0)