Skip to content

Commit b9cb683

Browse files
committed
typesense-rebuildi yksittäisille lisäyksille
1 parent 716bc77 commit b9cb683

File tree

2 files changed

+192
-79
lines changed

2 files changed

+192
-79
lines changed

backend/src/app.ts

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ import { buildFinlexUrl, buildJudgmentUrl, listStatutesByYear, setSingleJudgment
1616
import type { JudgmentKey } from './types/judgment.js';
1717
import type { StatuteKey } from './types/statute.js';
1818
import { getRecentLogs, pushLog } from './util/logBuffer.js';
19-
import { deleteCollection, syncStatutes, syncJudgments } from './search.js';
19+
import { deleteCollection, syncStatutes, syncJudgments, upsertJudgmentByUuid, upsertStatuteByUuid } from './search.js';
2020
import { query } from './db/db.js';
2121

2222
const app = express()
@@ -184,6 +184,16 @@ app.post('/api/admin/add-statute', verifyAdminToken, async (req: express.Request
184184
}
185185

186186
await setSingleStatute({ uri: statuteUri, uriOld: statuteUri });
187+
188+
const statuteResult = await query(
189+
'SELECT uuid FROM statutes WHERE number = $1 AND year = $2 AND language = $3 ORDER BY version DESC NULLS LAST LIMIT 1',
190+
[numberStr, yearNum, languageStr]
191+
);
192+
const statuteUuid = statuteResult.rows[0]?.uuid;
193+
if (statuteUuid) {
194+
await upsertStatuteByUuid(languageStr, statuteUuid);
195+
}
196+
187197
res.status(200).json({ message: 'Statute added', statute: statuteKey, uri: statuteUri });
188198
} catch (error) {
189199
console.error('Add statute endpoint error:', error);
@@ -213,6 +223,16 @@ app.post('/api/admin/add-judgment', verifyAdminToken, async (req: express.Reques
213223
};
214224

215225
await setSingleJudgment(buildJudgmentUrl(judgmentKey));
226+
227+
const judgmentResult = await query(
228+
'SELECT uuid FROM judgments WHERE number = $1 AND year = $2 AND language = $3 AND level = $4 LIMIT 1',
229+
[numberStr, yearNum, languageStr, levelStr]
230+
);
231+
const judgmentUuid = judgmentResult.rows[0]?.uuid;
232+
if (judgmentUuid) {
233+
await upsertJudgmentByUuid(languageStr, judgmentUuid);
234+
}
235+
216236
res.status(200).json({ message: 'Judgment added', judgment: judgmentKey });
217237
} catch (error) {
218238
console.error('Add judgment endpoint error:', error);

backend/src/search.ts

Lines changed: 171 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,78 @@ export function extractParagraphs(xmlString: string): string[] {
8080
return Array.from(pNodes, p => (p.textContent || '').trim()).filter(t => t);
8181
}
8282

83+
/**
 * Maps a three-letter language code to the two-letter code that the
 * collection schemas in this module use as their field `locale`.
 *
 * @param lang - Language code; only "fin" and "swe" are supported.
 * @returns "fi" for "fin" and "sv" for "swe".
 * @throws Error with the message `Unsupported language: <lang>` for any
 *         other value (including the empty string).
 */
function resolveLangShort(lang: string): "fi" | "sv" {
  switch (lang) {
    case "fin":
      return "fi";
    case "swe":
      return "sv";
    default:
      throw new Error(`Unsupported language: ${lang}`);
  }
}
88+
89+
/**
 * Makes sure the statute search collection for `lang` exists and returns
 * its name (`statutes_<lang>`).
 *
 * The function always issues a create request and treats an
 * `Errors.ObjectAlreadyExists` failure as success, so it is safe to call
 * before every write. Nothing is cached: a collection that was removed
 * elsewhere is simply re-created on the next call.
 *
 * @param lang - Language code; must be accepted by `resolveLangShort`.
 * @returns The name of the collection.
 * @throws Error if `lang` is unsupported (thrown by `resolveLangShort`
 *         before any request is made), or whatever error the create call
 *         raised when it is not `Errors.ObjectAlreadyExists`. The latter is
 *         logged and reported to Sentry before being rethrown.
 */
async function ensureStatuteCollection(lang: string): Promise<string> {
  const langShort = resolveLangShort(lang);
  const collectionName = `statutes_${lang}`;

  const schema: CollectionCreateSchema = {
    name: collectionName,
    fields: [
      { name: "id", type: "string", index: false },
      { name: "title", type: "string", locale: langShort },
      { name: "year_num", type: "int32" },
      { name: "year", type: "string" },
      { name: "number", type: "string" },
      { name: "common_names", type: "string[]", locale: langShort },
      { name: "keywords", type: "string[]", locale: langShort },
      { name: "version", type: "string", index: false },
      { name: "headings", type: "string[]", locale: langShort },
      { name: "paragraphs", type: "string[]", locale: langShort },
      { name: "has_content", type: "int32" },
    ],
  };

  try {
    await tsClient.collections().create(schema);
    console.log(`Created collection ${collectionName}`);
  } catch (err) {
    // "Already exists" is the expected outcome on every call after the
    // first one; anything else is a real failure and must propagate.
    if (!(err instanceof Errors.ObjectAlreadyExists)) {
      console.error(`Error creating collection ${collectionName}:`, err);
      Sentry.captureException(err);
      throw err;
    }
  }

  return collectionName;
}
122+
123+
/**
 * Makes sure the judgment search collection for `lang` exists and returns
 * its name (`judgments_<lang>`).
 *
 * The function always issues a create request and treats an
 * `Errors.ObjectAlreadyExists` failure as success, so it is safe to call
 * before every write. Nothing is cached: a collection that was removed
 * elsewhere is simply re-created on the next call.
 *
 * @param lang - Language code; must be accepted by `resolveLangShort`.
 * @returns The name of the collection.
 * @throws Error if `lang` is unsupported (thrown by `resolveLangShort`
 *         before any request is made), or whatever error the create call
 *         raised when it is not `Errors.ObjectAlreadyExists`. The latter is
 *         logged and reported to Sentry before being rethrown.
 */
async function ensureJudgmentCollection(lang: string): Promise<string> {
  const langShort = resolveLangShort(lang);
  const collectionName = `judgments_${lang}`;

  const schema: CollectionCreateSchema = {
    name: collectionName,
    fields: [
      { name: "id", type: "string", index: false },
      { name: "year_num", type: "int32" },
      { name: "year", type: "string" },
      { name: "number", type: "string" },
      { name: "level", type: "string" },
      { name: "keywords", type: "string[]", locale: langShort },
      { name: "headings", type: "string[]", locale: langShort },
      { name: "paragraphs", type: "string[]", locale: langShort },
      { name: "has_content", type: "int32" },
    ],
  };

  try {
    await tsClient.collections().create(schema);
    console.log(`Created collection ${collectionName}`);
  } catch (err) {
    // "Already exists" is the expected outcome on every call after the
    // first one; anything else is a real failure and must propagate.
    if (!(err instanceof Errors.ObjectAlreadyExists)) {
      console.error(`Error creating collection ${collectionName}:`, err);
      Sentry.captureException(err);
      throw err;
    }
  }

  return collectionName;
}
154+
83155
export function extractParagraphsHtml(html: string): string[] {
84156
const dom = new JSDOM(html);
85157
const ps = dom.window.document.querySelectorAll('p');
@@ -132,46 +204,10 @@ async function upsertWithRetry(collectionName: string, document: Record<string,
132204

133205

134206
export async function syncStatutes(lang: string, range?: { startYear?: number; endYear?: number }) {
135-
let lang_short
136-
if (lang === "fin") {
137-
lang_short = "fi";
138-
} else if (lang === "swe") {
139-
lang_short = "sv";
140-
} else {
141-
throw new Error(`Unsupported language: ${lang}`);
142-
}
143-
const collectionName = `statutes_${lang}`;
207+
const collectionName = await ensureStatuteCollection(lang);
208+
const langKey: "fin" | "swe" = lang === "fin" ? "fin" : "swe";
144209
console.log(`Indexing: ${lang} -> ${collectionName}`);
145210

146-
const schema: CollectionCreateSchema = {
147-
name: collectionName,
148-
fields: [
149-
{ name: "id", type: "string", index: false },
150-
{ name: "title", type: "string", locale: lang_short },
151-
{ name: "year_num", type: "int32" },
152-
{ name: "year", type: "string" },
153-
{ name: "number", type: "string" },
154-
{ name: "common_names", type: "string[]", locale: lang_short },
155-
{ name: "keywords", type: "string[]", locale: lang_short },
156-
{ name: "version", type: "string", index: false },
157-
{ name: "headings", type: "string[]", locale: lang_short },
158-
{ name: "paragraphs", type: "string[]", locale: lang_short },
159-
{ name: "has_content", type: "int32" },
160-
],
161-
};
162-
163-
try {
164-
await tsClient.collections().create(schema);
165-
console.log(`Created collection ${collectionName}`);
166-
} catch (err) {
167-
if (!(err instanceof Errors.ObjectAlreadyExists)) {
168-
console.error(`Error creating collection ${collectionName}:`, err);
169-
Sentry.captureException(err);
170-
throw err;
171-
}
172-
console.log(`Collection ${collectionName} already exists`);
173-
}
174-
175211
const startYear = range?.startYear ?? yearFrom();
176212
const endYear = range?.endYear ?? yearTo();
177213

@@ -223,8 +259,8 @@ export async function syncStatutes(lang: string, range?: { startYear?: number; e
223259
common_names: commonNames,
224260
keywords: keywords,
225261
version: row.version ?? '',
226-
headings: normalizeText(headings, lang),
227-
paragraphs: normalizeText(paragraphs, lang),
262+
headings: normalizeText(headings, langKey),
263+
paragraphs: normalizeText(paragraphs, langKey),
228264
});
229265
} catch (error) {
230266
console.log('--- errored -->', row.id);
@@ -238,44 +274,10 @@ export async function syncStatutes(lang: string, range?: { startYear?: number; e
238274
}
239275

240276
export async function syncJudgments(lang: string, range?: { startYear?: number; endYear?: number }) {
241-
let lang_short
242-
if (lang === "fin") {
243-
lang_short = "fi";
244-
} else if (lang === "swe") {
245-
lang_short = "sv";
246-
} else {
247-
throw new Error(`Unsupported language: ${lang}`);
248-
}
249-
const collectionName = `judgments_${lang}`;
277+
const collectionName = await ensureJudgmentCollection(lang);
278+
const langKey: "fin" | "swe" = lang === "fin" ? "fin" : "swe";
250279
console.log(`\n=== Indexing: ${lang} -> ${collectionName}`);
251280

252-
const schema: CollectionCreateSchema = {
253-
name: collectionName,
254-
fields: [
255-
{ name: "id", type: "string", index: false },
256-
{ name: "year_num", type: "int32" },
257-
{ name: "year", type: "string" },
258-
{ name: "number", type: "string" },
259-
{ name: "level", type: "string" },
260-
{ name: "keywords", type: "string[]", locale: lang_short },
261-
{ name: "headings", type: "string[]", locale: lang_short },
262-
{ name: "paragraphs", type: "string[]", locale: lang_short },
263-
{ name: "has_content", type: "int32" },
264-
],
265-
};
266-
267-
try {
268-
await tsClient.collections().create(schema);
269-
console.log(`Created collection ${collectionName}`);
270-
} catch (err) {
271-
if (!(err instanceof Errors.ObjectAlreadyExists)) {
272-
console.error(`Error creating collection ${collectionName}:`, err);
273-
Sentry.captureException(err);
274-
throw err;
275-
}
276-
console.log(`Collection ${collectionName} already exists`);
277-
}
278-
279281
const startYear = range?.startYear ?? yearFrom();
280282
const endYear = range?.endYear ?? yearTo();
281283

@@ -318,14 +320,105 @@ export async function syncJudgments(lang: string, range?: { startYear?: number;
318320
level: localeLevel(row.level, lang),
319321
number: row.number,
320322
keywords: keywords,
321-
headings: normalizeText(headings, lang),
322-
paragraphs: normalizeText(paragraphs, lang),
323+
headings: normalizeText(headings, langKey),
324+
paragraphs: normalizeText(paragraphs, langKey),
323325
has_content: row.is_empty ? 0 : 1,
324326
});
325327
}
326328
}
327329
}
328330

331+
/**
 * Indexes (or re-indexes) a single statute in the search collection for
 * `lang`, without rebuilding the whole collection.
 *
 * Steps: ensure the collection exists, load the statute row matching both
 * `statuteUuid` and `lang`, derive headings and paragraphs from its
 * content, fetch its common names and keywords, then upsert one document
 * whose `id` is the statute uuid.
 *
 * If no row matches, a warning is logged and the function returns without
 * writing anything.
 *
 * @param lang - Language code ("fin" or "swe").
 * @param statuteUuid - The `uuid` of the row in the `statutes` table.
 * @throws Error if `lang` is unsupported (raised while ensuring the
 *         collection), and any error raised by the database query, the
 *         content parsing, or the upsert.
 */
export async function upsertStatuteByUuid(lang: string, statuteUuid: string): Promise<void> {
  const collectionName = await ensureStatuteCollection(lang);
  // ensureStatuteCollection has already rejected every value other than
  // "fin" and "swe", so the fallback branch here can only mean "swe".
  const langKey: "fin" | "swe" = lang === "fin" ? "fin" : "swe";

  const { rows } = await query(
    `
    SELECT
      uuid AS id,
      title AS title,
      number AS number,
      year AS year,
      is_empty AS is_empty,
      version AS version,
      content::text AS content
    FROM statutes
    WHERE uuid = $1 AND language = $2
    LIMIT 1
    `,
    [statuteUuid, lang]
  );

  if (rows.length === 0) {
    console.warn(`upsertStatuteByUuid: no statute found for ${statuteUuid} (${lang})`);
    return;
  }

  const row = rows[0];

  // NOTE(review): assumes row.content is always a parseable XML string,
  // also for rows flagged is_empty; confirm against the data.
  const parsedXml = await parseStringPromise(row.content, { explicitArray: false });
  const headingTree: Heading[] = parseXmlHeadings(parsedXml) ?? [];
  const headings = flattenHeadings(headingTree);
  const paragraphs = extractParagraphs(row.content);

  // The two lookups depend only on the statute uuid, not on each other.
  const [commonNames, keywords] = await Promise.all([
    getCommonNamesByStatuteUuid(row.id),
    getStatuteKeywordsByStatuteUuid(row.id),
  ]);

  const yearText = String(row.year);

  await upsertWithRetry(collectionName, {
    id: row.id,
    title: row.title,
    year: yearText,
    year_num: parseInt(yearText, 10),
    number: row.number,
    has_content: row.is_empty ? 0 : 1,
    common_names: commonNames,
    keywords: keywords,
    version: row.version ?? '',
    headings: normalizeText(headings, langKey),
    paragraphs: normalizeText(paragraphs, langKey),
  });
}
378+
379+
/**
 * Indexes (or re-indexes) a single judgment in the search collection for
 * `lang`, without rebuilding the whole collection.
 *
 * Steps: ensure the collection exists, load the judgment row matching both
 * `judgmentUuid` and `lang`, derive headings and paragraphs from its
 * content (handled as HTML), fetch its keywords, then upsert one document
 * whose `id` is the judgment uuid.
 *
 * If no row matches, a warning is logged and the function returns without
 * writing anything.
 *
 * @param lang - Language code ("fin" or "swe").
 * @param judgmentUuid - The `uuid` of the row in the `judgments` table.
 * @throws Error if `lang` is unsupported (raised while ensuring the
 *         collection), and any error raised by the database query, the
 *         content parsing, or the upsert.
 */
export async function upsertJudgmentByUuid(lang: string, judgmentUuid: string): Promise<void> {
  const collectionName = await ensureJudgmentCollection(lang);
  // ensureJudgmentCollection has already rejected every value other than
  // "fin" and "swe", so the fallback branch here can only mean "swe".
  const langKey: "fin" | "swe" = lang === "fin" ? "fin" : "swe";

  const { rows } = await query(
    `
    SELECT
      uuid AS id,
      number AS number,
      year AS year,
      level AS level,
      is_empty AS is_empty,
      content::text AS content
    FROM judgments
    WHERE uuid = $1 AND language = $2
    LIMIT 1
    `,
    [judgmentUuid, lang]
  );

  if (rows.length === 0) {
    console.warn(`upsertJudgmentByUuid: no judgment found for ${judgmentUuid} (${lang})`);
    return;
  }

  const row = rows[0];

  const headingTree: Heading[] = parseHtmlHeadings(row.content) ?? [];
  const headings = flattenHeadings(headingTree);
  const paragraphs = extractParagraphsHtml(row.content);
  const keywords = await getJudgmentKeywordsByJudgmentUuid(row.id);

  const yearText = String(row.year);

  await upsertWithRetry(collectionName, {
    id: row.id,
    year: yearText,
    year_num: parseInt(yearText, 10),
    // The level is stored language-neutrally and localized for the index.
    level: localeLevel(row.level, lang),
    number: row.number,
    keywords: keywords,
    headings: normalizeText(headings, langKey),
    paragraphs: normalizeText(paragraphs, langKey),
    has_content: row.is_empty ? 0 : 1,
  });
}
421+
329422

330423
export async function deleteCollection(name: string, lang: string) {
331424
const collectionName = `${name}_${lang}`;

0 commit comments

Comments
 (0)