@@ -80,6 +80,78 @@ export function extractParagraphs(xmlString: string): string[] {
8080 return Array . from ( pNodes , p => ( p . textContent || '' ) . trim ( ) ) . filter ( t => t ) ;
8181}
8282
/**
 * Translates the three-letter language code accepted by this module into the
 * two-letter code that is passed as the `locale` of the collection fields.
 *
 * @param lang - Language code; only "fin" and "swe" are recognised.
 * @returns "fi" for "fin" and "sv" for "swe".
 * @throws Error when `lang` is anything else (the match is case-sensitive).
 */
function resolveLangShort(lang: string): "fi" | "sv" {
  switch (lang) {
    case "fin":
      return "fi";
    case "swe":
      return "sv";
    default:
      throw new Error(`Unsupported language: ${lang}`);
  }
}
88+
/**
 * Creates the statutes collection for the given language if it is not there
 * yet and returns its name (`statutes_<lang>`).
 *
 * An `Errors.ObjectAlreadyExists` failure from the create call is treated as
 * success. Any other failure is logged, reported to Sentry and rethrown.
 *
 * @param lang - Language code; validated through `resolveLangShort`, which
 *   throws for unsupported values before any request is made.
 * @returns The name of the collection.
 */
async function ensureStatuteCollection(lang: string): Promise<string> {
  const locale = resolveLangShort(lang);
  const collectionName = `statutes_${lang}`;

  // Fields that carry a `locale` use the two-letter code resolved above.
  const fields: NonNullable<CollectionCreateSchema["fields"]> = [
    { name: "id", type: "string", index: false },
    { name: "title", type: "string", locale: locale },
    { name: "year_num", type: "int32" },
    { name: "year", type: "string" },
    { name: "number", type: "string" },
    { name: "common_names", type: "string[]", locale: locale },
    { name: "keywords", type: "string[]", locale: locale },
    { name: "version", type: "string", index: false },
    { name: "headings", type: "string[]", locale: locale },
    { name: "paragraphs", type: "string[]", locale: locale },
    { name: "has_content", type: "int32" },
  ];
  const schema: CollectionCreateSchema = { name: collectionName, fields: fields };

  try {
    await tsClient.collections().create(schema);
    console.log(`Created collection ${collectionName}`);
  } catch (err) {
    // The collection being present already is the expected steady state.
    if (err instanceof Errors.ObjectAlreadyExists) {
      return collectionName;
    }
    console.error(`Error creating collection ${collectionName}:`, err);
    Sentry.captureException(err);
    throw err;
  }

  return collectionName;
}
122+
/**
 * Creates the judgments collection for the given language if it is not there
 * yet and returns its name (`judgments_<lang>`).
 *
 * An `Errors.ObjectAlreadyExists` failure from the create call is treated as
 * success. Any other failure is logged, reported to Sentry and rethrown.
 *
 * @param lang - Language code; validated through `resolveLangShort`, which
 *   throws for unsupported values before any request is made.
 * @returns The name of the collection.
 */
async function ensureJudgmentCollection(lang: string): Promise<string> {
  const locale = resolveLangShort(lang);
  const collectionName = `judgments_${lang}`;

  // Fields that carry a `locale` use the two-letter code resolved above.
  const fields: NonNullable<CollectionCreateSchema["fields"]> = [
    { name: "id", type: "string", index: false },
    { name: "year_num", type: "int32" },
    { name: "year", type: "string" },
    { name: "number", type: "string" },
    { name: "level", type: "string" },
    { name: "keywords", type: "string[]", locale: locale },
    { name: "headings", type: "string[]", locale: locale },
    { name: "paragraphs", type: "string[]", locale: locale },
    { name: "has_content", type: "int32" },
  ];
  const schema: CollectionCreateSchema = { name: collectionName, fields: fields };

  try {
    await tsClient.collections().create(schema);
    console.log(`Created collection ${collectionName}`);
  } catch (err) {
    // The collection being present already is the expected steady state.
    if (err instanceof Errors.ObjectAlreadyExists) {
      return collectionName;
    }
    console.error(`Error creating collection ${collectionName}:`, err);
    Sentry.captureException(err);
    throw err;
  }

  return collectionName;
}
154+
83155export function extractParagraphsHtml ( html : string ) : string [ ] {
84156 const dom = new JSDOM ( html ) ;
85157 const ps = dom . window . document . querySelectorAll ( 'p' ) ;
@@ -132,46 +204,10 @@ async function upsertWithRetry(collectionName: string, document: Record<string,
132204
133205
134206export async function syncStatutes ( lang : string , range ?: { startYear ?: number ; endYear ?: number } ) {
135- let lang_short
136- if ( lang === "fin" ) {
137- lang_short = "fi" ;
138- } else if ( lang === "swe" ) {
139- lang_short = "sv" ;
140- } else {
141- throw new Error ( `Unsupported language: ${ lang } ` ) ;
142- }
143- const collectionName = `statutes_${ lang } ` ;
207+ const collectionName = await ensureStatuteCollection ( lang ) ;
208+ const langKey : "fin" | "swe" = lang === "fin" ? "fin" : "swe" ;
144209 console . log ( `Indexing: ${ lang } -> ${ collectionName } ` ) ;
145210
146- const schema : CollectionCreateSchema = {
147- name : collectionName ,
148- fields : [
149- { name : "id" , type : "string" , index : false } ,
150- { name : "title" , type : "string" , locale : lang_short } ,
151- { name : "year_num" , type : "int32" } ,
152- { name : "year" , type : "string" } ,
153- { name : "number" , type : "string" } ,
154- { name : "common_names" , type : "string[]" , locale : lang_short } ,
155- { name : "keywords" , type : "string[]" , locale : lang_short } ,
156- { name : "version" , type : "string" , index : false } ,
157- { name : "headings" , type : "string[]" , locale : lang_short } ,
158- { name : "paragraphs" , type : "string[]" , locale : lang_short } ,
159- { name : "has_content" , type : "int32" } ,
160- ] ,
161- } ;
162-
163- try {
164- await tsClient . collections ( ) . create ( schema ) ;
165- console . log ( `Created collection ${ collectionName } ` ) ;
166- } catch ( err ) {
167- if ( ! ( err instanceof Errors . ObjectAlreadyExists ) ) {
168- console . error ( `Error creating collection ${ collectionName } :` , err ) ;
169- Sentry . captureException ( err ) ;
170- throw err ;
171- }
172- console . log ( `Collection ${ collectionName } already exists` ) ;
173- }
174-
175211 const startYear = range ?. startYear ?? yearFrom ( ) ;
176212 const endYear = range ?. endYear ?? yearTo ( ) ;
177213
@@ -223,8 +259,8 @@ export async function syncStatutes(lang: string, range?: { startYear?: number; e
223259 common_names : commonNames ,
224260 keywords : keywords ,
225261 version : row . version ?? '' ,
226- headings : normalizeText ( headings , lang ) ,
227- paragraphs : normalizeText ( paragraphs , lang ) ,
262+ headings : normalizeText ( headings , langKey ) ,
263+ paragraphs : normalizeText ( paragraphs , langKey ) ,
228264 } ) ;
229265 } catch ( error ) {
230266 console . log ( '--- errored -->' , row . id ) ;
@@ -238,44 +274,10 @@ export async function syncStatutes(lang: string, range?: { startYear?: number; e
238274}
239275
240276export async function syncJudgments ( lang : string , range ?: { startYear ?: number ; endYear ?: number } ) {
241- let lang_short
242- if ( lang === "fin" ) {
243- lang_short = "fi" ;
244- } else if ( lang === "swe" ) {
245- lang_short = "sv" ;
246- } else {
247- throw new Error ( `Unsupported language: ${ lang } ` ) ;
248- }
249- const collectionName = `judgments_${ lang } ` ;
277+ const collectionName = await ensureJudgmentCollection ( lang ) ;
278+ const langKey : "fin" | "swe" = lang === "fin" ? "fin" : "swe" ;
250279 console . log ( `\n=== Indexing: ${ lang } -> ${ collectionName } ` ) ;
251280
252- const schema : CollectionCreateSchema = {
253- name : collectionName ,
254- fields : [
255- { name : "id" , type : "string" , index : false } ,
256- { name : "year_num" , type : "int32" } ,
257- { name : "year" , type : "string" } ,
258- { name : "number" , type : "string" } ,
259- { name : "level" , type : "string" } ,
260- { name : "keywords" , type : "string[]" , locale : lang_short } ,
261- { name : "headings" , type : "string[]" , locale : lang_short } ,
262- { name : "paragraphs" , type : "string[]" , locale : lang_short } ,
263- { name : "has_content" , type : "int32" } ,
264- ] ,
265- } ;
266-
267- try {
268- await tsClient . collections ( ) . create ( schema ) ;
269- console . log ( `Created collection ${ collectionName } ` ) ;
270- } catch ( err ) {
271- if ( ! ( err instanceof Errors . ObjectAlreadyExists ) ) {
272- console . error ( `Error creating collection ${ collectionName } :` , err ) ;
273- Sentry . captureException ( err ) ;
274- throw err ;
275- }
276- console . log ( `Collection ${ collectionName } already exists` ) ;
277- }
278-
279281 const startYear = range ?. startYear ?? yearFrom ( ) ;
280282 const endYear = range ?. endYear ?? yearTo ( ) ;
281283
@@ -318,14 +320,105 @@ export async function syncJudgments(lang: string, range?: { startYear?: number;
318320 level : localeLevel ( row . level , lang ) ,
319321 number : row . number ,
320322 keywords : keywords ,
321- headings : normalizeText ( headings , lang ) ,
322- paragraphs : normalizeText ( paragraphs , lang ) ,
323+ headings : normalizeText ( headings , langKey ) ,
324+ paragraphs : normalizeText ( paragraphs , langKey ) ,
323325 has_content : row . is_empty ? 0 : 1 ,
324326 } ) ;
325327 }
326328 }
327329}
328330
/**
 * Re-indexes a single statute: loads the row matching the uuid and language,
 * derives headings and paragraphs from its content, looks up its common names
 * and keywords, and upserts the resulting document into the statutes
 * collection for that language.
 *
 * When no row matches, a warning is logged and nothing is upserted.
 *
 * @param lang - Language code; unsupported values make
 *   `ensureStatuteCollection` throw before any query runs.
 * @param statuteUuid - Value matched against the `uuid` column of `statutes`.
 */
export async function upsertStatuteByUuid(lang: string, statuteUuid: string): Promise<void> {
  const collectionName = await ensureStatuteCollection(lang);
  // Anything other than "fin" has already been validated as "swe" above.
  const langKey = lang === "fin" ? "fin" : "swe";

  const sql = `
    SELECT
      uuid AS id,
      title AS title,
      number AS number,
      year AS year,
      is_empty AS is_empty,
      version AS version,
      content::text AS content
    FROM statutes
    WHERE uuid = $1 AND language = $2
    LIMIT 1
    `;
  const result = await query(sql, [statuteUuid, lang]);

  if (result.rows.length === 0) {
    console.warn(`upsertStatuteByUuid: no statute found for ${statuteUuid} (${lang})`);
    return;
  }

  const statute = result.rows[0];

  // Headings come from the parsed XML tree; paragraphs from the raw content string.
  const xmlTree = await parseStringPromise(statute.content, { explicitArray: false });
  const headingTree: Heading[] = parseXmlHeadings(xmlTree) ?? [];
  const flatHeadings = flattenHeadings(headingTree);
  const paragraphTexts = extractParagraphs(statute.content);

  const commonNames = await getCommonNamesByStatuteUuid(statute.id);
  const keywords = await getStatuteKeywordsByStatuteUuid(statute.id);

  const document = {
    id: statute.id,
    title: statute.title,
    year: String(statute.year),
    year_num: parseInt(statute.year, 10),
    number: statute.number,
    has_content: statute.is_empty ? 0 : 1,
    common_names: commonNames,
    keywords: keywords,
    version: statute.version ?? '',
    headings: normalizeText(flatHeadings, langKey),
    paragraphs: normalizeText(paragraphTexts, langKey),
  };
  await upsertWithRetry(collectionName, document);
}
378+
/**
 * Re-indexes a single judgment: loads the row matching the uuid and language,
 * derives headings and paragraphs from its HTML content, looks up its
 * keywords, and upserts the resulting document into the judgments collection
 * for that language.
 *
 * When no row matches, a warning is logged and nothing is upserted.
 *
 * @param lang - Language code; unsupported values make
 *   `ensureJudgmentCollection` throw before any query runs.
 * @param judgmentUuid - Value matched against the `uuid` column of `judgments`.
 */
export async function upsertJudgmentByUuid(lang: string, judgmentUuid: string): Promise<void> {
  const collectionName = await ensureJudgmentCollection(lang);
  // Anything other than "fin" has already been validated as "swe" above.
  const langKey = lang === "fin" ? "fin" : "swe";

  const sql = `
    SELECT
      uuid AS id,
      number AS number,
      year AS year,
      level AS level,
      is_empty AS is_empty,
      content::text AS content
    FROM judgments
    WHERE uuid = $1 AND language = $2
    LIMIT 1
    `;
  const result = await query(sql, [judgmentUuid, lang]);

  if (result.rows.length === 0) {
    console.warn(`upsertJudgmentByUuid: no judgment found for ${judgmentUuid} (${lang})`);
    return;
  }

  const judgment = result.rows[0];

  const headingTree: Heading[] = parseHtmlHeadings(judgment.content) ?? [];
  const flatHeadings = flattenHeadings(headingTree);
  const paragraphTexts = extractParagraphsHtml(judgment.content);
  const keywords = await getJudgmentKeywordsByJudgmentUuid(judgment.id);

  const document = {
    id: judgment.id,
    year: String(judgment.year),
    year_num: parseInt(judgment.year, 10),
    // localeLevel receives the caller's original language code, not langKey.
    level: localeLevel(judgment.level, lang),
    number: judgment.number,
    keywords: keywords,
    headings: normalizeText(flatHeadings, langKey),
    paragraphs: normalizeText(paragraphTexts, langKey),
    has_content: judgment.is_empty ? 0 : 1,
  };
  await upsertWithRetry(collectionName, document);
}
421+
329422
330423export async function deleteCollection ( name : string , lang : string ) {
331424 const collectionName = `${ name } _${ lang } ` ;
0 commit comments