@@ -36,7 +36,7 @@ finlexLimiter.on('executing', () => {
3636
3737export function startFinlexLimiterLogging ( ) {
3838 if ( finlexLogInterval ) {
39- return ;
39+ return ; // Already started
4040 }
4141
4242 console . log ( '[finlexLimiter] Starting rate limiter logging...' ) ;
@@ -166,19 +166,40 @@ function buildJudgmentUrl(judgment: JudgmentKey): string {
166166 return `${ baseUrl } /${ casestatute } /${ prefix } /${ path } ` ;
167167}
168168
169-
170-
/**
 * Extracts the document title from an XML string using a DOM parse.
 *
 * Looks up the first `docTitle` element and returns its text content with
 * every run of whitespace collapsed to a single space and the ends trimmed.
 *
 * @param xmlString - Raw XML text to parse.
 * @returns The normalized title text (may be empty if the element has no text).
 * @throws Error with message 'docTitle not found in XML' when no `docTitle`
 *         element is present.
 */
function parseTitleFromXMLDOM(xmlString: string): string {
  const dom = new JSDOM(xmlString, { contentType: 'text/xml' });

  try {
    // Find the docTitle element
    const docTitleElement = dom.window.document.querySelector('docTitle');

    if (!docTitleElement) {
      throw new Error('docTitle not found in XML');
    }

    // textContent concatenates descendant text in document order
    const title = docTitleElement.textContent ?? '';

    // Clean up extra whitespace and newlines
    return title.replace(/\s+/g, ' ').trim();
  } finally {
    // Release the jsdom window on every path (including the throw above) so
    // repeated calls do not accumulate open windows.
    dom.window.close();
  }
}
171186
/**
 * Extracts the document title from an HTTP response whose body is XML.
 *
 * The response body is treated as an XML string and handed to
 * `parseTitleFromXmlString`, which tries the DOM-based parser first and falls
 * back to xml2js if that fails. Delegating keeps the two entry points from
 * drifting apart.
 *
 * @param result - Response whose `data` holds the XML text (not checked at
 *                 runtime; the cast mirrors the caller's expectation).
 * @returns The normalized title text.
 * @throws Error when neither parser can locate a title (see
 *         `parseTitleFromXmlString`).
 */
async function parseTitlefromXML(result: AxiosResponse<unknown>): Promise<string> {
  const xmlString = result.data as string;
  return parseTitleFromXmlString(xmlString);
}
183204
184205export function parseTitleFromXmlObject ( resultNode : any ) : string {
@@ -188,23 +209,56 @@ export function parseTitleFromXmlObject(resultNode: any): string {
188209 throw new Error ( 'docTitle not found' )
189210 }
190211
191- if ( typeof docTitleRaw === 'string' ) {
192- return docTitleRaw . trim ( )
193- }
194- if ( typeof docTitleRaw === 'object' && typeof docTitleRaw . _ === 'string' ) {
195- return docTitleRaw . _ . trim ( )
/**
 * Recursively flattens an xml2js node into its text.
 *
 * - strings are returned unchanged;
 * - `null` / `undefined` contribute nothing (instead of the literal words
 *   "null" / "undefined");
 * - arrays are flattened element by element, in order;
 * - objects contribute their own text (`_`) followed by the text of every
 *   child element, not only `ref`, so inline children are no longer dropped.
 *   The `$` key (xml2js attribute bag) is skipped because attributes are not
 *   part of the visible text;
 * - any other primitive is stringified.
 *
 * Note: xml2js does not record how text and child elements interleave, so the
 * node's own text always precedes its children's text.
 */
function extractText(node: unknown): string {
  if (node == null) {
    return '';
  }

  if (typeof node === 'string') {
    return node;
  }

  // Handle arrays of mixed content
  if (Array.isArray(node)) {
    return node.map((item) => extractText(item)).join('');
  }

  if (typeof node === 'object') {
    const record = node as Record<string, unknown>;

    // Handle the _ property (text content)
    let text = typeof record._ === 'string' ? record._ : '';

    // Handle every child element (ref and any other inline element)
    for (const [key, child] of Object.entries(record)) {
      if (key === '_' || key === '$') {
        continue;
      }
      text += extractText(child);
    }

    return text;
  }

  return String(node);
}
197241
198- return String ( docTitleRaw ) . trim ( )
242+ const title = extractText ( docTitleRaw ) ;
243+
244+ // Clean up extra whitespace and newlines
245+ return title . replace ( / \s + / g, ' ' ) . trim ( ) ;
199246}
200247
/**
 * Extracts the document title from raw XML text.
 *
 * The DOM-based parser is attempted first. If it throws for any reason, the
 * failure is logged as a warning and the XML is re-parsed with xml2js, reading
 * the title from the `akomaNtoso` root node.
 *
 * @param xml - Raw XML text.
 * @returns The title text.
 * @throws Error with message 'Result node not found in XML' when the xml2js
 *         fallback finds no `akomaNtoso` root; errors from the fallback parser
 *         or from `parseTitleFromXmlObject` propagate unchanged.
 */
export async function parseTitleFromXmlString(xml: string): Promise<string> {
  let domFailure: unknown;

  // Primary path: DOM parsing.
  try {
    return parseTitleFromXMLDOM(xml);
  } catch (caught) {
    domFailure = caught;
  }

  // Secondary path: xml2js.
  console.warn('DOM parsing failed, falling back to xml2js:', domFailure);
  const parsedDocument = await parseStringPromise(xml, { explicitArray: false });
  const rootNode = parsedDocument?.akomaNtoso;

  if (rootNode) {
    return parseTitleFromXmlObject(rootNode);
  }

  throw new Error('Result node not found in XML');
}
209263
210264async function parseImagesfromXML ( result : AxiosResponse < unknown > ) : Promise < string [ ] > {
0 commit comments