@@ -15,6 +15,61 @@ import xmldom from '@xmldom/xmldom';
1515import { JSDOM } from 'jsdom' ;
1616import { XMLParser } from 'fast-xml-parser' ;
1717import { getLatestStatuteVersions } from '../util/parse.js' ;
18+ import Bottleneck from 'bottleneck' ;
19+
/**
 * Shared rate limiter for every outgoing Finlex request made by this module.
 *
 * - `minTime: 350`       — at least 350 ms between the start of two jobs.
 * - `maxConcurrent: 1`   — only one request in flight at a time.
 * - `reservoir: 200`     — at most 200 jobs may start before the reservoir is
 *                          refilled; it is reset to 200 every 60 seconds.
 */
const finlexLimiter = new Bottleneck({
  minTime: 350,
  maxConcurrent: 1,
  reservoir: 200,
  reservoirRefreshInterval: 60 * 1000,
  reservoirRefreshAmount: 200,
});

// Running total of jobs the limiter has started, and the total as of the
// previous report (used to derive the per-minute delta).
let finlexRequestCount = 0;
let lastMinuteCount = 0;
finlexLimiter.on('executing', () => {
  finlexRequestCount += 1;
});

// Report request count every minute
const finlexReportTimer = setInterval(() => {
  const requestsThisMinute = finlexRequestCount - lastMinuteCount;
  console.log(`[finlexLimiter] ${requestsThisMinute} requests in last minute (${finlexRequestCount} total)`);
  lastMinuteCount = finlexRequestCount;
}, 60 * 1000);
// The reporter is purely diagnostic: it must not keep the Node.js process
// alive once all real work has finished (e.g. when this module is imported by
// a one-shot script).
finlexReportTimer.unref();
40+
// Generic fetch with exponential backoff and jitter, still honoring the limiter.
/**
 * Performs `axios.get(url, config)` through `finlexLimiter`, retrying failed
 * requests with exponential backoff.
 *
 * @param url    Absolute URL to fetch.
 * @param config Axios request config passed through unchanged.
 * @param opts   Optional retry tuning:
 *   - `maxRetries`  maximum number of retries after the first attempt (default 5);
 *   - `baseDelayMs` delay before the first retry, doubled on each further retry (default 500);
 *   - `maxDelayMs`  upper bound for the exponential delay (default 8000);
 *   - `retryOn`     predicate deciding whether an HTTP status is retryable
 *                   (default: 429 and 5xx). Errors without a response are
 *                   reported to the predicate as status `0`.
 * @returns The successful Axios response.
 * @throws The last Axios error once retries are exhausted or the status is not
 *         retryable; any non-Axios error is rethrown immediately.
 */
async function fetchWithBackoff<T = unknown>(url: string, config: any, opts?: { maxRetries?: number; baseDelayMs?: number; maxDelayMs?: number; retryOn?: (status: number) => boolean }): Promise<AxiosResponse<T>> {
  const maxRetries = opts?.maxRetries ?? 5;
  const baseDelayMs = opts?.baseDelayMs ?? 500; // initial backoff
  const maxDelayMs = opts?.maxDelayMs ?? 8000; // cap
  const retryOn = opts?.retryOn ?? ((status: number) => status === 429 || (status >= 500 && status < 600));

  const minDelayMs = 250;
  // setTimeout stores its delay as a signed 32-bit integer; larger values
  // overflow and the timer fires almost immediately.
  const maxTimerDelayMs = 2 ** 31 - 1;

  // Retry-After is either a number of seconds or an HTTP-date. Returns the
  // requested wait in milliseconds, or null when the header is absent or
  // cannot be parsed (so the caller falls back to exponential backoff instead
  // of sleeping for NaN milliseconds).
  const parseRetryAfterMs = (raw: unknown): number | null => {
    if (typeof raw !== 'string' && typeof raw !== 'number') return null;
    const text = String(raw).trim();
    if (text === '') return null;
    const seconds = Number(text);
    if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000);
    const dateMs = Date.parse(text);
    return Number.isNaN(dateMs) ? null : Math.max(0, dateMs - Date.now());
  };

  let attempt = 0;
  while (true) {
    try {
      // Schedule on limiter to enforce rate limits
      return await finlexLimiter.schedule(() => axios.get<T>(url, config));
    } catch (error) {
      if (!axios.isAxiosError(error)) throw error;
      const status = error.response?.status ?? 0;
      attempt += 1;
      if (attempt > maxRetries || !retryOn(status)) {
        throw error;
      }

      // Respect Retry-After header when present
      const retryAfterMs = parseRetryAfterMs(error.response?.headers?.['retry-after']);
      let delayMs: number;
      if (retryAfterMs !== null) {
        // Jitter only upwards (0..+30%): never retry earlier than the server asked.
        delayMs = retryAfterMs * (1 + Math.random() * 0.3);
      } else {
        // Exponential backoff with symmetric jitter (+/- 30%).
        const backoffMs = Math.min(maxDelayMs, baseDelayMs * 2 ** (attempt - 1));
        delayMs = backoffMs * (1 + (Math.random() * 0.6 - 0.3));
      }
      delayMs = Math.min(maxTimerDelayMs, Math.max(minDelayMs, delayMs));

      console.log(`[backoff] attempt ${attempt}/${maxRetries} status ${status}, delaying ${Math.round(delayMs)}ms for ${url}`);
      await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
      // Loop and retry
    }
  }
}
1873
1974
2075function parseFinlexUrl ( url : string ) : { docYear : number ; docNumber : string ; docLanguage : string ; docVersion : string | null } {
@@ -254,7 +309,7 @@ function parseURLfromJudgmentID(judgmentID: string): string {
254309}
255310
256311async function parseAkomafromURL ( inputURL : string , lang : string ) : Promise < { content : string ; is_empty : boolean , keywords : string [ ] } > {
257- const result = await axios . get ( inputURL , {
312+ const result = await fetchWithBackoff < string > ( inputURL , {
258313 headers : { 'Accept' : 'text/html' , 'Accept-Encoding' : 'gzip' }
259314 } ) ;
260315 const inputHTML = result . data as string ;
@@ -304,10 +359,10 @@ async function setImages(statuteUuid: string, docYear: number, docNumber: string
304359 const path = `/akn/fi/act/statute-consolidated/${ docYear } /${ docNumber } /${ language } @${ version ?? '' } /${ uri } `
305360 const url = `${ baseURL } ${ path } `
306361 try {
307- const result = await axios . get ( url , {
362+ const result = await fetchWithBackoff < ArrayBuffer > ( url , {
308363 headers : { 'Accept' : 'image/*' , 'Accept-Encoding' : 'gzip' } ,
309364 responseType : 'arraybuffer'
310- } )
365+ } ) ;
311366
312367 const name = uri . split ( '/' ) . pop ( )
313368 if ( ! name ) {
@@ -319,7 +374,7 @@ async function setImages(statuteUuid: string, docYear: number, docNumber: string
319374 uuid : imageUuid ,
320375 name : name ,
321376 mime_type : result . headers [ 'content-type' ] ,
322- content : result . data as Buffer ,
377+ content : Buffer . from ( result . data as ArrayBuffer ) ,
323378 }
324379
325380 imageUuid = await setImage ( image )
@@ -333,9 +388,9 @@ async function setImages(statuteUuid: string, docYear: number, docNumber: string
333388
334389async function fetchStatute ( uri : string ) {
335390 try {
336- const result = await axios . get ( `${ uri } ` , {
391+ const result = await fetchWithBackoff < string > ( `${ uri } ` , {
337392 headers : { 'Accept' : 'application/xml' , 'Accept-Encoding' : 'gzip' }
338- } )
393+ } ) ;
339394 return result
340395 } catch {
341396 return null
@@ -462,13 +517,15 @@ async function listStatutesByYear(year: number, language: string): Promise<strin
462517
463518 try {
464519 while ( true ) {
465- const result = await axios . get < StatuteVersionResponse [ ] > ( `${ baseURL } ${ path } ` , {
520+ const result = await finlexLimiter . schedule ( ( ) => axios . get < StatuteVersionResponse [ ] > ( `${ baseURL } ${ path } ` , {
466521 params : queryParams ,
467522 headers : {
468523 Accept : 'application/json' ,
469524 'Accept-Encoding' : 'gzip'
470525 }
471- } ) ;
526+ } ) ) ;
527+ // Optionally we could use fetchWithBackoff here as well, but since pagination drives many calls
528+ // and limiter already smooths throughput, keeping as-is to avoid excessive retries.
472529
473530 if ( ! Array . isArray ( result . data ) ) {
474531 throw new Error ( 'Invalid response format: expected an array' ) ;
@@ -487,8 +544,8 @@ async function listStatutesByYear(year: number, language: string): Promise<strin
487544 if ( axios . isAxiosError ( error ) ) {
488545 console . error ( `Failed to fetch statute versions for year ${ year } , type ${ typeStatute } : ${ error . message } ` ) ;
489546 if ( error . response ) {
490- console . error ( 'Response status:' , error . response . status ) ;
491- console . error ( 'Response data:' , error . response . data ) ;
547+ // console.error('Response status:', error.response.status);
548+ // console.error('Response data:', error.response.data);
492549 }
493550 } else {
494551 console . error ( `Unexpected error while fetching statute versions: ${ error } ` ) ;
@@ -519,7 +576,7 @@ async function listJudgmentNumbersByYear(year: number, language: string, level:
519576 : `https://finlex.fi/sv/rattspraxis/${ courtLevel . sv } /prejudikat/${ year } ` ;
520577 let parsedList : string [ ] = [ ] ;
521578 try {
522- const result = await axios . get ( inputUrl , {
579+ const result = await fetchWithBackoff < string > ( inputUrl , {
523580 headers : { 'Accept' : 'text/html' , 'Accept-Encoding' : 'gzip' }
524581 } ) ;
525582 const inputHTML = result . data as string ;
0 commit comments