Skip to content

Commit 6e090e8

Browse files
committed
Finlex api requestien rate limiter, tietokannan täytön lokien siistimistä
1 parent 09b7ba8 commit 6e090e8

File tree

6 files changed

+79
-20
lines changed

6 files changed

+79
-20
lines changed

backend/package-lock.json

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"@babel/runtime": "^7.27.6",
3535
"@xmldom/xmldom": "^0.9.8",
3636
"axios": "^1.9.0",
37+
"bottleneck": "^2.19.5",
3738
"dotenv": "^16.5.0",
3839
"express": "^5.1.0",
3940
"fast-xml-parser": "^5.2.3",

backend/src/db/db.ts

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,8 @@ async function fillDb(statutes: StatuteKey[], judgments: JudgmentKey[]): Promise
2323
for (const key of statutes) {
2424
++i;
2525
const { uri } = buildFinlexUrl(key)
26-
console.log('URL', uri)
2726
await setSingleStatute(buildFinlexUrl(key));
28-
if (i % 100 === 0) {
27+
if (i % 50 === 0) {
2928
console.log(`Inserted ${i} statutes (${statutes.length})`)
3029
}
3130
}
@@ -50,9 +49,7 @@ async function fillDb(statutes: StatuteKey[], judgments: JudgmentKey[]): Promise
5049

5150
async function dbIsReady(): Promise<boolean> {
5251
try {
53-
console.log('tsekataan db')
5452
const client = await pool.connect();
55-
console.log('yli poolin')
5653
let result = await client.query("SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'images');")
5754
const imagesExists = result.rows[0].exists;
5855

@@ -166,7 +163,7 @@ async function dbIsUpToDate(startYear?: number): Promise<{upToDate: boolean, sta
166163

167164
if (missingStatutes.length > 0) {
168165
//console.log(existingStatutesFin)
169-
console.log('missingStatutes', missingStatutes, missingStatutes.length)
166+
//console.log('missingStatutes', missingStatutes, missingStatutes.length)
170167
}
171168

172169

@@ -209,7 +206,7 @@ async function dbIsUpToDate(startYear?: number): Promise<{upToDate: boolean, sta
209206
}
210207

211208
if (missingJudgments.length > 0) {
212-
console.log('missingJudgments', missingJudgments, missingJudgments.length)
209+
//console.log('missingJudgments', missingJudgments, missingJudgments.length)
213210
}
214211

215212
return missingJudgments;

backend/src/db/load.ts

Lines changed: 68 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,61 @@ import xmldom from '@xmldom/xmldom';
1515
import { JSDOM } from 'jsdom';
1616
import { XMLParser } from 'fast-xml-parser';
1717
import { getLatestStatuteVersions } from '../util/parse.js';
18+
import Bottleneck from 'bottleneck';
19+
20+
// Shared Bottleneck limiter for outgoing Finlex HTTP requests scheduled through it.
// Configuration: one job at a time, at least 350 ms between job starts, and a
// reservoir of 200 jobs that is reset to 200 every 60 seconds.
const finlexLimiter = new Bottleneck({
  minTime: 350,
  maxConcurrent: 1,
  reservoir: 200,
  reservoirRefreshInterval: 60 * 1000,
  reservoirRefreshAmount: 200,
});

// Running total of jobs the limiter has started, and the total as of the
// previous report (used to compute the per-minute delta).
let finlexRequestCount = 0;
let lastMinuteCount = 0;
finlexLimiter.on('executing', () => {
  finlexRequestCount += 1;
});

// Report request count every minute.
const finlexReportTimer = setInterval(() => {
  const requestsThisMinute = finlexRequestCount - lastMinuteCount;
  console.log(`[finlexLimiter] ${requestsThisMinute} requests in last minute (${finlexRequestCount} total)`);
  lastMinuteCount = finlexRequestCount;
}, 60 * 1000);
// The reporter is purely diagnostic: do not let it keep the Node.js event loop
// alive, otherwise any script or test that merely imports this module never exits.
finlexReportTimer.unref();
40+
41+
/**
 * Performs an HTTP GET through the shared Finlex limiter and retries failed
 * requests with exponential backoff and jitter.
 *
 * Every attempt (including retries) is scheduled on `finlexLimiter`, so retries
 * are rate limited as well.
 *
 * @param url - Absolute URL to fetch.
 * @param config - Axios request configuration passed to `axios.get` unchanged.
 * @param opts - Retry tuning:
 *   - `maxRetries`: number of retries after the first attempt (default 5).
 *   - `baseDelayMs`: delay before the first retry; doubles on each retry (default 500).
 *   - `maxDelayMs`: upper bound for the exponential delay (default 8000). A
 *     server-provided `Retry-After` is not capped by this value.
 *   - `retryOn`: predicate deciding which HTTP statuses are retried
 *     (default: 429 and 5xx). Errors without a response are reported as status 0.
 * @returns The successful Axios response.
 * @throws The last Axios error once retries are exhausted or the status is not
 *   retryable; non-Axios errors are rethrown immediately.
 */
async function fetchWithBackoff<T = unknown>(url: string, config: any, opts?: { maxRetries?: number; baseDelayMs?: number; maxDelayMs?: number; retryOn?: (status: number) => boolean }): Promise<AxiosResponse<T>> {
  const maxRetries = opts?.maxRetries ?? 5;
  const baseDelayMs = opts?.baseDelayMs ?? 500; // initial backoff
  const maxDelayMs = opts?.maxDelayMs ?? 8000; // cap
  const retryOn = opts?.retryOn ?? ((status: number) => status === 429 || (status >= 500 && status < 600));

  // Retry-After is either delta-seconds or an HTTP-date. Returns the requested
  // wait in milliseconds, or null when the header is absent or unparsable.
  const parseRetryAfterMs = (raw: unknown): number | null => {
    const value: unknown = Array.isArray(raw) ? raw[0] : raw;
    if (typeof value !== 'string' && typeof value !== 'number') return null;
    const text = String(value).trim();
    if (text === '') return null;
    const seconds = Number(text);
    if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000);
    const dateMs = Date.parse(text);
    return Number.isNaN(dateMs) ? null : Math.max(0, dateMs - Date.now());
  };

  let attempt = 0;
  while (true) {
    try {
      // Schedule on limiter to enforce rate limits
      return await finlexLimiter.schedule(() => axios.get<T>(url, config));
    } catch (error) {
      if (!axios.isAxiosError(error)) throw error;
      const status = error.response?.status ?? 0;
      attempt += 1;
      if (attempt > maxRetries || !retryOn(status)) {
        throw error;
      }
      // Respect Retry-After when present and parsable; otherwise use capped exponential backoff.
      const retryAfterMs = parseRetryAfterMs(error.response?.headers?.['retry-after']);
      const backoffMs = Math.min(maxDelayMs, baseDelayMs * Math.pow(2, attempt - 1));
      const plannedMs = retryAfterMs ?? backoffMs;
      // Apply jitter (+/- 30%), but never go below 250 ms or below what the server asked for.
      const jitter = plannedMs * (Math.random() * 0.6 - 0.3);
      const delayMs = Math.max(250, retryAfterMs ?? 0, plannedMs + jitter);
      console.log(`[backoff] attempt ${attempt}/${maxRetries} status ${status}, delaying ${Math.round(delayMs)}ms for ${url}`);
      await new Promise(res => setTimeout(res, delayMs));
      // Loop and retry
    }
  }
}
1873

1974

2075
function parseFinlexUrl(url: string): { docYear: number; docNumber: string; docLanguage: string; docVersion: string | null } {
@@ -254,7 +309,7 @@ function parseURLfromJudgmentID(judgmentID: string): string {
254309
}
255310

256311
async function parseAkomafromURL(inputURL: string, lang: string): Promise<{ content: string; is_empty: boolean, keywords: string[] }> {
257-
const result = await axios.get(inputURL, {
312+
const result = await fetchWithBackoff<string>(inputURL, {
258313
headers: { 'Accept': 'text/html', 'Accept-Encoding': 'gzip' }
259314
});
260315
const inputHTML = result.data as string;
@@ -304,10 +359,10 @@ async function setImages(statuteUuid: string, docYear: number, docNumber: string
304359
const path = `/akn/fi/act/statute-consolidated/${docYear}/${docNumber}/${language}@${version ?? ''}/${uri}`
305360
const url = `${baseURL}${path}`
306361
try {
307-
const result = await axios.get(url, {
362+
const result = await fetchWithBackoff<ArrayBuffer>(url, {
308363
headers: { 'Accept': 'image/*', 'Accept-Encoding': 'gzip' },
309364
responseType: 'arraybuffer'
310-
})
365+
});
311366

312367
const name = uri.split('/').pop()
313368
if (!name) {
@@ -319,7 +374,7 @@ async function setImages(statuteUuid: string, docYear: number, docNumber: string
319374
uuid: imageUuid,
320375
name: name,
321376
mime_type: result.headers['content-type'],
322-
content: result.data as Buffer,
377+
content: Buffer.from(result.data as ArrayBuffer),
323378
}
324379

325380
imageUuid = await setImage(image)
@@ -333,9 +388,9 @@ async function setImages(statuteUuid: string, docYear: number, docNumber: string
333388

334389
async function fetchStatute(uri: string) {
335390
try {
336-
const result = await axios.get(`${uri}`, {
391+
const result = await fetchWithBackoff<string>(`${uri}`, {
337392
headers: { 'Accept': 'application/xml', 'Accept-Encoding': 'gzip' }
338-
})
393+
});
339394
return result
340395
} catch {
341396
return null
@@ -462,13 +517,15 @@ async function listStatutesByYear(year: number, language: string): Promise<strin
462517

463518
try {
464519
while (true) {
465-
const result = await axios.get<StatuteVersionResponse[]>(`${baseURL}${path}`, {
520+
const result = await finlexLimiter.schedule(() => axios.get<StatuteVersionResponse[]>(`${baseURL}${path}`, {
466521
params: queryParams,
467522
headers: {
468523
Accept: 'application/json',
469524
'Accept-Encoding': 'gzip'
470525
}
471-
});
526+
}));
527+
// Optionally we could use fetchWithBackoff here as well, but since pagination drives many calls
528+
// and limiter already smooths throughput, keeping as-is to avoid excessive retries.
472529

473530
if (!Array.isArray(result.data)) {
474531
throw new Error('Invalid response format: expected an array');
@@ -487,8 +544,8 @@ async function listStatutesByYear(year: number, language: string): Promise<strin
487544
if (axios.isAxiosError(error)) {
488545
console.error(`Failed to fetch statute versions for year ${year}, type ${typeStatute}: ${error.message}`);
489546
if (error.response) {
490-
console.error('Response status:', error.response.status);
491-
console.error('Response data:', error.response.data);
547+
//console.error('Response status:', error.response.status);
548+
//console.error('Response data:', error.response.data);
492549
}
493550
} else {
494551
console.error(`Unexpected error while fetching statute versions: ${error}`);
@@ -519,7 +576,7 @@ async function listJudgmentNumbersByYear(year: number, language: string, level:
519576
: `https://finlex.fi/sv/rattspraxis/${courtLevel.sv}/prejudikat/${year}`;
520577
let parsedList: string[] = [];
521578
try {
522-
const result = await axios.get(inputUrl, {
579+
const result = await fetchWithBackoff<string>(inputUrl, {
523580
headers: { 'Accept': 'text/html', 'Accept-Encoding': 'gzip' }
524581
});
525582
const inputHTML = result.data as string;

backend/src/db/models/statute.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ export async function getStatuteCountByYear(year: number): Promise<number> {
2323
}
2424

2525
export async function getStatutesByYear(year: number, language: string): Promise<StatuteListItem[]> {
26-
console.log('getStatutesByYear', year)
2726
const sql = 'SELECT title as "docTitle", number as "docNumber", year as "docYear", is_empty as "isEmpty", version as "docVersion" FROM statutes WHERE year = $1 AND language = $2 ORDER BY is_empty ASC, number ASC';
2827
const result = await query(sql, [year, language]);
2928
return result.rows;

backend/src/dbSetup.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ async function initDatabase(startYear?: number) {
2424
console.log('[DB] Checking if database is up to date...');
2525
const { upToDate, statutes, judgments } = await dbIsUpToDate(from);
2626

27-
console.log('[DB] Up to date:', upToDate, 'missing statutes:', statutes.length, 'missing judgements:', judgments.length);
28-
2927
if (!upToDate) {
3028
console.log('[DB] Database is not up to date, filling database...');
3129
await fillDb(statutes, judgments);

0 commit comments

Comments
 (0)