From 3e2b46ef745044da7965eb94cb99c52dc2edf2b7 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 21 Jan 2025 17:21:01 +0100 Subject: [PATCH 01/48] Report stats about blocked and rate limited requests --- library/agent/InspectionStatistics.test.ts | 201 ++++++++++++++++++ library/agent/InspectionStatistics.ts | 65 +++++- library/agent/api/Event.ts | 8 + library/middleware/shouldBlockRequest.ts | 6 + library/sources/FunctionsFramework.test.ts | 24 +++ .../http-server/checkIfRequestIsBlocked.ts | 9 + .../http-server/createRequestListener.ts | 6 +- 7 files changed, 315 insertions(+), 4 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index fc218b3a0..925c8e097 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -39,6 +39,14 @@ t.test("it resets stats", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -54,6 +62,14 @@ t.test("it resets stats", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -80,6 +96,14 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -112,6 +136,14 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -144,6 +176,14 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -170,6 +210,14 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -202,6 +250,14 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -234,6 +290,14 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -285,6 +349,14 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -332,6 +404,14 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -347,6 +427,14 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -363,6 +451,14 @@ t.test("it keeps track of requests", async () => { total: 1, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -379,6 +475,14 @@ t.test("it keeps track of requests", async () => { total: 2, blocked: 1, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -396,6 +500,14 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -420,6 +532,14 @@ t.test("it force compresses stats", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); @@ -462,6 +582,87 @@ t.test("it keeps track of aborted requests", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, + }, + }); + + clock.uninstall(); +}); + +t.test("it keeps track of blocked requests", async () => { + const clock = FakeTimers.install(); + + const stats = new InspectionStatistics({ + maxPerfSamplesInMemory: 50, + maxCompressedStatsInMemory: 5, + }); + + stats.onBlockedRequest({ reason: "ipBlocklist" }); + stats.onBlockedRequest({ reason: "userAgent" }); + stats.onBlockedRequest({ reason: "allowedIpsRoute" }); + stats.onBlockedRequest({ reason: "userBlock" }); + + t.same(stats.getStats(), { + sinks: {}, + startedAt: 0, + requests: { + total: 0, + aborted: 0, + attacksDetected: { + total: 0, + blocked: 0, + }, + blocked: { + total: 4, + allowedIpsRoute: 1, + userAgent: 1, + ipBlocklist: 1, + userBlocked: 1, + }, + rateLimited: 0, }, }); + + clock.uninstall(); +}); + +t.test("it keeps track of rate limited requests", async () => { + const clock = FakeTimers.install(); + + const stats = new InspectionStatistics({ + maxPerfSamplesInMemory: 50, + maxCompressedStatsInMemory: 5, + }); + + stats.onRateLimitedRequest(); + + t.same(stats.getStats(), { + sinks: {}, + startedAt: 0, + requests: { + total: 0, + aborted: 0, + attacksDetected: { + total: 0, + blocked: 0, + }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 1, + }, + }); + + clock.uninstall(); }); diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index ad2076bd5..b449a62e8 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -34,7 +34,27 @@ export class InspectionStatistics { total: number; blocked: number; }; - } = { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 } }; + blocked: { + total: number; + allowedIpsRoute: number; + userAgent: number; + ipBlocklist: number; + userBlocked: number; + }; + rateLimited: number; + } = { + total: 0, + aborted: 0, + attacksDetected: { total: 0, blocked: 0 }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, + }; constructor({ maxPerfSamplesInMemory, @@ -67,6 +87,14 @@ export class InspectionStatistics { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }; this.startedAt = Date.now(); } @@ -81,6 +109,14 @@ export class InspectionStatistics { total: number; blocked: number; }; + blocked: { + total: number; + allowedIpsRoute: number; + userAgent: number; + ipBlocklist: number; + userBlocked: number; + }; + rateLimited: number; }; } { const sinks: Record = {}; @@ -176,6 +212,33 @@ export class InspectionStatistics { } } + onRateLimitedRequest() { + this.requests.rateLimited += 1; + } + + onBlockedRequest({ + reason, + }: { + reason: "allowedIpsRoute" | "userAgent" | "ipBlocklist" | "userBlock"; + }) { + this.requests.blocked.total += 1; + + switch (reason) { + case "allowedIpsRoute": + this.requests.blocked.allowedIpsRoute += 1; + break; + case "userAgent": + this.requests.blocked.userAgent += 1; + break; + case "ipBlocklist": + this.requests.blocked.ipBlocklist += 1; + break; + case "userBlock": + this.requests.blocked.userBlocked += 1; + break; + } + } + onAbortedRequest() { this.requests.aborted += 1; } diff --git a/library/agent/api/Event.ts b/library/agent/api/Event.ts index b2afd469f..0b98ac1f4 100644 --- a/library/agent/api/Event.ts +++ b/library/agent/api/Event.ts @@ -92,6 +92,14 @@ type Heartbeat = { total: number; blocked: number; }; + blocked: { + total: number; + allowedIpsRoute: number; + userAgent: number; + ipBlocklist: number; + userBlocked: number; + }; + rateLimited: number; }; }; hostnames: { hostname: string; port: number | undefined }[]; diff --git a/library/middleware/shouldBlockRequest.ts b/library/middleware/shouldBlockRequest.ts index 135020dea..23e731cbc 100644 --- a/library/middleware/shouldBlockRequest.ts +++ b/library/middleware/shouldBlockRequest.ts @@ -24,11 +24,17 @@ export function shouldBlockRequest(): Result { agent.onMiddlewareExecuted(); if (context.user && agent.getConfig().isUserBlocked(context.user.id)) { + agent.getInspectionStatistics().onBlockedRequest({ + reason: "userBlock", + }); + return { block: true, type: "blocked", trigger: "user" }; } const rateLimitResult = shouldRateLimitRequest(context, agent); if (rateLimitResult.block) { + agent.getInspectionStatistics().onRateLimitedRequest(); + return { block: true, type: "ratelimited", diff --git a/library/sources/FunctionsFramework.test.ts b/library/sources/FunctionsFramework.test.ts index ea5fb23c5..9cfce1a1b 100644 --- a/library/sources/FunctionsFramework.test.ts +++ b/library/sources/FunctionsFramework.test.ts @@ -91,6 +91,14 @@ t.test("it counts requests", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }); }); @@ -107,6 +115,14 @@ t.test("it counts attacks", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 1, blocked: 1 }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }); }); @@ -123,6 +139,14 @@ t.test("it counts request if error", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }); }); diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index 4be81c7f8..a32da8f55 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -1,3 +1,4 @@ +/* eslint-disable max-lines-per-function */ import type { ServerResponse } from "http"; import { Agent } from "../../agent/Agent"; import { getContext } from "../../agent/Context"; @@ -40,6 +41,8 @@ export function checkIfRequestIsBlocked( res.end(message); + agent.getInspectionStatistics().onBlockedRequest({ reason: "ipBlocklist" }); + return true; } @@ -54,6 +57,10 @@ export function checkIfRequestIsBlocked( res.end(message); + agent + .getInspectionStatistics() + .onBlockedRequest({ reason: "allowedIpsRoute" }); + return true; } @@ -70,6 +77,8 @@ export function checkIfRequestIsBlocked( "You are not allowed to access this resource because you have been identified as a bot." ); + agent.getInspectionStatistics().onBlockedRequest({ reason: "userAgent" }); + return true; } diff --git a/library/sources/http-server/createRequestListener.ts b/library/sources/http-server/createRequestListener.ts index 4f74448a8..4c95569ef 100644 --- a/library/sources/http-server/createRequestListener.ts +++ b/library/sources/http-server/createRequestListener.ts @@ -75,19 +75,19 @@ function callListenerWithContext( const countedRequest = Symbol("__zen_request_counted__"); function createOnFinishRequestHandler( - req: IncomingMessage, + req: IncomingMessage & { [countedRequest]?: boolean }, res: ServerResponse, agent: Agent ) { return function onFinishRequest() { - if ((req as any)[countedRequest]) { + if (req[countedRequest]) { // The request has already been counted // This might happen if the server has multiple listeners return; } // Mark the request as counted - (req as any)[countedRequest] = true; + req[countedRequest] = true; const context = getContext(); From 50d6478e90c2a09c2632d59e24cfefb8391c5723 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 21 Jan 2025 17:25:06 +0100 Subject: [PATCH 02/48] Fix types --- .../agent/api/ReportingAPIRateLimitedClientSide.test.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts index abce879ad..b6e9d0bca 100644 --- a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts +++ b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts @@ -154,6 +154,14 @@ function generateHeartbeatEvent(): Event { blocked: 0, total: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }, agent: { From 1598c09eb9a0495844295ba443b30e00c9fdcc56 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 22 Jan 2025 10:28:04 +0100 Subject: [PATCH 03/48] Fix unit test --- library/sources/Lambda.test.ts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/library/sources/Lambda.test.ts b/library/sources/Lambda.test.ts index becc6370a..c5d6f0a13 100644 --- a/library/sources/Lambda.test.ts +++ b/library/sources/Lambda.test.ts @@ -303,6 +303,14 @@ t.test("it sends heartbeat after first and every 10 minutes", async () => { total: 0, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }, middlewareInstalled: false, @@ -460,6 +468,14 @@ t.test("it counts attacks", async () => { total: 1, blocked: 0, }, + blocked: { + total: 0, + allowedIpsRoute: 0, + userAgent: 0, + ipBlocklist: 0, + userBlocked: 0, + }, + rateLimited: 0, }, }); }); From 422a57bb57ab8c37ddb64e62232db92723eae591 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 7 Feb 2025 10:30:29 +0100 Subject: [PATCH 04/48] Improve block request stats object --- library/agent/InspectionStatistics.test.ts | 168 +++++------------- library/agent/InspectionStatistics.ts | 72 ++++---- library/agent/api/Event.ts | 7 +- .../ReportingAPIRateLimitedClientSide.test.ts | 7 +- library/middleware/shouldBlockRequest.ts | 6 - library/sources/FunctionsFramework.test.ts | 21 +-- library/sources/Lambda.test.ts | 14 +- .../http-server/checkIfRequestIsBlocked.ts | 12 +- 8 files changed, 97 insertions(+), 210 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index 925c8e097..ae80fb1ae 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -41,12 +41,9 @@ t.test("it resets stats", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -64,12 +61,9 @@ t.test("it resets stats", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -98,12 +92,9 @@ t.test("it keeps track of amount of calls", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -138,12 +129,9 @@ t.test("it keeps track of amount of calls", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -178,12 +166,9 @@ t.test("it keeps track of amount of calls", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -212,12 +197,9 @@ t.test("it keeps track of amount of calls", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -252,12 +234,9 @@ t.test("it keeps track of amount of calls", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -292,12 +271,9 @@ t.test("it keeps track of amount of calls", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -351,12 +327,9 @@ t.test("it keeps track of amount of calls", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -406,12 +379,9 @@ t.test("it keeps track of requests", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -429,12 +399,9 @@ t.test("it keeps track of requests", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -453,12 +420,9 @@ t.test("it keeps track of requests", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -477,12 +441,9 @@ t.test("it keeps track of requests", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -502,12 +463,9 @@ t.test("it keeps track of requests", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -534,12 +492,9 @@ t.test("it force compresses stats", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -584,12 +539,9 @@ t.test("it keeps track of aborted requests", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); @@ -604,44 +556,11 @@ t.test("it keeps track of blocked requests", async () => { maxCompressedStatsInMemory: 5, }); - stats.onBlockedRequest({ reason: "ipBlocklist" }); - stats.onBlockedRequest({ reason: "userAgent" }); - stats.onBlockedRequest({ reason: "allowedIpsRoute" }); - stats.onBlockedRequest({ reason: "userBlock" }); - - t.same(stats.getStats(), { - sinks: {}, - startedAt: 0, - requests: { - total: 0, - aborted: 0, - attacksDetected: { - total: 0, - blocked: 0, - }, - blocked: { - total: 4, - allowedIpsRoute: 1, - userAgent: 1, - ipBlocklist: 1, - userBlocked: 1, - }, - rateLimited: 0, - }, + stats.onBlockedRequest({ + match: "ipBlocklist", + key: "known_threat_actors/public_scanners", }); - - clock.uninstall(); -}); - -t.test("it keeps track of rate limited requests", async () => { - const clock = FakeTimers.install(); - - const stats = new InspectionStatistics({ - maxPerfSamplesInMemory: 50, - maxCompressedStatsInMemory: 5, - }); - - stats.onRateLimitedRequest(); + stats.onBlockedRequest({ match: "userAgentList", key: "ai_data_scrapers" }); t.same(stats.getStats(), { sinks: {}, @@ -654,13 +573,16 @@ t.test("it keeps track of rate limited requests", async () => { blocked: 0, }, blocked: { - total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + total: 2, + ipBlocklist: { + // eslint-disable-next-line camelcase + known_threat_actors_public_scanners: 1, + }, + userAgentList: { + // eslint-disable-next-line camelcase + ai_data_scrapers: 1, + }, }, - rateLimited: 1, }, }); diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index b449a62e8..7f38b7cb2 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -22,6 +22,16 @@ type SinkStats = { type SinkStatsWithoutTimings = Omit; +type RequestBlocked = + | { + match: "userAgentList"; + key: string; + } + | { + match: "ipBlocklist"; + key: string; + }; + export class InspectionStatistics { private startedAt = Date.now(); private stats: Record = {}; @@ -36,24 +46,18 @@ export class InspectionStatistics { }; blocked: { total: number; - allowedIpsRoute: number; - userAgent: number; - ipBlocklist: number; - userBlocked: number; + userAgentList: Record; + ipBlocklist: Record; }; - rateLimited: number; } = { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }; constructor({ @@ -89,12 +93,9 @@ export class InspectionStatistics { attacksDetected: { total: 0, blocked: 0 }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }; this.startedAt = Date.now(); } @@ -111,12 +112,9 @@ export class InspectionStatistics { }; blocked: { total: number; - allowedIpsRoute: number; - userAgent: number; - ipBlocklist: number; - userBlocked: number; + userAgentList: Record; + ipBlocklist: Record; }; - rateLimited: number; }; } { const sinks: Record = {}; @@ -212,30 +210,24 @@ export class InspectionStatistics { } } - onRateLimitedRequest() { - this.requests.rateLimited += 1; - } - - onBlockedRequest({ - reason, - }: { - reason: "allowedIpsRoute" | "userAgent" | "ipBlocklist" | "userBlock"; - }) { + onBlockedRequest({ match, key }: RequestBlocked) { this.requests.blocked.total += 1; - switch (reason) { - case "allowedIpsRoute": - this.requests.blocked.allowedIpsRoute += 1; - break; - case "userAgent": - this.requests.blocked.userAgent += 1; - break; - case "ipBlocklist": - this.requests.blocked.ipBlocklist += 1; + switch (match) { + case "userAgentList": { + if (!this.requests.blocked.userAgentList[key]) { + this.requests.blocked.userAgentList[key] = 0; + } + this.requests.blocked.userAgentList[key] += 1; break; - case "userBlock": - this.requests.blocked.userBlocked += 1; + } + case "ipBlocklist": { + if (!this.requests.blocked.ipBlocklist[key]) { + this.requests.blocked.ipBlocklist[key] = 0; + } + this.requests.blocked.ipBlocklist[key] += 1; break; + } } } diff --git a/library/agent/api/Event.ts b/library/agent/api/Event.ts index 0b98ac1f4..aa04222ee 100644 --- a/library/agent/api/Event.ts +++ b/library/agent/api/Event.ts @@ -94,12 +94,9 @@ type Heartbeat = { }; blocked: { total: number; - allowedIpsRoute: number; - userAgent: number; - ipBlocklist: number; - userBlocked: number; + userAgentList: Record; + ipBlocklist: Record; }; - rateLimited: number; }; }; hostnames: { hostname: string; port: number | undefined }[]; diff --git a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts index b6e9d0bca..e45d61cdb 100644 --- a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts +++ b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts @@ -156,12 +156,9 @@ function generateHeartbeatEvent(): Event { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }, agent: { diff --git a/library/middleware/shouldBlockRequest.ts b/library/middleware/shouldBlockRequest.ts index 23e731cbc..135020dea 100644 --- a/library/middleware/shouldBlockRequest.ts +++ b/library/middleware/shouldBlockRequest.ts @@ -24,17 +24,11 @@ export function shouldBlockRequest(): Result { agent.onMiddlewareExecuted(); if (context.user && agent.getConfig().isUserBlocked(context.user.id)) { - agent.getInspectionStatistics().onBlockedRequest({ - reason: "userBlock", - }); - return { block: true, type: "blocked", trigger: "user" }; } const rateLimitResult = shouldRateLimitRequest(context, agent); if (rateLimitResult.block) { - agent.getInspectionStatistics().onRateLimitedRequest(); - return { block: true, type: "ratelimited", diff --git a/library/sources/FunctionsFramework.test.ts b/library/sources/FunctionsFramework.test.ts index 9cfce1a1b..4a3c13757 100644 --- a/library/sources/FunctionsFramework.test.ts +++ b/library/sources/FunctionsFramework.test.ts @@ -93,12 +93,9 @@ t.test("it counts requests", async (t) => { attacksDetected: { total: 0, blocked: 0 }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }); }); @@ -117,12 +114,9 @@ t.test("it counts attacks", async (t) => { attacksDetected: { total: 1, blocked: 1 }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }); }); @@ -141,12 +135,9 @@ t.test("it counts request if error", async (t) => { attacksDetected: { total: 0, blocked: 0 }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }); }); diff --git a/library/sources/Lambda.test.ts b/library/sources/Lambda.test.ts index c5d6f0a13..2b3dfa5c9 100644 --- a/library/sources/Lambda.test.ts +++ b/library/sources/Lambda.test.ts @@ -305,12 +305,9 @@ t.test("it sends heartbeat after first and every 10 minutes", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }, middlewareInstalled: false, @@ -470,12 +467,9 @@ t.test("it counts attacks", async () => { }, blocked: { total: 0, - allowedIpsRoute: 0, - userAgent: 0, - ipBlocklist: 0, - userBlocked: 0, + userAgentList: {}, + ipBlocklist: {}, }, - rateLimited: 0, }, }); }); diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index a32da8f55..4fe13bfb7 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -41,7 +41,9 @@ export function checkIfRequestIsBlocked( res.end(message); - agent.getInspectionStatistics().onBlockedRequest({ reason: "ipBlocklist" }); + agent + .getInspectionStatistics() + .onBlockedRequest({ match: "ipBlocklist", key: "TODO" }); return true; } @@ -57,10 +59,6 @@ export function checkIfRequestIsBlocked( res.end(message); - agent - .getInspectionStatistics() - .onBlockedRequest({ reason: "allowedIpsRoute" }); - return true; } @@ -77,7 +75,9 @@ export function checkIfRequestIsBlocked( "You are not allowed to access this resource because you have been identified as a bot." ); - agent.getInspectionStatistics().onBlockedRequest({ reason: "userAgent" }); + agent + .getInspectionStatistics() + .onBlockedRequest({ match: "userAgentList", key: "TODO" }); return true; } From d66f58d58f00bd13eb5f0061ea4e3b70f874266f Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 7 Feb 2025 11:05:26 +0100 Subject: [PATCH 05/48] Add key of matching IP/User agent list --- end2end/tests/hono-xml-blocklists.test.js | 7 ++- library/agent/Agent.test.ts | 16 ++++- library/agent/InspectionStatistics.test.ts | 2 +- library/agent/ServiceConfig.test.ts | 26 +++++++-- library/agent/ServiceConfig.ts | 58 ++++++++++++++----- library/agent/api/fetchBlockedLists.ts | 17 ++++-- library/sources/HTTPServer.test.ts | 7 ++- library/sources/Hono.test.ts | 12 +++- .../http-server/checkIfRequestIsBlocked.ts | 7 ++- 9 files changed, 118 insertions(+), 34 deletions(-) diff --git a/end2end/tests/hono-xml-blocklists.test.js b/end2end/tests/hono-xml-blocklists.test.js index c93df2143..ee378b77d 100644 --- a/end2end/tests/hono-xml-blocklists.test.js +++ b/end2end/tests/hono-xml-blocklists.test.js @@ -25,7 +25,12 @@ t.beforeEach(async () => { }, body: JSON.stringify({ blockedIPAddresses: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], - blockedUserAgents: "hacker|attacker|GPTBot", + blockedUserAgentsV2: [ + { + key: "some/key", + pattern: "hacker|attacker|GPTBot", + }, + ], }), } ); diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 9e1c81f8e..496ee433b 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -26,12 +26,22 @@ wrap(fetch, "fetch", function mock() { body: JSON.stringify({ blockedIPAddresses: [ { + key: "some/key", source: "name", description: "Description", ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], }, ], - blockedUserAgents: "AI2Bot|Bytespider", + blockedUserAgentsV2: [ + { + key: "ai", + pattern: "AI2Bot|SomethingElse", + }, + { + key: "spider", + pattern: "Bytespider", + }, + ], }), }; }; @@ -1056,10 +1066,12 @@ t.test("it fetches blocked lists", async () => { t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { blocked: true, reason: "Description", + key: "some/key", }); t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), { blocked: true, reason: "Description", + key: "some/key", }); t.same( @@ -1069,6 +1081,7 @@ t.test("it fetches blocked lists", async () => { "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" ), { + key: "ai", blocked: true, } ); @@ -1076,6 +1089,7 @@ t.test("it fetches blocked lists", async () => { t.same( agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible) Bytespider"), { + key: "spider", blocked: true, } ); diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index ae80fb1ae..d2d531f9d 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -576,7 +576,7 @@ t.test("it keeps track of blocked requests", async () => { total: 2, ipBlocklist: { // eslint-disable-next-line camelcase - known_threat_actors_public_scanners: 1, + "known_threat_actors/public_scanners": 1, }, userAgentList: { // eslint-disable-next-line camelcase diff --git a/library/agent/ServiceConfig.test.ts b/library/agent/ServiceConfig.test.ts index b2fc4abaf..b7377589a 100644 --- a/library/agent/ServiceConfig.test.ts +++ b/library/agent/ServiceConfig.test.ts @@ -89,6 +89,7 @@ t.test("it checks if IP is allowed", async () => { t.test("ip blocking works", async () => { const config = new ServiceConfig([], 0, [], [], false, [ { + key: "geoip/Belgium;BE", source: "geoip", description: "description", ips: [ @@ -103,15 +104,18 @@ t.test("ip blocking works", async () => { t.same(config.isIPAddressBlocked("1.2.3.4"), { blocked: true, reason: "description", + key: "geoip/Belgium;BE", }); t.same(config.isIPAddressBlocked("2.3.4.5"), { blocked: false }); t.same(config.isIPAddressBlocked("192.168.2.2"), { blocked: true, reason: "description", + key: "geoip/Belgium;BE", }); t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::1"), { blocked: true, reason: "description", + key: "geoip/Belgium;BE", }); t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::2"), { blocked: false, @@ -119,27 +123,41 @@ t.test("ip blocking works", async () => { t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::1"), { blocked: true, reason: "description", + key: "geoip/Belgium;BE", }); t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::2"), { blocked: true, reason: "description", + key: "geoip/Belgium;BE", }); t.same(config.isIPAddressBlocked("5.6.7.8"), { blocked: true, reason: "description", + key: "geoip/Belgium;BE", }); t.same(config.isIPAddressBlocked("1.2"), { blocked: false }); }); t.test("it blocks bots", async () => { const config = new ServiceConfig([], 0, [], [], true, []); - config.updateBlockedUserAgents("googlebot|bingbot"); + config.updateBlockedUserAgents([ + { + key: "search", + pattern: "googlebot|bingbot", + }, + ]); - t.same(config.isUserAgentBlocked("googlebot"), { blocked: true }); - t.same(config.isUserAgentBlocked("123 bingbot abc"), { blocked: true }); + t.same(config.isUserAgentBlocked("googlebot"), { + blocked: true, + key: "search", + }); + t.same(config.isUserAgentBlocked("123 bingbot abc"), { + blocked: true, + key: "search", + }); t.same(config.isUserAgentBlocked("bing"), { blocked: false }); - config.updateBlockedUserAgents(""); + config.updateBlockedUserAgents([]); t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); }); diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index 7d15e2306..61c978dbf 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -1,16 +1,25 @@ import { IPMatcher } from "../helpers/ip-matcher/IPMatcher"; import { LimitedContext, matchEndpoints } from "../helpers/matchEndpoints"; import { Endpoint } from "./Config"; -import { Blocklist as BlocklistType } from "./api/fetchBlockedLists"; +import { + Blocklist as BlocklistType, + AgentBlockList, +} from "./api/fetchBlockedLists"; export class ServiceConfig { private blockedUserIds: Map = new Map(); private allowedIPAddresses: Map = new Map(); private nonGraphQLEndpoints: Endpoint[] = []; private graphqlFields: Endpoint[] = []; - private blockedIPAddresses: { blocklist: IPMatcher; description: string }[] = - []; - private blockedUserAgentRegex: RegExp | undefined; + private blockedIPAddresses: { + key: string; + blocklist: IPMatcher; + description: string; + }[] = []; + private blockedUserAgentRegex: { + key: string; + pattern: RegExp; + }[] = []; constructor( endpoints: Endpoint[], @@ -84,13 +93,17 @@ export class ServiceConfig { isIPAddressBlocked( ip: string - ): { blocked: true; reason: string } | { blocked: false } { + ): { blocked: true; reason: string; key: string } | { blocked: false } { const blocklist = this.blockedIPAddresses.find((blocklist) => blocklist.blocklist.has(ip) ); if (blocklist) { - return { blocked: true, reason: blocklist.description }; + return { + blocked: true, + reason: blocklist.description, + key: blocklist.key, + }; } return { blocked: false }; @@ -101,6 +114,7 @@ export class ServiceConfig { for (const source of blockedIPAddresses) { this.blockedIPAddresses.push({ + key: source.key, blocklist: new IPMatcher(source.ips), description: source.description, }); @@ -111,18 +125,32 @@ export class ServiceConfig { this.setBlockedIPAddresses(blockedIPAddresses); } - updateBlockedUserAgents(blockedUserAgents: string) { - if (!blockedUserAgents) { - this.blockedUserAgentRegex = undefined; - return; - } - this.blockedUserAgentRegex = new RegExp(blockedUserAgents, "i"); + private setBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { + this.blockedUserAgentRegex = blockedUserAgents + .filter( + (list) => typeof list.pattern === "string" && list.pattern.length > 0 + ) + .map((list) => { + return { + key: list.key, + pattern: new RegExp(list.pattern, "i"), + }; + }); } - isUserAgentBlocked(ua: string): { blocked: boolean } { - if (this.blockedUserAgentRegex) { - return { blocked: this.blockedUserAgentRegex.test(ua) }; + updateBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { + this.setBlockedUserAgents(blockedUserAgents); + } + + isUserAgentBlocked( + ua: string + ): { blocked: false } | { blocked: true; key: string } { + for (const blocklist of this.blockedUserAgentRegex) { + if (blocklist.pattern.test(ua)) { + return { blocked: true, key: blocklist.key }; + } } + return { blocked: false }; } diff --git a/library/agent/api/fetchBlockedLists.ts b/library/agent/api/fetchBlockedLists.ts index 73fcfd390..e34b120cb 100644 --- a/library/agent/api/fetchBlockedLists.ts +++ b/library/agent/api/fetchBlockedLists.ts @@ -3,14 +3,20 @@ import { getAPIURL } from "../getAPIURL"; import { Token } from "./Token"; export type Blocklist = { + key: string; source: string; description: string; ips: string[]; }; +export type AgentBlockList = { + key: string; + pattern: string; // e.g. "Googlebot|Bingbot" +}; + export async function fetchBlockedLists(token: Token): Promise<{ blockedIPAddresses: Blocklist[]; - blockedUserAgents: string; + blockedUserAgents: AgentBlockList[]; }> { const baseUrl = getAPIURL(); const { body, statusCode } = await fetch({ @@ -30,7 +36,7 @@ export async function fetchBlockedLists(token: Token): Promise<{ const result: { blockedIPAddresses: Blocklist[]; - blockedUserAgents: string; + blockedUserAgentsV2: string; } = JSON.parse(body); return { @@ -38,10 +44,9 @@ export async function fetchBlockedLists(token: Token): Promise<{ result && Array.isArray(result.blockedIPAddresses) ? result.blockedIPAddresses : [], - // Blocked user agents are stored as a string pattern for usage in a regex (e.g. "Googlebot|Bingbot") blockedUserAgents: - result && typeof result.blockedUserAgents === "string" - ? result.blockedUserAgents - : "", + result && Array.isArray(result.blockedUserAgentsV2) + ? result.blockedUserAgentsV2 + : [], }; } diff --git a/library/sources/HTTPServer.test.ts b/library/sources/HTTPServer.test.ts index 743728010..6c3dda8ed 100644 --- a/library/sources/HTTPServer.test.ts +++ b/library/sources/HTTPServer.test.ts @@ -8,7 +8,7 @@ import { wrap } from "../helpers/wrap"; import { HTTPServer } from "./HTTPServer"; import { join } from "path"; import { createTestAgent } from "../helpers/createTestAgent"; -import type { Blocklist } from "../agent/api/fetchBlockedLists"; +import type { Blocklist, AgentBlockList } from "../agent/api/fetchBlockedLists"; import * as fetchBlockedLists from "../agent/api/fetchBlockedLists"; import { mkdtemp, writeFile, unlink } from "fs/promises"; import { exec } from "child_process"; @@ -53,17 +53,18 @@ agent.start([new HTTPServer()]); wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { return async function fetchBlockedLists(): Promise<{ blockedIPAddresses: Blocklist[]; - blockedUserAgents: string; + blockedUserAgentsV2: AgentBlockList[]; }> { return { blockedIPAddresses: [ { + key: "geoip/Belgium;BE", source: "geoip", ips: ["9.9.9.9"], description: "geo restrictions", }, ], - blockedUserAgents: "", + blockedUserAgentsV2: [], }; }; }); diff --git a/library/sources/Hono.test.ts b/library/sources/Hono.test.ts index 015d1d7bd..a4bede436 100644 --- a/library/sources/Hono.test.ts +++ b/library/sources/Hono.test.ts @@ -1,5 +1,6 @@ /* eslint-disable prefer-rest-params */ import * as t from "tap"; +import type { Blocklist, AgentBlockList } from "../agent/api/fetchBlockedLists"; import { ReportingAPIForTesting } from "../agent/api/ReportingAPIForTesting"; import { Token } from "../agent/api/Token"; import { setUser } from "../agent/context/user"; @@ -25,12 +26,21 @@ wrap(fetch, "fetch", function mock(original) { body: JSON.stringify({ blockedIPAddresses: [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], }, ], - blockedUserAgents: "hacker|attacker", + blockedUserAgentsV2: [ + { + key: "key", + pattern: "hacker|attacker", + }, + ], + } satisfies { + blockedIPAddresses: Blocklist[]; + blockedUserAgentsV2: AgentBlockList[]; }), }; } diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index 4fe13bfb7..2e6668191 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -43,7 +43,7 @@ export function checkIfRequestIsBlocked( agent .getInspectionStatistics() - .onBlockedRequest({ match: "ipBlocklist", key: "TODO" }); + .onBlockedRequest({ match: "ipBlocklist", key: result.key }); return true; } @@ -77,7 +77,10 @@ export function checkIfRequestIsBlocked( agent .getInspectionStatistics() - .onBlockedRequest({ match: "userAgentList", key: "TODO" }); + .onBlockedRequest({ + match: "userAgentList", + key: isUserAgentBlocked.key, + }); return true; } From c6814e703eb5dba28d525dbe243eca94a8c8e187 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 7 Feb 2025 11:17:24 +0100 Subject: [PATCH 06/48] Update mock server --- end2end/server/src/handlers/lists.js | 4 +++- end2end/server/src/handlers/updateLists.js | 6 +++--- library/agent/Agent.test.ts | 3 ++- library/agent/ServiceConfig.ts | 2 +- library/agent/api/fetchBlockedLists.ts | 14 ++++++++------ library/sources/HTTPServer.test.ts | 7 ++----- library/sources/Hono.test.ts | 7 ++----- 7 files changed, 21 insertions(+), 22 deletions(-) diff --git a/end2end/server/src/handlers/lists.js b/end2end/server/src/handlers/lists.js index 9334a1f2e..23c2d2321 100644 --- a/end2end/server/src/handlers/lists.js +++ b/end2end/server/src/handlers/lists.js @@ -24,6 +24,8 @@ module.exports = function lists(req, res) { }, ] : [], - blockedUserAgents: blockedUserAgents, + blockedUserAgentsV2: Array.isArray(blockedUserAgents) + ? blockedUserAgents + : [], }); }; diff --git a/end2end/server/src/handlers/updateLists.js b/end2end/server/src/handlers/updateLists.js index e0c7d9080..290f9880f 100644 --- a/end2end/server/src/handlers/updateLists.js +++ b/end2end/server/src/handlers/updateLists.js @@ -32,10 +32,10 @@ module.exports = function updateIPLists(req, res) { updateBlockedIPAddresses(req.app, req.body.blockedIPAddresses); if ( - req.body.blockedUserAgents && - typeof req.body.blockedUserAgents === "string" + req.body.blockedUserAgentsV2 && + typeof req.body.blockedUserAgentsV2 === "string" ) { - updateBlockedUserAgents(req.app, req.body.blockedUserAgents); + updateBlockedUserAgents(req.app, req.body.blockedUserAgentsV2); } res.json({ success: true }); diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 496ee433b..f4c3ab81f 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -18,6 +18,7 @@ import { Wrapper } from "./Wrapper"; import { Context } from "./Context"; import { createTestAgent } from "../helpers/createTestAgent"; import { setTimeout } from "node:timers/promises"; +import type { Response } from "./api/fetchBlockedLists"; wrap(fetch, "fetch", function mock() { return async function mock() { @@ -42,7 +43,7 @@ wrap(fetch, "fetch", function mock() { pattern: "Bytespider", }, ], - }), + } satisfies Response), }; }; }); diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index 61c978dbf..5a9921053 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -2,7 +2,7 @@ import { IPMatcher } from "../helpers/ip-matcher/IPMatcher"; import { LimitedContext, matchEndpoints } from "../helpers/matchEndpoints"; import { Endpoint } from "./Config"; import { - Blocklist as BlocklistType, + IPBlocklist as BlocklistType, AgentBlockList, } from "./api/fetchBlockedLists"; diff --git a/library/agent/api/fetchBlockedLists.ts b/library/agent/api/fetchBlockedLists.ts index e34b120cb..d725b6b74 100644 --- a/library/agent/api/fetchBlockedLists.ts +++ b/library/agent/api/fetchBlockedLists.ts @@ -2,7 +2,7 @@ import { fetch } from "../../helpers/fetch"; import { getAPIURL } from "../getAPIURL"; import { Token } from "./Token"; -export type Blocklist = { +export type IPBlocklist = { key: string; source: string; description: string; @@ -14,8 +14,13 @@ export type AgentBlockList = { pattern: string; // e.g. "Googlebot|Bingbot" }; +export type Response = { + blockedIPAddresses: IPBlocklist[]; + blockedUserAgentsV2: AgentBlockList[]; +}; + export async function fetchBlockedLists(token: Token): Promise<{ - blockedIPAddresses: Blocklist[]; + blockedIPAddresses: IPBlocklist[]; blockedUserAgents: AgentBlockList[]; }> { const baseUrl = getAPIURL(); @@ -34,10 +39,7 @@ export async function fetchBlockedLists(token: Token): Promise<{ throw new Error(`Failed to fetch blocked lists: ${statusCode}`); } - const result: { - blockedIPAddresses: Blocklist[]; - blockedUserAgentsV2: string; - } = JSON.parse(body); + const result: Response = JSON.parse(body); return { blockedIPAddresses: diff --git a/library/sources/HTTPServer.test.ts b/library/sources/HTTPServer.test.ts index 6c3dda8ed..a376e9a79 100644 --- a/library/sources/HTTPServer.test.ts +++ b/library/sources/HTTPServer.test.ts @@ -8,7 +8,7 @@ import { wrap } from "../helpers/wrap"; import { HTTPServer } from "./HTTPServer"; import { join } from "path"; import { createTestAgent } from "../helpers/createTestAgent"; -import type { Blocklist, AgentBlockList } from "../agent/api/fetchBlockedLists"; +import type { Response } from "../agent/api/fetchBlockedLists"; import * as fetchBlockedLists from "../agent/api/fetchBlockedLists"; import { mkdtemp, writeFile, unlink } from "fs/promises"; import { exec } from "child_process"; @@ -51,10 +51,7 @@ const agent = createTestAgent({ agent.start([new HTTPServer()]); wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { - return async function fetchBlockedLists(): Promise<{ - blockedIPAddresses: Blocklist[]; - blockedUserAgentsV2: AgentBlockList[]; - }> { + return async function fetchBlockedLists(): Promise { return { blockedIPAddresses: [ { diff --git a/library/sources/Hono.test.ts b/library/sources/Hono.test.ts index a4bede436..26c50091b 100644 --- a/library/sources/Hono.test.ts +++ b/library/sources/Hono.test.ts @@ -1,6 +1,6 @@ /* eslint-disable prefer-rest-params */ import * as t from "tap"; -import type { Blocklist, AgentBlockList } from "../agent/api/fetchBlockedLists"; +import type { Response } from "../agent/api/fetchBlockedLists"; import { ReportingAPIForTesting } from "../agent/api/ReportingAPIForTesting"; import { Token } from "../agent/api/Token"; import { setUser } from "../agent/context/user"; @@ -38,10 +38,7 @@ wrap(fetch, "fetch", function mock(original) { pattern: "hacker|attacker", }, ], - } satisfies { - blockedIPAddresses: Blocklist[]; - blockedUserAgentsV2: AgentBlockList[]; - }), + } satisfies Response), }; } From 5d0c34e389c95764b1adec53416e64a7721e5b1f Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 7 Feb 2025 11:36:05 +0100 Subject: [PATCH 07/48] Fix linting --- library/sources/http-server/checkIfRequestIsBlocked.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index 2e6668191..0de740c90 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -75,12 +75,10 @@ export function checkIfRequestIsBlocked( "You are not allowed to access this resource because you have been identified as a bot." ); - agent - .getInspectionStatistics() - .onBlockedRequest({ - match: "userAgentList", - key: isUserAgentBlocked.key, - }); + agent.getInspectionStatistics().onBlockedRequest({ + match: "userAgentList", + key: isUserAgentBlocked.key, + }); return true; } From 1062e5d23e9512dafc2eb0d090eb78e38be22360 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 7 Feb 2025 11:45:14 +0100 Subject: [PATCH 08/48] Fixes --- end2end/tests/hono-xml-blocklists.test.js | 10 +++++----- library/sources/http-server/checkIfRequestIsBlocked.ts | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/end2end/tests/hono-xml-blocklists.test.js b/end2end/tests/hono-xml-blocklists.test.js index 03f9dac9d..446a9257e 100644 --- a/end2end/tests/hono-xml-blocklists.test.js +++ b/end2end/tests/hono-xml-blocklists.test.js @@ -46,11 +46,11 @@ t.beforeEach(async () => { body: JSON.stringify({ blockedIPAddresses: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], blockedUserAgentsV2: [ - { - key: "some/key", - pattern: "hacker|attacker|GPTBot", - }, - ], + { + key: "some/key", + pattern: "hacker|attacker|GPTBot", + }, + ], }), }); t.same(lists.status, 200); diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index a32b6b65b..aa3a5e1bd 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -37,10 +37,6 @@ export function checkIfRequestIsBlocked( res.end(message); - agent - .getInspectionStatistics() - .onBlockedRequest({ match: "ipBlocklist", key: result.key }); - return true; } @@ -67,6 +63,10 @@ export function checkIfRequestIsBlocked( res.end(message); + agent + .getInspectionStatistics() + .onBlockedRequest({ match: "ipBlocklist", key: result.key }); + return true; } From b83646585432906c187d2c825c112e318541b90b Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 7 Feb 2025 11:59:57 +0100 Subject: [PATCH 09/48] Fix end2end test --- end2end/server/src/handlers/updateLists.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/end2end/server/src/handlers/updateLists.js b/end2end/server/src/handlers/updateLists.js index 290f9880f..c0935ff5f 100644 --- a/end2end/server/src/handlers/updateLists.js +++ b/end2end/server/src/handlers/updateLists.js @@ -33,7 +33,7 @@ module.exports = function updateIPLists(req, res) { if ( req.body.blockedUserAgentsV2 && - typeof req.body.blockedUserAgentsV2 === "string" + Array.isArray(req.body.blockedUserAgentsV2) ) { updateBlockedUserAgents(req.app, req.body.blockedUserAgentsV2); } From b94d1b8fff4276d2e5af0fb3448f54d20f1e971f Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Mon, 17 Feb 2025 15:02:52 +0100 Subject: [PATCH 10/48] Improve diff --- library/agent/Agent.test.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 3198deede..b660e1674 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -35,16 +35,6 @@ wrap(fetch, "fetch", function mock() { ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], }, ], - allowedIPAddresses: shouldOnlyAllowSomeIPAddresses - ? [ - { - key: "some/key", - source: "name", - description: "Description", - ips: ["4.3.2.1"], - }, - ] - : [], blockedUserAgentsV2: [ { key: "ai", @@ -55,6 +45,16 @@ wrap(fetch, "fetch", function mock() { pattern: "Bytespider", }, ], + allowedIPAddresses: shouldOnlyAllowSomeIPAddresses + ? [ + { + key: "some/key", + source: "name", + description: "Description", + ips: ["4.3.2.1"], + }, + ] + : [], } satisfies Response), }; }; From bef8fe2d7f5cc7bca3e4159f28ff76ee82c2df79 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Mon, 17 Feb 2025 17:58:58 +0100 Subject: [PATCH 11/48] Fix test file --- library/sources/Hono.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/sources/Hono.test.ts b/library/sources/Hono.test.ts index 6f34c9cdb..78f73a30b 100644 --- a/library/sources/Hono.test.ts +++ b/library/sources/Hono.test.ts @@ -32,11 +32,11 @@ wrap(fetch, "fetch", function mock(original) { ips: ["1.3.2.0/24", "e98c:a7ba:2329:8c69::/64"], }, ], + allowedIPAddresses: [], blockedUserAgentsV2: [ { key: "key", pattern: "hacker|attacker", - allowedIPAddresses: [], }, ], } satisfies Response), From c39a440bf18cfae5796ee802041ceaaef719191f Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Mon, 17 Feb 2025 18:00:58 +0100 Subject: [PATCH 12/48] Fix agent test --- library/agent/Agent.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index b660e1674..116e549b8 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -1156,10 +1156,12 @@ t.test("it only allows some IP addresses", async () => { t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { blocked: true, reason: "Description", + key: "some/key", }); t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), { blocked: true, reason: "Description", + key: "some/key", }); t.same(agent.getConfig().isAllowedIPAddress("1.2.3.4"), { From acd2939a9a4cd90a9571f7bc58763f980fb48559 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 25 Mar 2025 16:48:18 +0100 Subject: [PATCH 13/48] Revert changes --- end2end/server/src/handlers/lists.js | 2 +- end2end/server/src/handlers/updateLists.js | 7 ++-- end2end/tests/hono-xml-blocklists.test.js | 2 +- library/agent/Agent.test.ts | 2 +- library/agent/ServiceConfig.test.ts | 19 +++-------- library/agent/ServiceConfig.ts | 37 +++++++++------------- library/agent/api/fetchBlockedLists.ts | 23 +++++++++----- library/sources/HTTPServer.test.ts | 2 +- library/sources/Hono.test.ts | 2 +- 9 files changed, 41 insertions(+), 55 deletions(-) diff --git a/end2end/server/src/handlers/lists.js b/end2end/server/src/handlers/lists.js index f432f6e9c..08d02daa1 100644 --- a/end2end/server/src/handlers/lists.js +++ b/end2end/server/src/handlers/lists.js @@ -26,7 +26,7 @@ module.exports = function lists(req, res) { }, ] : [], - blockedUserAgentsV2: Array.isArray(blockedUserAgents) + blockedUserAgents: Array.isArray(blockedUserAgents) ? blockedUserAgents : [], allowedIPAddresses: diff --git a/end2end/server/src/handlers/updateLists.js b/end2end/server/src/handlers/updateLists.js index de0dd9637..46a7613e9 100644 --- a/end2end/server/src/handlers/updateLists.js +++ b/end2end/server/src/handlers/updateLists.js @@ -32,11 +32,8 @@ module.exports = function updateIPLists(req, res) { updateBlockedIPAddresses(req.app, req.body.blockedIPAddresses); - if ( - req.body.blockedUserAgentsV2 && - Array.isArray(req.body.blockedUserAgentsV2) - ) { - updateBlockedUserAgents(req.app, req.body.blockedUserAgentsV2); + if (req.body.blockedUserAgents && Array.isArray(req.body.blockedUserAgents)) { + updateBlockedUserAgents(req.app, req.body.blockedUserAgents); } if ( diff --git a/end2end/tests/hono-xml-blocklists.test.js b/end2end/tests/hono-xml-blocklists.test.js index 677f116ea..044366b10 100644 --- a/end2end/tests/hono-xml-blocklists.test.js +++ b/end2end/tests/hono-xml-blocklists.test.js @@ -45,7 +45,7 @@ t.beforeEach(async () => { }, body: JSON.stringify({ blockedIPAddresses: ["1.3.2.0/24", "e98c:a7ba:2329:8c69::/64"], - blockedUserAgentsV2: [ + blockedUserAgents: [ { key: "some/key", pattern: "hacker|attacker|GPTBot", diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 0546b584b..6645896e8 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -35,7 +35,7 @@ wrap(fetch, "fetch", function mock() { ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], }, ], - blockedUserAgentsV2: [ + blockedUserAgents: [ { key: "ai", pattern: "AI2Bot|SomethingElse", diff --git a/library/agent/ServiceConfig.test.ts b/library/agent/ServiceConfig.test.ts index 6681ca9cf..b14f8e466 100644 --- a/library/agent/ServiceConfig.test.ts +++ b/library/agent/ServiceConfig.test.ts @@ -149,24 +149,13 @@ t.test("ip blocking works", async () => { t.test("it blocks bots", async () => { const config = new ServiceConfig([], 0, [], [], true, [], []); - config.updateBlockedUserAgents([ - { - key: "search", - pattern: "googlebot|bingbot", - }, - ]); + config.updateBlockedUserAgents("googlebot|bingbot"); - t.same(config.isUserAgentBlocked("googlebot"), { - blocked: true, - key: "search", - }); - t.same(config.isUserAgentBlocked("123 bingbot abc"), { - blocked: true, - key: "search", - }); + t.same(config.isUserAgentBlocked("googlebot"), { blocked: true }); + t.same(config.isUserAgentBlocked("123 bingbot abc"), { blocked: true }); t.same(config.isUserAgentBlocked("bing"), { blocked: false }); - config.updateBlockedUserAgents([]); + config.updateBlockedUserAgents(""); t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); }); diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index e8d085378..45f0ea749 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -15,7 +15,8 @@ export class ServiceConfig { blocklist: IPMatcher; description: string; }[] = []; - private blockedUserAgentRegex: { + private blockedUserAgentRegex: RegExp | undefined; + private monitoredUserAgents: { key: string; pattern: RegExp; }[] = []; @@ -25,6 +26,10 @@ export class ServiceConfig { allowlist: IPMatcher; description: string; }[] = []; + private monitoredIPAddresses: { + key: string; + blocklist: IPMatcher; + }[] = []; constructor( endpoints: Endpoint[], @@ -133,30 +138,18 @@ export class ServiceConfig { this.setBlockedIPAddresses(blockedIPAddresses); } - private setBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { - this.blockedUserAgentRegex = blockedUserAgents - .filter( - (list) => typeof list.pattern === "string" && list.pattern.length > 0 - ) - .map((list) => { - return { - key: list.key, - pattern: new RegExp(list.pattern, "i"), - }; - }); - } + updateBlockedUserAgents(blockedUserAgents: string) { + if (!blockedUserAgents) { + this.blockedUserAgentRegex = undefined; + return; + } - updateBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { - this.setBlockedUserAgents(blockedUserAgents); + this.blockedUserAgentRegex = new RegExp(blockedUserAgents, "i"); } - isUserAgentBlocked( - ua: string - ): { blocked: false } | { blocked: true; key: string } { - for (const blocklist of this.blockedUserAgentRegex) { - if (blocklist.pattern.test(ua)) { - return { blocked: true, key: blocklist.key }; - } + isUserAgentBlocked(ua: string): { blocked: boolean } { + if (this.blockedUserAgentRegex) { + return { blocked: this.blockedUserAgentRegex.test(ua) }; } return { blocked: false }; diff --git a/library/agent/api/fetchBlockedLists.ts b/library/agent/api/fetchBlockedLists.ts index 76642e419..cb975072e 100644 --- a/library/agent/api/fetchBlockedLists.ts +++ b/library/agent/api/fetchBlockedLists.ts @@ -17,14 +17,12 @@ export type AgentBlockList = { export type Response = { blockedIPAddresses: IPList[]; allowedIPAddresses: IPList[]; - blockedUserAgentsV2: AgentBlockList[]; + monitoredIPAddresses: IPList[]; + blockedUserAgents: string; + monitoredUserAgents: AgentBlockList[]; }; -export async function fetchBlockedLists(token: Token): Promise<{ - blockedIPAddresses: IPList[]; - allowedIPAddresses: IPList[]; - blockedUserAgents: AgentBlockList[]; -}> { +export async function fetchBlockedLists(token: Token): Promise { const baseUrl = getAPIURL(); const { body, statusCode } = await fetch({ url: new URL(`${baseUrl.toString()}api/runtime/firewall/lists`), @@ -57,9 +55,18 @@ export async function fetchBlockedLists(token: Token): Promise<{ result && Array.isArray(result.allowedIPAddresses) ? result.allowedIPAddresses : [], + monitoredIPAddresses: + result && Array.isArray(result.monitoredIPAddresses) + ? result.monitoredIPAddresses + : [], + // Blocked user agents are stored as a string pattern for usage in a regex (e.g. "Googlebot|Bingbot") blockedUserAgents: - result && Array.isArray(result.blockedUserAgentsV2) - ? result.blockedUserAgentsV2 + result && typeof result.blockedUserAgents === "string" + ? result.blockedUserAgents + : "", + monitoredUserAgents: + result && Array.isArray(result.monitoredUserAgents) + ? result.monitoredUserAgents : [], }; } diff --git a/library/sources/HTTPServer.test.ts b/library/sources/HTTPServer.test.ts index 6dfbb7566..34bfd3916 100644 --- a/library/sources/HTTPServer.test.ts +++ b/library/sources/HTTPServer.test.ts @@ -62,7 +62,7 @@ wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { description: "geo restrictions", }, ], - blockedUserAgentsV2: [], + blockedUserAgents: [], }; }; }); diff --git a/library/sources/Hono.test.ts b/library/sources/Hono.test.ts index a4db921ba..b6c79f044 100644 --- a/library/sources/Hono.test.ts +++ b/library/sources/Hono.test.ts @@ -33,7 +33,7 @@ wrap(fetch, "fetch", function mock(original) { }, ], allowedIPAddresses: [], - blockedUserAgentsV2: [ + blockedUserAgents: [ { key: "key", pattern: "hacker|attacker", From 41169e379873a0e57dfb4cef17f3fe112b2f0cf2 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 26 Mar 2025 11:10:02 +0100 Subject: [PATCH 14/48] Undo --- end2end/server/src/handlers/lists.js | 4 +--- end2end/server/src/handlers/updateLists.js | 5 ++++- end2end/tests/hono-xml-blocklists.test.js | 7 +------ library/agent/Agent.test.ts | 13 +++---------- .../sources/http-server/checkIfRequestIsBlocked.ts | 9 --------- 5 files changed, 9 insertions(+), 29 deletions(-) diff --git a/end2end/server/src/handlers/lists.js b/end2end/server/src/handlers/lists.js index 08d02daa1..617ac2ff0 100644 --- a/end2end/server/src/handlers/lists.js +++ b/end2end/server/src/handlers/lists.js @@ -26,9 +26,7 @@ module.exports = function lists(req, res) { }, ] : [], - blockedUserAgents: Array.isArray(blockedUserAgents) - ? blockedUserAgents - : [], + blockedUserAgents: blockedUserAgents, allowedIPAddresses: allowedIps.length > 0 ? [ diff --git a/end2end/server/src/handlers/updateLists.js b/end2end/server/src/handlers/updateLists.js index 46a7613e9..4b0a02164 100644 --- a/end2end/server/src/handlers/updateLists.js +++ b/end2end/server/src/handlers/updateLists.js @@ -32,7 +32,10 @@ module.exports = function updateIPLists(req, res) { updateBlockedIPAddresses(req.app, req.body.blockedIPAddresses); - if (req.body.blockedUserAgents && Array.isArray(req.body.blockedUserAgents)) { + if ( + req.body.blockedUserAgents && + typeof req.body.blockedUserAgents === "string" + ) { updateBlockedUserAgents(req.app, req.body.blockedUserAgents); } diff --git a/end2end/tests/hono-xml-blocklists.test.js b/end2end/tests/hono-xml-blocklists.test.js index 044366b10..fc0761390 100644 --- a/end2end/tests/hono-xml-blocklists.test.js +++ b/end2end/tests/hono-xml-blocklists.test.js @@ -45,12 +45,7 @@ t.beforeEach(async () => { }, body: JSON.stringify({ blockedIPAddresses: ["1.3.2.0/24", "e98c:a7ba:2329:8c69::/64"], - blockedUserAgents: [ - { - key: "some/key", - pattern: "hacker|attacker|GPTBot", - }, - ], + blockedUserAgents: "hacker|attacker|GPTBot", }), }); t.same(lists.status, 200); diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 6645896e8..a7a3fc09f 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -35,16 +35,7 @@ wrap(fetch, "fetch", function mock() { ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], }, ], - blockedUserAgents: [ - { - key: "ai", - pattern: "AI2Bot|SomethingElse", - }, - { - key: "spider", - pattern: "Bytespider", - }, - ], + blockedUserAgents: "AI2Bot|Bytespider", allowedIPAddresses: shouldOnlyAllowSomeIPAddresses ? [ { @@ -55,6 +46,8 @@ wrap(fetch, "fetch", function mock() { }, ] : [], + monitoredUserAgents: [], + monitoredIPAddresses: [], } satisfies Response), }; }; diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index d89b21b37..527808c4c 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -80,10 +80,6 @@ export function checkIfRequestIsBlocked( res.end(message); - agent - .getInspectionStatistics() - .onBlockedRequest({ match: "ipBlocklist", key: result.key }); - return true; } @@ -100,11 +96,6 @@ export function checkIfRequestIsBlocked( "You are not allowed to access this resource because you have been identified as a bot." ); - agent.getInspectionStatistics().onBlockedRequest({ - match: "userAgentList", - key: isUserAgentBlocked.key, - }); - return true; } From 505fb4b14c5e172be1717998d6224d329dd0f6df Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 12:25:04 +0100 Subject: [PATCH 15/48] Update inspection stats --- library/agent/InspectionStatistics.test.ts | 386 ++++++++++++++---- library/agent/InspectionStatistics.ts | 150 +++++-- library/agent/ServiceConfig.ts | 16 +- library/agent/api/Event.ts | 22 +- .../ReportingAPIRateLimitedClientSide.test.ts | 22 +- library/sources/HTTPServer.test.ts | 4 +- library/sources/Hono.test.ts | 9 +- .../http-server/checkIfRequestIsBlocked.ts | 4 + .../http-server/createRequestListener.ts | 25 ++ 9 files changed, 501 insertions(+), 137 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index d2d531f9d..3305a1eb4 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -39,10 +39,24 @@ t.test("it resets stats", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -59,10 +73,24 @@ t.test("it resets stats", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -90,10 +118,24 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -127,10 +169,24 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -164,10 +220,24 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -195,10 +265,24 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -232,10 +316,24 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -269,10 +367,24 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -325,10 +437,24 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -377,10 +503,24 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -397,10 +537,24 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -418,10 +572,24 @@ t.test("it keeps track of requests", async () => { total: 1, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -439,10 +607,24 @@ t.test("it keeps track of requests", async () => { total: 2, blocked: 1, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -461,10 +643,24 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -490,10 +686,24 @@ t.test("it force compresses stats", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -537,10 +747,24 @@ t.test("it keeps track of aborted requests", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); @@ -556,11 +780,8 @@ t.test("it keeps track of blocked requests", async () => { maxCompressedStatsInMemory: 5, }); - stats.onBlockedRequest({ - match: "ipBlocklist", - key: "known_threat_actors/public_scanners", - }); - stats.onBlockedRequest({ match: "userAgentList", key: "ai_data_scrapers" }); + stats.onBlockedIPAddress("known_threat_actors/public_scanners"); + stats.onBlockedUserAgent(); t.same(stats.getStats(), { sinks: {}, @@ -572,15 +793,26 @@ t.test("it keeps track of blocked requests", async () => { total: 0, blocked: 0, }, - blocked: { - total: 2, - ipBlocklist: { - // eslint-disable-next-line camelcase - "known_threat_actors/public_scanners": 1, + userAgents: { + blocked: { + total: 1, + }, + monitor: { + total: 0, + breakdown: {}, }, - userAgentList: { - // eslint-disable-next-line camelcase - ai_data_scrapers: 1, + }, + ipAddresses: { + blocked: { + total: 1, + breakdown: { + // eslint-disable-next-line camelcase + "known_threat_actors/public_scanners": 1, + }, + }, + monitor: { + total: 0, + breakdown: {}, }, }, }, diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index 7f38b7cb2..ef6036215 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -21,16 +21,8 @@ type SinkStats = { }; type SinkStatsWithoutTimings = Omit; - -type RequestBlocked = - | { - match: "userAgentList"; - key: string; - } - | { - match: "ipBlocklist"; - key: string; - }; +type UserAgentBotKey = string; +type IPListKey = string; export class InspectionStatistics { private startedAt = Date.now(); @@ -44,19 +36,49 @@ export class InspectionStatistics { total: number; blocked: number; }; - blocked: { - total: number; - userAgentList: Record; - ipBlocklist: Record; + userAgents: { + blocked: { + // We cannot build a breakdown for blocked user agents + // We use one regex for matching user agents + total: number; + }; + monitor: { + total: number; + breakdown: Record; + }; + }; + ipAddresses: { + blocked: { + total: number; + breakdown: Record; + }; + monitor: { + total: number; + breakdown: Record; + }; }; } = { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }; @@ -91,15 +113,30 @@ export class InspectionStatistics { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }; this.startedAt = Date.now(); } + // eslint-disable-next-line max-lines-per-function getStats(): { sinks: Record; startedAt: number; @@ -110,10 +147,24 @@ export class InspectionStatistics { total: number; blocked: number; }; - blocked: { - total: number; - userAgentList: Record; - ipBlocklist: Record; + userAgents: { + blocked: { + total: number; + }; + monitor: { + total: number; + breakdown: Record; + }; + }; + ipAddresses: { + blocked: { + total: number; + breakdown: Record; + }; + monitor: { + total: number; + breakdown: Record; + }; }; }; } { @@ -210,25 +261,38 @@ export class InspectionStatistics { } } - onBlockedRequest({ match, key }: RequestBlocked) { - this.requests.blocked.total += 1; + onBlockedIPAddress(key: string) { + this.requests.ipAddresses.blocked.total += 1; - switch (match) { - case "userAgentList": { - if (!this.requests.blocked.userAgentList[key]) { - this.requests.blocked.userAgentList[key] = 0; - } - this.requests.blocked.userAgentList[key] += 1; - break; - } - case "ipBlocklist": { - if (!this.requests.blocked.ipBlocklist[key]) { - this.requests.blocked.ipBlocklist[key] = 0; - } - this.requests.blocked.ipBlocklist[key] += 1; - break; - } + if (!this.requests.ipAddresses.blocked.breakdown[key]) { + this.requests.ipAddresses.blocked.breakdown[key] = 0; + } + + this.requests.ipAddresses.blocked.breakdown[key] += 1; + } + + onBlockedUserAgent() { + this.requests.userAgents.blocked.total += 1; + } + + detectedMonitoredIPAddress(key: IPListKey) { + this.requests.ipAddresses.monitor.total += 1; + + if (!this.requests.ipAddresses.monitor.breakdown[key]) { + this.requests.ipAddresses.monitor.breakdown[key] = 0; } + + this.requests.ipAddresses.monitor.breakdown[key] += 1; + } + + detectedMonitoredUserAgent(key: UserAgentBotKey) { + this.requests.userAgents.monitor.total += 1; + + if (!this.requests.userAgents.monitor.breakdown[key]) { + this.requests.userAgents.monitor.breakdown[key] = 0; + } + + this.requests.userAgents.monitor.breakdown[key] += 1; } onAbortedRequest() { diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index 45f0ea749..6d2a40a65 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -2,7 +2,7 @@ import { IPMatcher } from "../helpers/ip-matcher/IPMatcher"; import { LimitedContext, matchEndpoints } from "../helpers/matchEndpoints"; import { isPrivateIP } from "../vulnerabilities/ssrf/isPrivateIP"; import { Endpoint } from "./Config"; -import { IPList, AgentBlockList } from "./api/fetchBlockedLists"; +import { IPList } from "./api/fetchBlockedLists"; export class ServiceConfig { private blockedUserIds: Map = new Map(); @@ -28,7 +28,7 @@ export class ServiceConfig { }[] = []; private monitoredIPAddresses: { key: string; - blocklist: IPMatcher; + matcher: IPMatcher; }[] = []; constructor( @@ -191,6 +191,18 @@ export class ServiceConfig { return { allowed: !!allowlist }; } + isMonitoredIPAddress(ip: string): { key: string } | undefined { + const list = this.monitoredIPAddresses.find((list) => list.matcher.has(ip)); + + return list ? { key: list.key } : undefined; + } + + isMonitoredUserAgent(ua: string): { key: string } | undefined { + const list = this.monitoredUserAgents.find((list) => list.pattern.test(ua)); + + return list ? { key: list.key } : undefined; + } + updateConfig( endpoints: Endpoint[], lastUpdatedAt: number, diff --git a/library/agent/api/Event.ts b/library/agent/api/Event.ts index bcf3e0c30..1ab73fca6 100644 --- a/library/agent/api/Event.ts +++ b/library/agent/api/Event.ts @@ -92,10 +92,24 @@ type Heartbeat = { total: number; blocked: number; }; - blocked: { - total: number; - userAgentList: Record; - ipBlocklist: Record; + userAgents: { + blocked: { + total: number; + }; + monitor: { + total: number; + breakdown: Record; + }; + }; + ipAddresses: { + blocked: { + total: number; + breakdown: Record; + }; + monitor: { + total: number; + breakdown: Record; + }; }; }; }; diff --git a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts index e45d61cdb..033a14476 100644 --- a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts +++ b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts @@ -154,10 +154,24 @@ function generateHeartbeatEvent(): Event { blocked: 0, total: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }, diff --git a/library/sources/HTTPServer.test.ts b/library/sources/HTTPServer.test.ts index 34bfd3916..ac5cc356b 100644 --- a/library/sources/HTTPServer.test.ts +++ b/library/sources/HTTPServer.test.ts @@ -62,7 +62,9 @@ wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { description: "geo restrictions", }, ], - blockedUserAgents: [], + blockedUserAgents: "", + monitoredUserAgents: [], + monitoredIPAddresses: [], }; }; }); diff --git a/library/sources/Hono.test.ts b/library/sources/Hono.test.ts index b6c79f044..2263751c7 100644 --- a/library/sources/Hono.test.ts +++ b/library/sources/Hono.test.ts @@ -33,12 +33,9 @@ wrap(fetch, "fetch", function mock(original) { }, ], allowedIPAddresses: [], - blockedUserAgents: [ - { - key: "key", - pattern: "hacker|attacker", - }, - ], + monitoredIPAddresses: [], + blockedUserAgents: "hacker|attacker", + monitoredUserAgents: [], } satisfies Response), }; } diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index 527808c4c..0a8dab076 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -80,6 +80,8 @@ export function checkIfRequestIsBlocked( res.end(message); + agent.getInspectionStatistics().onBlockedIPAddress(result.key); + return true; } @@ -96,6 +98,8 @@ export function checkIfRequestIsBlocked( "You are not allowed to access this resource because you have been identified as a bot." ); + agent.getInspectionStatistics().onBlockedUserAgent(); + return true; } diff --git a/library/sources/http-server/createRequestListener.ts b/library/sources/http-server/createRequestListener.ts index 4c95569ef..136515df8 100644 --- a/library/sources/http-server/createRequestListener.ts +++ b/library/sources/http-server/createRequestListener.ts @@ -74,6 +74,7 @@ function callListenerWithContext( // Use symbol to avoid conflicts with other properties const countedRequest = Symbol("__zen_request_counted__"); +// eslint-disable-next-line max-lines-per-function function createOnFinishRequestHandler( req: IncomingMessage & { [countedRequest]?: boolean }, res: ServerResponse, @@ -105,10 +106,34 @@ function createOnFinishRequestHandler( } agent.getInspectionStatistics().onRequest(); + if (context && context.attackDetected) { agent.getInspectionStatistics().onDetectedAttack({ blocked: agent.shouldBlock(), }); } + + if (context) { + if ( + context.headers && + typeof context.headers["user-agent"] === "string" + ) { + const match = agent + .getConfig() + .isMonitoredUserAgent(context.headers["user-agent"]); + if (match) { + agent.getInspectionStatistics().detectedMonitoredUserAgent(match.key); + } + } + + if (context.remoteAddress) { + const match = agent + .getConfig() + .isMonitoredIPAddress(context.remoteAddress); + if (match) { + agent.getInspectionStatistics().detectedMonitoredIPAddress(match.key); + } + } + } }; } From 9863cda687b7c02d175ab25eced3b57e907fd2b8 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 12:29:17 +0100 Subject: [PATCH 16/48] add unit test for monitored IPs and user agents --- library/agent/InspectionStatistics.test.ts | 94 ++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index 3305a1eb4..6f7e0f0bc 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -820,3 +820,97 @@ t.test("it keeps track of blocked requests", async () => { clock.uninstall(); }); + +t.test("it keeps track of monitored requests", async () => { + const clock = FakeTimers.install(); + + const stats = new InspectionStatistics({ + maxPerfSamplesInMemory: 50, + maxCompressedStatsInMemory: 5, + }); + + stats.detectedMonitoredIPAddress("known_threat_actors/public_scanners"); + stats.detectedMonitoredUserAgent("ai_data_scrapers"); + + t.same(stats.getStats(), { + sinks: {}, + startedAt: 0, + requests: { + total: 0, + aborted: 0, + attacksDetected: { + total: 0, + blocked: 0, + }, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 1, + breakdown: { + // eslint-disable-next-line camelcase + "ai_data_scrapers": 1, + }, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 1, + breakdown: { + // eslint-disable-next-line camelcase + "known_threat_actors/public_scanners": 1, + }, + }, + }, + }, + }); + + // Test multiple occurrences + stats.detectedMonitoredIPAddress("known_threat_actors/public_scanners"); + stats.detectedMonitoredUserAgent("ai_data_scrapers"); + + t.same(stats.getStats(), { + sinks: {}, + startedAt: 0, + requests: { + total: 0, + aborted: 0, + attacksDetected: { + total: 0, + blocked: 0, + }, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 2, + breakdown: { + // eslint-disable-next-line camelcase + "ai_data_scrapers": 2, + }, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 2, + breakdown: { + // eslint-disable-next-line camelcase + "known_threat_actors/public_scanners": 2, + }, + }, + }, + }, + }); + + clock.uninstall(); +}); From df58be74ba951ed32e3eb29e4e051cee0be53721 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 12:32:07 +0100 Subject: [PATCH 17/48] Improve tests --- library/agent/InspectionStatistics.test.ts | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index 6f7e0f0bc..2ef7a3fcb 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -772,7 +772,7 @@ t.test("it keeps track of aborted requests", async () => { clock.uninstall(); }); -t.test("it keeps track of blocked requests", async () => { +t.test("it keeps track of blocked IPs and user agents", async () => { const clock = FakeTimers.install(); const stats = new InspectionStatistics({ @@ -806,7 +806,6 @@ t.test("it keeps track of blocked requests", async () => { blocked: { total: 1, breakdown: { - // eslint-disable-next-line camelcase "known_threat_actors/public_scanners": 1, }, }, @@ -821,7 +820,7 @@ t.test("it keeps track of blocked requests", async () => { clock.uninstall(); }); -t.test("it keeps track of monitored requests", async () => { +t.test("it keeps track of monitored IPs and user agents", async () => { const clock = FakeTimers.install(); const stats = new InspectionStatistics({ @@ -850,7 +849,7 @@ t.test("it keeps track of monitored requests", async () => { total: 1, breakdown: { // eslint-disable-next-line camelcase - "ai_data_scrapers": 1, + ai_data_scrapers: 1, }, }, }, @@ -862,7 +861,6 @@ t.test("it keeps track of monitored requests", async () => { monitor: { total: 1, breakdown: { - // eslint-disable-next-line camelcase "known_threat_actors/public_scanners": 1, }, }, @@ -892,7 +890,7 @@ t.test("it keeps track of monitored requests", async () => { total: 2, breakdown: { // eslint-disable-next-line camelcase - "ai_data_scrapers": 2, + ai_data_scrapers: 2, }, }, }, @@ -904,7 +902,6 @@ t.test("it keeps track of monitored requests", async () => { monitor: { total: 2, breakdown: { - // eslint-disable-next-line camelcase "known_threat_actors/public_scanners": 2, }, }, From 859a7bbcc666e31109a50c4b6f996b078fd9c8d8 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 12:44:44 +0100 Subject: [PATCH 18/48] Set monitored user agents and IPs in config --- end2end/server/src/handlers/lists.js | 2 ++ library/agent/Agent.ts | 11 ++++++-- library/agent/ServiceConfig.ts | 16 ++++++++++- library/agent/api/fetchBlockedLists.ts | 37 +++++++++----------------- 4 files changed, 39 insertions(+), 27 deletions(-) diff --git a/end2end/server/src/handlers/lists.js b/end2end/server/src/handlers/lists.js index 617ac2ff0..343786013 100644 --- a/end2end/server/src/handlers/lists.js +++ b/end2end/server/src/handlers/lists.js @@ -37,5 +37,7 @@ module.exports = function lists(req, res) { }, ] : [], + monitoredIPAddresses: [], + monitoredUserAgents: [], }); }; diff --git a/library/agent/Agent.ts b/library/agent/Agent.ts index d50f1a4d5..d6a07f68a 100644 --- a/library/agent/Agent.ts +++ b/library/agent/Agent.ts @@ -374,11 +374,18 @@ export class Agent { } try { - const { blockedIPAddresses, blockedUserAgents, allowedIPAddresses } = - await fetchBlockedLists(this.token); + const { + blockedIPAddresses, + blockedUserAgents, + allowedIPAddresses, + monitoredUserAgents, + monitoredIPAddresses, + } = await fetchBlockedLists(this.token); this.serviceConfig.updateBlockedIPAddresses(blockedIPAddresses); this.serviceConfig.updateBlockedUserAgents(blockedUserAgents); this.serviceConfig.updateAllowedIPAddresses(allowedIPAddresses); + this.serviceConfig.updateMonitoredUserAgents(monitoredUserAgents); + this.serviceConfig.updateMonitoredIPAddresses(monitoredIPAddresses); } catch (error: any) { console.error(`Aikido: Failed to update blocked lists: ${error.message}`); } diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index 6d2a40a65..0b487e06e 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -2,7 +2,7 @@ import { IPMatcher } from "../helpers/ip-matcher/IPMatcher"; import { LimitedContext, matchEndpoints } from "../helpers/matchEndpoints"; import { isPrivateIP } from "../vulnerabilities/ssrf/isPrivateIP"; import { Endpoint } from "./Config"; -import { IPList } from "./api/fetchBlockedLists"; +import { AgentBlockList, IPList } from "./api/fetchBlockedLists"; export class ServiceConfig { private blockedUserIds: Map = new Map(); @@ -147,6 +147,20 @@ export class ServiceConfig { this.blockedUserAgentRegex = new RegExp(blockedUserAgents, "i"); } + updateMonitoredUserAgents(monitoredUserAgents: AgentBlockList[]) { + this.monitoredUserAgents = monitoredUserAgents.map((list) => ({ + key: list.key, + pattern: new RegExp(list.pattern, "i"), + })); + } + + updateMonitoredIPAddresses(monitoredIPAddresses: IPList[]) { + this.monitoredIPAddresses = monitoredIPAddresses.map((list) => ({ + key: list.key, + matcher: new IPMatcher(list.ips), + })); + } + isUserAgentBlocked(ua: string): { blocked: boolean } { if (this.blockedUserAgentRegex) { return { blocked: this.blockedUserAgentRegex.test(ua) }; diff --git a/library/agent/api/fetchBlockedLists.ts b/library/agent/api/fetchBlockedLists.ts index cb975072e..a8be4d10b 100644 --- a/library/agent/api/fetchBlockedLists.ts +++ b/library/agent/api/fetchBlockedLists.ts @@ -17,8 +17,8 @@ export type AgentBlockList = { export type Response = { blockedIPAddresses: IPList[]; allowedIPAddresses: IPList[]; - monitoredIPAddresses: IPList[]; blockedUserAgents: string; + monitoredIPAddresses: IPList[]; monitoredUserAgents: AgentBlockList[]; }; @@ -46,27 +46,16 @@ export async function fetchBlockedLists(token: Token): Promise { const result: Response = JSON.parse(body); - return { - blockedIPAddresses: - result && Array.isArray(result.blockedIPAddresses) - ? result.blockedIPAddresses - : [], - allowedIPAddresses: - result && Array.isArray(result.allowedIPAddresses) - ? result.allowedIPAddresses - : [], - monitoredIPAddresses: - result && Array.isArray(result.monitoredIPAddresses) - ? result.monitoredIPAddresses - : [], - // Blocked user agents are stored as a string pattern for usage in a regex (e.g. "Googlebot|Bingbot") - blockedUserAgents: - result && typeof result.blockedUserAgents === "string" - ? result.blockedUserAgents - : "", - monitoredUserAgents: - result && Array.isArray(result.monitoredUserAgents) - ? result.monitoredUserAgents - : [], - }; + const validResponse = + Array.isArray(result.blockedIPAddresses) && + Array.isArray(result.allowedIPAddresses) && + Array.isArray(result.monitoredIPAddresses) && + Array.isArray(result.monitoredUserAgents) && + typeof result.blockedUserAgents === "string"; + + if (!validResponse) { + throw new Error("Invalid response from fetchBlockedLists"); + } + + return result; } From ee32f54b9396f565aa06b84dcfda049bd29cf07b Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 12:50:25 +0100 Subject: [PATCH 19/48] fix: update isUserAgentBlocked test to match implementation --- library/agent/Agent.test.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index a7a3fc09f..ada1e855b 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -1086,7 +1086,6 @@ t.test("it fetches blocked lists", async () => { "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" ), { - key: "ai", blocked: true, } ); @@ -1094,7 +1093,6 @@ t.test("it fetches blocked lists", async () => { t.same( agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible) Bytespider"), { - key: "spider", blocked: true, } ); From 106762316a5321e698a02d3b66426d37c942017d Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 12:51:52 +0100 Subject: [PATCH 20/48] Update FunctionsFramework tests to match new statistics structure --- library/sources/FunctionsFramework.test.ts | 66 ++++++++++++++++++---- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/library/sources/FunctionsFramework.test.ts b/library/sources/FunctionsFramework.test.ts index 4a3c13757..e446cd663 100644 --- a/library/sources/FunctionsFramework.test.ts +++ b/library/sources/FunctionsFramework.test.ts @@ -91,10 +91,24 @@ t.test("it counts requests", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }); }); @@ -112,10 +126,24 @@ t.test("it counts attacks", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 1, blocked: 1 }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }); }); @@ -133,10 +161,24 @@ t.test("it counts request if error", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }); }); From 55787c29c14384360d4053bf38e5bd6b4ec404ff Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 13:08:44 +0100 Subject: [PATCH 21/48] Add tests for user agent and IP monitoring --- library/sources/HTTPServer.stats.test.ts | 148 +++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 library/sources/HTTPServer.stats.test.ts diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts new file mode 100644 index 000000000..4b920eb5d --- /dev/null +++ b/library/sources/HTTPServer.stats.test.ts @@ -0,0 +1,148 @@ +import { Token } from "../agent/api/Token"; +import * as t from "tap"; +import { ReportingAPIForTesting } from "../agent/api/ReportingAPIForTesting"; +import { getContext } from "../agent/Context"; +import { fetch } from "../helpers/fetch"; +import { wrap } from "../helpers/wrap"; +import { HTTPServer } from "./HTTPServer"; +import { createTestAgent } from "../helpers/createTestAgent"; +import type { Response } from "../agent/api/fetchBlockedLists"; +import * as fetchBlockedLists from "../agent/api/fetchBlockedLists"; + +// Before require("http") +const api = new ReportingAPIForTesting({ + success: true, + configUpdatedAt: 0, + allowedIPAddresses: [], + blockedUserIds: [], + endpoints: [], + heartbeatIntervalInMS: 10 * 60 * 1000, +}); + +const agent = createTestAgent({ + token: new Token("123"), + api, +}); + +agent.start([new HTTPServer()]); + +wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { + return async function fetchBlockedLists(): Promise { + return { + allowedIPAddresses: [], + blockedIPAddresses: [], + blockedUserAgents: "", + monitoredUserAgents: [ + { + key: "ai_data_scrapers", + pattern: "GPTBot|Google-Extended", + }, + ], + monitoredIPAddresses: [ + { + key: "known_threat_actors/public_scanners", + source: "known_threat_actors", + ips: ["1.2.3.4/32"], + description: "Known public scanners", + }, + ], + }; + }; +}); + +t.setTimeout(30 * 1000); + +const http = require("http") as typeof import("http"); + +t.test("it tracks monitored user agents", async () => { + const server = http.createServer((req, res) => { + res.setHeader("Content-Type", "text/plain"); + res.end("OK"); + }); + + await new Promise((resolve) => { + server.listen(3327, () => { + Promise.all([ + fetch({ + url: new URL("http://localhost:3327/test"), + method: "GET", + headers: { + "user-agent": "GPTBot", + }, + timeoutInMS: 500, + }), + fetch({ + url: new URL("http://localhost:3327/test"), + method: "GET", + headers: { + "user-agent": "Google-Extended", + }, + timeoutInMS: 500, + }), + fetch({ + url: new URL("http://localhost:3327/test"), + method: "GET", + headers: { + "user-agent": "Regular Browser", + }, + timeoutInMS: 500, + }), + ]).then(([response1, response2, response3]) => { + t.equal(response1.statusCode, 200); + t.equal(response2.statusCode, 200); + t.equal(response3.statusCode, 200); + const stats = agent.getInspectionStatistics().getStats(); + t.same(stats.requests.userAgents.monitor, { + total: 2, + breakdown: { + ai_data_scrapers: 2, + }, + }); + server.close(); + resolve(); + }); + }); + }); +}); + +t.test("it tracks monitored IP addresses", async () => { + const server = http.createServer((req, res) => { + res.setHeader("Content-Type", "text/plain"); + res.end("OK"); + }); + + await new Promise((resolve) => { + server.listen(3328, () => { + Promise.all([ + fetch({ + url: new URL("http://localhost:3328/test"), + method: "GET", + headers: { + "x-forwarded-for": "1.2.3.4", + }, + timeoutInMS: 500, + }), + fetch({ + url: new URL("http://localhost:3328/test"), + method: "GET", + headers: { + "x-forwarded-for": "5.6.7.8", + }, + timeoutInMS: 500, + }), + ]).then(([response1, response2]) => { + t.equal(response1.statusCode, 200); + t.equal(response2.statusCode, 200); + const stats = agent.getInspectionStatistics().getStats(); + t.same(stats.requests.ipAddresses.monitor, { + total: 1, + breakdown: { + "known_threat_actors/public_scanners": 1, + }, + }); + server.close(); + resolve(); + }); + }); + }); +}); \ No newline at end of file From 112d9a48c9d989d513a343b901212e01a2636f87 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 13:12:37 +0100 Subject: [PATCH 22/48] Update Lambda test assertions to match new stats structure --- library/sources/Lambda.test.ts | 44 +++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/library/sources/Lambda.test.ts b/library/sources/Lambda.test.ts index 2b3dfa5c9..59cd8e07d 100644 --- a/library/sources/Lambda.test.ts +++ b/library/sources/Lambda.test.ts @@ -303,10 +303,24 @@ t.test("it sends heartbeat after first and every 10 minutes", async () => { total: 0, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }, @@ -465,10 +479,24 @@ t.test("it counts attacks", async () => { total: 1, blocked: 0, }, - blocked: { - total: 0, - userAgentList: {}, - ipBlocklist: {}, + userAgents: { + blocked: { + total: 0, + }, + monitor: { + total: 0, + breakdown: {}, + }, + }, + ipAddresses: { + blocked: { + total: 0, + breakdown: {}, + }, + monitor: { + total: 0, + breakdown: {}, + }, }, }, }); From 82683f3dc948565bb675594698afa29ea1ec8bad Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 13:13:00 +0100 Subject: [PATCH 23/48] Fix formatting --- library/sources/HTTPServer.stats.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts index 4b920eb5d..f75f40ba0 100644 --- a/library/sources/HTTPServer.stats.test.ts +++ b/library/sources/HTTPServer.stats.test.ts @@ -145,4 +145,4 @@ t.test("it tracks monitored IP addresses", async () => { }); }); }); -}); \ No newline at end of file +}); From 5992d38361e8143260ca228ee8dcde2a94b317f0 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 13:15:49 +0100 Subject: [PATCH 24/48] Fix linting --- library/sources/HTTPServer.stats.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts index f75f40ba0..841f4fa18 100644 --- a/library/sources/HTTPServer.stats.test.ts +++ b/library/sources/HTTPServer.stats.test.ts @@ -1,7 +1,6 @@ import { Token } from "../agent/api/Token"; import * as t from "tap"; import { ReportingAPIForTesting } from "../agent/api/ReportingAPIForTesting"; -import { getContext } from "../agent/Context"; import { fetch } from "../helpers/fetch"; import { wrap } from "../helpers/wrap"; import { HTTPServer } from "./HTTPServer"; @@ -95,6 +94,7 @@ t.test("it tracks monitored user agents", async () => { t.same(stats.requests.userAgents.monitor, { total: 2, breakdown: { + // eslint-disable-next-line camelcase ai_data_scrapers: 2, }, }); From 7c81efaf0e09e2b0f83b3fa95cf7d8b99e564566 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 27 Mar 2025 13:29:17 +0100 Subject: [PATCH 25/48] Ensure fetchBlockedLists returns all properties --- library/sources/Hono.allowedIPAddresses.test.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/library/sources/Hono.allowedIPAddresses.test.ts b/library/sources/Hono.allowedIPAddresses.test.ts index f51ce81b4..25e2ef83c 100644 --- a/library/sources/Hono.allowedIPAddresses.test.ts +++ b/library/sources/Hono.allowedIPAddresses.test.ts @@ -21,6 +21,7 @@ wrap(fetch, "fetch", function mock(original) { body: JSON.stringify({ blockedIPAddresses: [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], @@ -29,11 +30,14 @@ wrap(fetch, "fetch", function mock(original) { blockedUserAgents: "hacker|attacker", allowedIPAddresses: [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: ["4.3.2.1"], }, ], + monitoredIPAddresses: [], + monitoredUserAgents: [], }), }; } From 920752fe03402b7698059d73d0e89206f8d707f8 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 16:27:53 +0200 Subject: [PATCH 26/48] Fix comment --- library/sources/http-server/createRequestListener.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/library/sources/http-server/createRequestListener.ts b/library/sources/http-server/createRequestListener.ts index fd7e6e76b..85a284829 100644 --- a/library/sources/http-server/createRequestListener.ts +++ b/library/sources/http-server/createRequestListener.ts @@ -103,9 +103,7 @@ function createOnFinishRequestHandler( }) ) { agent.onRouteExecute(context); - // Only count the request - - if the route is discovered + // Only count the request if the route is discovered agent.getInspectionStatistics().onRequest(); } From 25144ab0b3b4014fa15dec7090311049d47f3c13 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 18:29:06 +0200 Subject: [PATCH 27/48] Use monitor flag instead of separate lists - Remove monitoredIpAddresses and monitoredUserAgents\n- Add monitor flag to IP and user agent lists\n- Add x-supports-monitoring header to fetch new format\n- Update tests to reflect new API format --- library/agent/Agent.test.ts | 14 +++- library/agent/Agent.ts | 11 +-- library/agent/InspectionStatistics.test.ts | 23 +++++- library/agent/InspectionStatistics.ts | 14 +++- library/agent/ServiceConfig.test.ts | 79 ++++++++++++++++++- library/agent/ServiceConfig.ts | 67 ++++++++-------- library/agent/api/fetchBlockedLists.ts | 11 ++- library/sources/HTTPServer.stats.test.ts | 22 +++--- library/sources/HTTPServer.test.ts | 13 ++- library/sources/Hono.test.ts | 11 ++- library/sources/Lambda.test.ts | 2 + .../http-server/checkIfRequestIsBlocked.ts | 2 +- 12 files changed, 192 insertions(+), 77 deletions(-) diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index ada1e855b..76b1cbbe5 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -33,9 +33,16 @@ wrap(fetch, "fetch", function mock() { source: "name", description: "Description", ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], + monitor: false, + }, + ], + blockedUserAgents: [ + { + key: "ai_bots", + pattern: "AI2Bot|Bytespider", + monitor: false, }, ], - blockedUserAgents: "AI2Bot|Bytespider", allowedIPAddresses: shouldOnlyAllowSomeIPAddresses ? [ { @@ -43,11 +50,10 @@ wrap(fetch, "fetch", function mock() { source: "name", description: "Description", ips: ["4.3.2.1"], + monitor: false, }, ] : [], - monitoredUserAgents: [], - monitoredIPAddresses: [], } satisfies Response), }; }; @@ -1087,6 +1093,7 @@ t.test("it fetches blocked lists", async () => { ), { blocked: true, + key: "ai_bots", } ); @@ -1094,6 +1101,7 @@ t.test("it fetches blocked lists", async () => { agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible) Bytespider"), { blocked: true, + key: "ai_bots", } ); diff --git a/library/agent/Agent.ts b/library/agent/Agent.ts index ddea28080..2846a4d6f 100644 --- a/library/agent/Agent.ts +++ b/library/agent/Agent.ts @@ -378,18 +378,11 @@ export class Agent { } try { - const { - blockedIPAddresses, - blockedUserAgents, - allowedIPAddresses, - monitoredUserAgents, - monitoredIPAddresses, - } = await fetchBlockedLists(this.token); + const { blockedIPAddresses, blockedUserAgents, allowedIPAddresses } = + await fetchBlockedLists(this.token); this.serviceConfig.updateBlockedIPAddresses(blockedIPAddresses); this.serviceConfig.updateBlockedUserAgents(blockedUserAgents); this.serviceConfig.updateAllowedIPAddresses(allowedIPAddresses); - this.serviceConfig.updateMonitoredUserAgents(monitoredUserAgents); - this.serviceConfig.updateMonitoredIPAddresses(monitoredIPAddresses); } catch (error: any) { console.error(`Aikido: Failed to update blocked lists: ${error.message}`); } diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index 2ef7a3fcb..e5ddf4809 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -42,6 +42,7 @@ t.test("it resets stats", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -76,6 +77,7 @@ t.test("it resets stats", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -121,6 +123,7 @@ t.test("it keeps track of amount of calls", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -172,6 +175,7 @@ t.test("it keeps track of amount of calls", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -223,6 +227,7 @@ t.test("it keeps track of amount of calls", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -268,6 +273,7 @@ t.test("it keeps track of amount of calls", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -319,6 +325,7 @@ t.test("it keeps track of amount of calls", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -370,6 +377,7 @@ t.test("it keeps track of amount of calls", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -440,6 +448,7 @@ t.test("it keeps track of amount of calls", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -506,6 +515,7 @@ t.test("it keeps track of requests", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -540,6 +550,7 @@ t.test("it keeps track of requests", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -575,6 +586,7 @@ t.test("it keeps track of requests", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -610,6 +622,7 @@ t.test("it keeps track of requests", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -646,6 +659,7 @@ t.test("it keeps track of requests", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -689,6 +703,7 @@ t.test("it force compresses stats", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -750,6 +765,7 @@ t.test("it keeps track of aborted requests", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -781,7 +797,7 @@ t.test("it keeps track of blocked IPs and user agents", async () => { }); stats.onBlockedIPAddress("known_threat_actors/public_scanners"); - stats.onBlockedUserAgent(); + stats.onBlockedUserAgent("ai_bots"); t.same(stats.getStats(), { sinks: {}, @@ -796,6 +812,9 @@ t.test("it keeps track of blocked IPs and user agents", async () => { userAgents: { blocked: { total: 1, + breakdown: { + ai_bots: 1, + }, }, monitor: { total: 0, @@ -844,6 +863,7 @@ t.test("it keeps track of monitored IPs and user agents", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 1, @@ -885,6 +905,7 @@ t.test("it keeps track of monitored IPs and user agents", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 2, diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index ef6036215..77017c34f 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -38,9 +38,8 @@ export class InspectionStatistics { }; userAgents: { blocked: { - // We cannot build a breakdown for blocked user agents - // We use one regex for matching user agents total: number; + breakdown: Record; }; monitor: { total: number; @@ -64,6 +63,7 @@ export class InspectionStatistics { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -116,6 +116,7 @@ export class InspectionStatistics { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -150,6 +151,7 @@ export class InspectionStatistics { userAgents: { blocked: { total: number; + breakdown: Record; }; monitor: { total: number; @@ -271,8 +273,14 @@ export class InspectionStatistics { this.requests.ipAddresses.blocked.breakdown[key] += 1; } - onBlockedUserAgent() { + onBlockedUserAgent(key: string) { this.requests.userAgents.blocked.total += 1; + + if (!this.requests.userAgents.blocked.breakdown[key]) { + this.requests.userAgents.blocked.breakdown[key] = 0; + } + + this.requests.userAgents.blocked.breakdown[key] += 1; } detectedMonitoredIPAddress(key: IPListKey) { diff --git a/library/agent/ServiceConfig.test.ts b/library/agent/ServiceConfig.test.ts index 96380397a..967177a3e 100644 --- a/library/agent/ServiceConfig.test.ts +++ b/library/agent/ServiceConfig.test.ts @@ -107,6 +107,7 @@ t.test("ip blocking works", async () => { "fd00:3234:5678:9abc::1/64", "5.6.7.8/32", ], + monitor: false, }, ], [] @@ -150,13 +151,25 @@ t.test("ip blocking works", async () => { t.test("it blocks bots", async () => { const config = new ServiceConfig([], 0, [], [], true, [], []); - config.updateBlockedUserAgents("googlebot|bingbot"); + config.updateBlockedUserAgents([ + { + key: "test", + pattern: "googlebot|bingbot", + monitor: false, + }, + ]); - t.same(config.isUserAgentBlocked("googlebot"), { blocked: true }); - t.same(config.isUserAgentBlocked("123 bingbot abc"), { blocked: true }); + t.same(config.isUserAgentBlocked("googlebot"), { + blocked: true, + key: "test", + }); + t.same(config.isUserAgentBlocked("123 bingbot abc"), { + blocked: true, + key: "test", + }); t.same(config.isUserAgentBlocked("bing"), { blocked: false }); - config.updateBlockedUserAgents(""); + config.updateBlockedUserAgents([]); t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); }); @@ -175,6 +188,7 @@ t.test("restricting access to some ips", async () => { source: "geoip", description: "description", ips: ["1.2.3.4"], + monitor: false, }, ] ); @@ -203,6 +217,7 @@ t.test("only allow some ips: empty list", async () => { source: "geoip", description: "description", ips: [], + monitor: false, }, ] ); @@ -264,3 +279,59 @@ t.test("bypassed ips support cidr", async () => { t.same(config.isBypassedIP("123.123.123.1"), false); t.same(config.isBypassedIP("999.999.999.999"), false); }); + +t.test("it updates blocked user agents", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([ + { + key: "bots", + pattern: "googlebot|bingbot", + monitor: false, + }, + ]); + t.same(config.isUserAgentBlocked("googlebot"), { + blocked: true, + key: "bots", + }); + t.same(config.isUserAgentBlocked("firefox"), { blocked: false }); +}); + +t.test("it updates blocked user agents with empty list", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([]); + t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); +}); + +t.test("it updates blocked user agents with invalid pattern", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([ + { + key: "bots", + pattern: "googlebot|bingbot", + monitor: false, + }, + ]); + t.same(config.isUserAgentBlocked("googlebot"), { + blocked: true, + key: "bots", + }); +}); + +t.test("it updates blocked user agents with empty pattern", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([]); + t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); +}); + +t.test("it updates blocked user agents with monitor flag", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([ + { + key: "bots", + pattern: "googlebot|bingbot", + monitor: true, + }, + ]); + t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); + t.same(config.isMonitoredUserAgent("googlebot"), { key: "bots" }); +}); diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index ef3d58a22..54ea40eaa 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -14,11 +14,12 @@ export class ServiceConfig { key: string; blocklist: IPMatcher; description: string; + monitor: boolean; }[] = []; - private blockedUserAgentRegex: RegExp | undefined; - private monitoredUserAgents: { + private blockedUserAgents: { key: string; pattern: RegExp; + monitor: boolean; }[] = []; // If not empty, only ips in this list are allowed to access the service // e.g. for country allowlists @@ -26,10 +27,6 @@ export class ServiceConfig { allowlist: IPMatcher; description: string; }[] = []; - private monitoredIPAddresses: { - key: string; - matcher: IPMatcher; - }[] = []; constructor( endpoints: EndpointConfig[], @@ -121,9 +118,9 @@ export class ServiceConfig { isIPAddressBlocked( ip: string ): { blocked: true; reason: string; key: string } | { blocked: false } { - const blocklist = this.blockedIPAddresses.find((blocklist) => - blocklist.blocklist.has(ip) - ); + const blocklist = this.blockedIPAddresses + .filter((list) => !list.monitor) + .find((blocklist) => blocklist.blocklist.has(ip)); if (blocklist) { return { @@ -144,6 +141,7 @@ export class ServiceConfig { key: source.key, blocklist: new IPMatcher(source.ips), description: source.description, + monitor: source.monitor, }); } } @@ -152,32 +150,30 @@ export class ServiceConfig { this.setBlockedIPAddresses(blockedIPAddresses); } - updateBlockedUserAgents(blockedUserAgents: string) { - if (!blockedUserAgents) { - this.blockedUserAgentRegex = undefined; - return; - } - - this.blockedUserAgentRegex = new RegExp(blockedUserAgents, "i"); - } - - updateMonitoredUserAgents(monitoredUserAgents: AgentBlockList[]) { - this.monitoredUserAgents = monitoredUserAgents.map((list) => ({ + private setBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { + this.blockedUserAgents = blockedUserAgents.map((list) => ({ key: list.key, pattern: new RegExp(list.pattern, "i"), + monitor: list.monitor, })); } - updateMonitoredIPAddresses(monitoredIPAddresses: IPList[]) { - this.monitoredIPAddresses = monitoredIPAddresses.map((list) => ({ - key: list.key, - matcher: new IPMatcher(list.ips), - })); + updateBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { + this.setBlockedUserAgents(blockedUserAgents); } - isUserAgentBlocked(ua: string): { blocked: boolean } { - if (this.blockedUserAgentRegex) { - return { blocked: this.blockedUserAgentRegex.test(ua) }; + isUserAgentBlocked( + ua: string + ): { blocked: boolean; key: string } | { blocked: false } { + const match = this.blockedUserAgents + .filter((list) => !list.monitor) + .find((list) => list.pattern.test(ua)); + + if (match) { + return { + blocked: true, + key: match.key, + }; } return { blocked: false }; @@ -220,15 +216,22 @@ export class ServiceConfig { } isMonitoredIPAddress(ip: string): { key: string } | undefined { - const list = this.monitoredIPAddresses.find((list) => list.matcher.has(ip)); + const blocklist = this.blockedIPAddresses + .filter((list) => list.monitor) + .find((list) => list.blocklist.has(ip)); + if (blocklist) { + return { key: blocklist.key }; + } - return list ? { key: list.key } : undefined; + return undefined; } isMonitoredUserAgent(ua: string): { key: string } | undefined { - const list = this.monitoredUserAgents.find((list) => list.pattern.test(ua)); + const match = this.blockedUserAgents + .filter((list) => list.monitor) + .find((list) => list.pattern.test(ua)); - return list ? { key: list.key } : undefined; + return match ? { key: match.key } : undefined; } updateConfig( diff --git a/library/agent/api/fetchBlockedLists.ts b/library/agent/api/fetchBlockedLists.ts index a8be4d10b..b3757b76c 100644 --- a/library/agent/api/fetchBlockedLists.ts +++ b/library/agent/api/fetchBlockedLists.ts @@ -7,19 +7,19 @@ export type IPList = { source: string; description: string; ips: string[]; + monitor: boolean; }; export type AgentBlockList = { key: string; pattern: string; // e.g. "Googlebot|Bingbot" + monitor: boolean; }; export type Response = { blockedIPAddresses: IPList[]; allowedIPAddresses: IPList[]; - blockedUserAgents: string; - monitoredIPAddresses: IPList[]; - monitoredUserAgents: AgentBlockList[]; + blockedUserAgents: AgentBlockList[]; }; export async function fetchBlockedLists(token: Token): Promise { @@ -30,6 +30,7 @@ export async function fetchBlockedLists(token: Token): Promise { headers: { // We need to set the Accept-Encoding header to "gzip" to receive the response in gzip format "Accept-Encoding": "gzip", + "x-supports-monitoring": "true", Authorization: token.asString(), }, timeoutInMS: 60 * 1000, @@ -49,9 +50,7 @@ export async function fetchBlockedLists(token: Token): Promise { const validResponse = Array.isArray(result.blockedIPAddresses) && Array.isArray(result.allowedIPAddresses) && - Array.isArray(result.monitoredIPAddresses) && - Array.isArray(result.monitoredUserAgents) && - typeof result.blockedUserAgents === "string"; + Array.isArray(result.blockedUserAgents); if (!validResponse) { throw new Error("Invalid response from fetchBlockedLists"); diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts index 841f4fa18..2be862a8e 100644 --- a/library/sources/HTTPServer.stats.test.ts +++ b/library/sources/HTTPServer.stats.test.ts @@ -29,23 +29,23 @@ wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { return async function fetchBlockedLists(): Promise { return { allowedIPAddresses: [], - blockedIPAddresses: [], - blockedUserAgents: "", - monitoredUserAgents: [ + blockedIPAddresses: [ { - key: "ai_data_scrapers", - pattern: "GPTBot|Google-Extended", + key: "known_threat_actors/public_scanners", + monitor: true, + ips: ["1.2.3.4/32"], + source: "test", + description: "Test IP list", }, ], - monitoredIPAddresses: [ + blockedUserAgents: [ { - key: "known_threat_actors/public_scanners", - source: "known_threat_actors", - ips: ["1.2.3.4/32"], - description: "Known public scanners", + key: "ai_data_scrapers", + monitor: true, + pattern: "GPTBot|Google-Extended", }, ], - }; + } satisfies Response; }; }); diff --git a/library/sources/HTTPServer.test.ts b/library/sources/HTTPServer.test.ts index ac5cc356b..04725c1bf 100644 --- a/library/sources/HTTPServer.test.ts +++ b/library/sources/HTTPServer.test.ts @@ -60,12 +60,17 @@ wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { source: "geoip", ips: ["9.9.9.9"], description: "geo restrictions", + monitor: false, }, ], - blockedUserAgents: "", - monitoredUserAgents: [], - monitoredIPAddresses: [], - }; + blockedUserAgents: [ + { + key: "bots", + pattern: "", + monitor: false, + }, + ], + } satisfies Response; }; }); diff --git a/library/sources/Hono.test.ts b/library/sources/Hono.test.ts index 2263751c7..cc386eb6e 100644 --- a/library/sources/Hono.test.ts +++ b/library/sources/Hono.test.ts @@ -30,12 +30,17 @@ wrap(fetch, "fetch", function mock(original) { source: "geoip", description: "geo restrictions", ips: ["1.3.2.0/24", "e98c:a7ba:2329:8c69::/64"], + monitor: false, }, ], allowedIPAddresses: [], - monitoredIPAddresses: [], - blockedUserAgents: "hacker|attacker", - monitoredUserAgents: [], + blockedUserAgents: [ + { + key: "hackers", + pattern: "hacker|attacker", + monitor: false, + }, + ], } satisfies Response), }; } diff --git a/library/sources/Lambda.test.ts b/library/sources/Lambda.test.ts index e59f99c2b..919739379 100644 --- a/library/sources/Lambda.test.ts +++ b/library/sources/Lambda.test.ts @@ -306,6 +306,7 @@ t.test("it sends heartbeat after first and every 10 minutes", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -504,6 +505,7 @@ t.test("it counts attacks", async () => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index 0a8dab076..f0609dc91 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -98,7 +98,7 @@ export function checkIfRequestIsBlocked( "You are not allowed to access this resource because you have been identified as a bot." ); - agent.getInspectionStatistics().onBlockedUserAgent(); + agent.getInspectionStatistics().onBlockedUserAgent(isUserAgentBlocked.key); return true; } From 274842d3ab4bb7cb1e645f5a028b46fc1aed5e19 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 18:35:46 +0200 Subject: [PATCH 28/48] Fix test --- library/sources/HTTPServer.test.ts | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/library/sources/HTTPServer.test.ts b/library/sources/HTTPServer.test.ts index 04725c1bf..0c3015154 100644 --- a/library/sources/HTTPServer.test.ts +++ b/library/sources/HTTPServer.test.ts @@ -63,13 +63,7 @@ wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { monitor: false, }, ], - blockedUserAgents: [ - { - key: "bots", - pattern: "", - monitor: false, - }, - ], + blockedUserAgents: [], } satisfies Response; }; }); From b8b9022a5eea642cd57428abf6263ef78c89c9f7 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 18:40:12 +0200 Subject: [PATCH 29/48] Fixes --- library/agent/InspectionStatistics.test.ts | 1 + library/sources/Hono.allowedIPAddresses.test.ts | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index e5ddf4809..bc1898deb 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -813,6 +813,7 @@ t.test("it keeps track of blocked IPs and user agents", async () => { blocked: { total: 1, breakdown: { + // eslint-disable-next-line camelcase ai_bots: 1, }, }, diff --git a/library/sources/Hono.allowedIPAddresses.test.ts b/library/sources/Hono.allowedIPAddresses.test.ts index 25e2ef83c..55c7e7f90 100644 --- a/library/sources/Hono.allowedIPAddresses.test.ts +++ b/library/sources/Hono.allowedIPAddresses.test.ts @@ -8,6 +8,7 @@ import { HTTPServer } from "./HTTPServer"; import { getMajorNodeVersion } from "../helpers/getNodeVersion"; import { createTestAgent } from "../helpers/createTestAgent"; import * as fetch from "../helpers/fetch"; +import { Response } from "../agent/api/fetchBlockedLists"; wrap(fetch, "fetch", function mock(original) { return async function mock(this: typeof fetch) { @@ -25,20 +26,26 @@ wrap(fetch, "fetch", function mock(original) { source: "geoip", description: "geo restrictions", ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], + monitor: false, + }, + ], + blockedUserAgents: [ + { + key: "hacker", + monitor: false, + pattern: "hacker|attacker", }, ], - blockedUserAgents: "hacker|attacker", allowedIPAddresses: [ { key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: ["4.3.2.1"], + monitor: false, }, ], - monitoredIPAddresses: [], - monitoredUserAgents: [], - }), + } satisfies Response), }; } From 9bac765a496b83969941312549cf5d61848eb515 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 18:44:07 +0200 Subject: [PATCH 30/48] Fix stats test assertions --- library/sources/FunctionsFramework.test.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/library/sources/FunctionsFramework.test.ts b/library/sources/FunctionsFramework.test.ts index 1e1ed25d4..3cfca7ad8 100644 --- a/library/sources/FunctionsFramework.test.ts +++ b/library/sources/FunctionsFramework.test.ts @@ -124,6 +124,7 @@ t.test("it counts requests", async (t) => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -159,6 +160,7 @@ t.test("it counts attacks", async (t) => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, @@ -194,6 +196,7 @@ t.test("it counts request if error", async (t) => { userAgents: { blocked: { total: 0, + breakdown: {}, }, monitor: { total: 0, From e2ec466bbc088faf4ac7456d6571b39bb140421b Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 19:40:37 +0200 Subject: [PATCH 31/48] refactor: update statistics structure to use simpler format with total, blocked and breakdown --- library/agent/InspectionStatistics.test.ts | 436 ++++++--------------- library/agent/InspectionStatistics.ts | 136 +++---- library/agent/api/Event.ts | 21 +- library/sources/FunctionsFramework.test.ts | 66 +--- library/sources/HTTPServer.stats.test.ts | 10 +- library/sources/Lambda.test.ts | 44 +-- 6 files changed, 217 insertions(+), 496 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index bc1898deb..a654d3d47 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -40,24 +40,14 @@ t.test("it resets stats", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -75,24 +65,14 @@ t.test("it resets stats", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -121,24 +101,14 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -173,24 +143,14 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -225,24 +185,14 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -271,24 +221,14 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -323,24 +263,14 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -375,24 +305,14 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -446,24 +366,14 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -513,24 +423,14 @@ t.test("it keeps track of requests", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -548,24 +448,14 @@ t.test("it keeps track of requests", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -584,24 +474,14 @@ t.test("it keeps track of requests", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -620,24 +500,14 @@ t.test("it keeps track of requests", async () => { blocked: 1, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -657,24 +527,14 @@ t.test("it keeps track of requests", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -701,24 +561,14 @@ t.test("it force compresses stats", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -763,24 +613,14 @@ t.test("it keeps track of aborted requests", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); @@ -810,28 +650,18 @@ t.test("it keeps track of blocked IPs and user agents", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 1, - breakdown: { - // eslint-disable-next-line camelcase - ai_bots: 1, - }, - }, - monitor: { - total: 0, - breakdown: {}, + total: 1, + blocked: 1, + breakdown: { + // eslint-disable-next-line camelcase + ai_bots: { total: 1, blocked: 1 }, }, }, ipAddresses: { - blocked: { - total: 1, - breakdown: { - "known_threat_actors/public_scanners": 1, - }, - }, - monitor: { - total: 0, - breakdown: {}, + total: 1, + blocked: 1, + breakdown: { + "known_threat_actors/public_scanners": { total: 1, blocked: 1 }, }, }, }, @@ -862,28 +692,18 @@ t.test("it keeps track of monitored IPs and user agents", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 1, - breakdown: { - // eslint-disable-next-line camelcase - ai_data_scrapers: 1, - }, + total: 1, + blocked: 0, + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 1, blocked: 0 }, }, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 1, - breakdown: { - "known_threat_actors/public_scanners": 1, - }, + total: 1, + blocked: 0, + breakdown: { + "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, }, }, }, @@ -904,28 +724,18 @@ t.test("it keeps track of monitored IPs and user agents", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 2, - breakdown: { - // eslint-disable-next-line camelcase - ai_data_scrapers: 2, - }, + total: 2, + blocked: 0, + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 2, blocked: 0 }, }, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 2, - breakdown: { - "known_threat_actors/public_scanners": 2, - }, + total: 2, + blocked: 0, + breakdown: { + "known_threat_actors/public_scanners": { total: 2, blocked: 0 }, }, }, }, diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index 77017c34f..3577f7aa1 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -24,6 +24,18 @@ type SinkStatsWithoutTimings = Omit; type UserAgentBotKey = string; type IPListKey = string; +type UserAgentStats = { + total: number; + blocked: number; + breakdown: Record; +}; + +type IPAddressStats = { + total: number; + blocked: number; + breakdown: Record; +}; + export class InspectionStatistics { private startedAt = Date.now(); private stats: Record = {}; @@ -36,49 +48,21 @@ export class InspectionStatistics { total: number; blocked: number; }; - userAgents: { - blocked: { - total: number; - breakdown: Record; - }; - monitor: { - total: number; - breakdown: Record; - }; - }; - ipAddresses: { - blocked: { - total: number; - breakdown: Record; - }; - monitor: { - total: number; - breakdown: Record; - }; - }; + userAgents: UserAgentStats; + ipAddresses: IPAddressStats; } = { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }; @@ -114,24 +98,14 @@ export class InspectionStatistics { aborted: 0, attacksDetected: { total: 0, blocked: 0 }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }; this.startedAt = Date.now(); @@ -149,24 +123,14 @@ export class InspectionStatistics { blocked: number; }; userAgents: { - blocked: { - total: number; - breakdown: Record; - }; - monitor: { - total: number; - breakdown: Record; - }; + total: number; + blocked: number; + breakdown: Record; }; ipAddresses: { - blocked: { - total: number; - breakdown: Record; - }; - monitor: { - total: number; - breakdown: Record; - }; + total: number; + blocked: number; + breakdown: Record; }; }; } { @@ -264,43 +228,47 @@ export class InspectionStatistics { } onBlockedIPAddress(key: string) { - this.requests.ipAddresses.blocked.total += 1; + this.requests.ipAddresses.total += 1; + this.requests.ipAddresses.blocked += 1; - if (!this.requests.ipAddresses.blocked.breakdown[key]) { - this.requests.ipAddresses.blocked.breakdown[key] = 0; + if (!this.requests.ipAddresses.breakdown[key]) { + this.requests.ipAddresses.breakdown[key] = { total: 0, blocked: 0 }; } - this.requests.ipAddresses.blocked.breakdown[key] += 1; + this.requests.ipAddresses.breakdown[key].total += 1; + this.requests.ipAddresses.breakdown[key].blocked += 1; } onBlockedUserAgent(key: string) { - this.requests.userAgents.blocked.total += 1; + this.requests.userAgents.total += 1; + this.requests.userAgents.blocked += 1; - if (!this.requests.userAgents.blocked.breakdown[key]) { - this.requests.userAgents.blocked.breakdown[key] = 0; + if (!this.requests.userAgents.breakdown[key]) { + this.requests.userAgents.breakdown[key] = { total: 0, blocked: 0 }; } - this.requests.userAgents.blocked.breakdown[key] += 1; + this.requests.userAgents.breakdown[key].total += 1; + this.requests.userAgents.breakdown[key].blocked += 1; } detectedMonitoredIPAddress(key: IPListKey) { - this.requests.ipAddresses.monitor.total += 1; + this.requests.ipAddresses.total += 1; - if (!this.requests.ipAddresses.monitor.breakdown[key]) { - this.requests.ipAddresses.monitor.breakdown[key] = 0; + if (!this.requests.ipAddresses.breakdown[key]) { + this.requests.ipAddresses.breakdown[key] = { total: 0, blocked: 0 }; } - this.requests.ipAddresses.monitor.breakdown[key] += 1; + this.requests.ipAddresses.breakdown[key].total += 1; } detectedMonitoredUserAgent(key: UserAgentBotKey) { - this.requests.userAgents.monitor.total += 1; + this.requests.userAgents.total += 1; - if (!this.requests.userAgents.monitor.breakdown[key]) { - this.requests.userAgents.monitor.breakdown[key] = 0; + if (!this.requests.userAgents.breakdown[key]) { + this.requests.userAgents.breakdown[key] = { total: 0, blocked: 0 }; } - this.requests.userAgents.monitor.breakdown[key] += 1; + this.requests.userAgents.breakdown[key].total += 1; } onAbortedRequest() { diff --git a/library/agent/api/Event.ts b/library/agent/api/Event.ts index 1ab73fca6..11d80755d 100644 --- a/library/agent/api/Event.ts +++ b/library/agent/api/Event.ts @@ -93,23 +93,14 @@ type Heartbeat = { blocked: number; }; userAgents: { - blocked: { - total: number; - }; - monitor: { - total: number; - breakdown: Record; - }; + total: number; + blocked: number; + breakdown: Record; }; ipAddresses: { - blocked: { - total: number; - breakdown: Record; - }; - monitor: { - total: number; - breakdown: Record; - }; + total: number; + blocked: number; + breakdown: Record; }; }; }; diff --git a/library/sources/FunctionsFramework.test.ts b/library/sources/FunctionsFramework.test.ts index 3cfca7ad8..6ac0a96e3 100644 --- a/library/sources/FunctionsFramework.test.ts +++ b/library/sources/FunctionsFramework.test.ts @@ -122,24 +122,14 @@ t.test("it counts requests", async (t) => { aborted: 0, attacksDetected: { total: 0, blocked: 0 }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }); }); @@ -158,24 +148,14 @@ t.test("it counts attacks", async (t) => { aborted: 0, attacksDetected: { total: 1, blocked: 1 }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }); }); @@ -194,24 +174,14 @@ t.test("it counts request if error", async (t) => { aborted: 0, attacksDetected: { total: 0, blocked: 0 }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }); }); diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts index 2be862a8e..710bd2bc1 100644 --- a/library/sources/HTTPServer.stats.test.ts +++ b/library/sources/HTTPServer.stats.test.ts @@ -91,11 +91,12 @@ t.test("it tracks monitored user agents", async () => { t.equal(response2.statusCode, 200); t.equal(response3.statusCode, 200); const stats = agent.getInspectionStatistics().getStats(); - t.same(stats.requests.userAgents.monitor, { + t.same(stats.requests.userAgents, { total: 2, + blocked: 0, breakdown: { // eslint-disable-next-line camelcase - ai_data_scrapers: 2, + ai_data_scrapers: { total: 2, blocked: 0 }, }, }); server.close(); @@ -134,10 +135,11 @@ t.test("it tracks monitored IP addresses", async () => { t.equal(response1.statusCode, 200); t.equal(response2.statusCode, 200); const stats = agent.getInspectionStatistics().getStats(); - t.same(stats.requests.ipAddresses.monitor, { + t.same(stats.requests.ipAddresses, { total: 1, + blocked: 0, breakdown: { - "known_threat_actors/public_scanners": 1, + "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, }, }); server.close(); diff --git a/library/sources/Lambda.test.ts b/library/sources/Lambda.test.ts index 919739379..1470cf130 100644 --- a/library/sources/Lambda.test.ts +++ b/library/sources/Lambda.test.ts @@ -304,24 +304,14 @@ t.test("it sends heartbeat after first and every 10 minutes", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }, @@ -503,24 +493,14 @@ t.test("it counts attacks", async () => { blocked: 0, }, userAgents: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }); From 090802f046845bc9e1031771cefb37a0e4182cc9 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 20:45:25 +0200 Subject: [PATCH 32/48] Fix end2end tests --- end2end/server/src/handlers/lists.js | 15 ++++++++++++++- end2end/server/src/zen/config.js | 6 +++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/end2end/server/src/handlers/lists.js b/end2end/server/src/handlers/lists.js index 343786013..484245249 100644 --- a/end2end/server/src/handlers/lists.js +++ b/end2end/server/src/handlers/lists.js @@ -20,20 +20,33 @@ module.exports = function lists(req, res) { blockedIps.length > 0 ? [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: blockedIps, + monitor: false, + }, + ] + : [], + blockedUserAgents: + blockedUserAgents.length > 0 + ? [ + { + key: "hackers", + pattern: blockedUserAgents, + monitor: false, }, ] : [], - blockedUserAgents: blockedUserAgents, allowedIPAddresses: allowedIps.length > 0 ? [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: allowedIps, + monitor: false, }, ] : [], diff --git a/end2end/server/src/zen/config.js b/end2end/server/src/zen/config.js index 87797b8e3..101b75b0c 100644 --- a/end2end/server/src/zen/config.js +++ b/end2end/server/src/zen/config.js @@ -90,7 +90,7 @@ function getAllowedIPAddresses(app) { } function updateBlockedUserAgents(app, uas) { - let entry = blockedUserAgents.find((e) => e.serviceId === e.serviceId); + let entry = blockedUserAgents.find((e) => e.serviceId === app.serviceId); if (entry) { entry.userAgents = uas; @@ -104,13 +104,13 @@ function updateBlockedUserAgents(app, uas) { } function getBlockedUserAgents(app) { - const entry = blockedUserAgents.find((e) => e.serviceId === e.serviceId); + const entry = blockedUserAgents.find((e) => e.serviceId === app.serviceId); if (entry) { return entry.userAgents; } - return ""; + return { serviceId: app.serviceId, userAgents: [] }; } module.exports = { From 748aa1b1844cc54348466a5b5f5752c55341468a Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 20:50:47 +0200 Subject: [PATCH 33/48] FIX TypeScript errors and remove unused eslint directives --- library/agent/InspectionStatistics.ts | 1 - .../ReportingAPIRateLimitedClientSide.test.ts | 21 ++++++------------- .../http-server/createRequestListener.ts | 1 - 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index 3577f7aa1..9afa6bf1f 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -111,7 +111,6 @@ export class InspectionStatistics { this.startedAt = Date.now(); } - // eslint-disable-next-line max-lines-per-function getStats(): { sinks: Record; startedAt: number; diff --git a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts index 033a14476..58bef221b 100644 --- a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts +++ b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts @@ -155,23 +155,14 @@ function generateHeartbeatEvent(): Event { total: 0, }, userAgents: { - blocked: { - total: 0, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, ipAddresses: { - blocked: { - total: 0, - breakdown: {}, - }, - monitor: { - total: 0, - breakdown: {}, - }, + total: 0, + blocked: 0, + breakdown: {}, }, }, }, diff --git a/library/sources/http-server/createRequestListener.ts b/library/sources/http-server/createRequestListener.ts index 85a284829..f59a72912 100644 --- a/library/sources/http-server/createRequestListener.ts +++ b/library/sources/http-server/createRequestListener.ts @@ -74,7 +74,6 @@ function callListenerWithContext( // Use symbol to avoid conflicts with other properties const countedRequest = Symbol("__zen_request_counted__"); -// eslint-disable-next-line max-lines-per-function function createOnFinishRequestHandler( req: IncomingMessage & { [countedRequest]?: boolean }, res: ServerResponse, From 8c33ac578ce65ed82162002622abeb24ee3a66e9 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 15 Apr 2025 20:57:17 +0200 Subject: [PATCH 34/48] Add comment --- library/agent/api/fetchBlockedLists.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/library/agent/api/fetchBlockedLists.ts b/library/agent/api/fetchBlockedLists.ts index b3757b76c..f99e28b9e 100644 --- a/library/agent/api/fetchBlockedLists.ts +++ b/library/agent/api/fetchBlockedLists.ts @@ -30,6 +30,7 @@ export async function fetchBlockedLists(token: Token): Promise { headers: { // We need to set the Accept-Encoding header to "gzip" to receive the response in gzip format "Accept-Encoding": "gzip", + // Indicates to the server that this agent supports the new format with monitoring "x-supports-monitoring": "true", Authorization: token.asString(), }, From f4a0e4e2a82ead8490be4edae0fad3c7e34c715b Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 16 Apr 2025 11:36:00 +0200 Subject: [PATCH 35/48] Move user agent and IP statistics outside requests --- library/agent/Agent.ts | 2 + library/agent/InspectionStatistics.test.ts | 398 +++++++++--------- library/agent/InspectionStatistics.ts | 104 ++--- library/agent/api/Event.ts | 20 +- .../ReportingAPIRateLimitedClientSide.test.ts | 20 +- library/sources/HTTPServer.stats.test.ts | 18 +- 6 files changed, 289 insertions(+), 273 deletions(-) diff --git a/library/agent/Agent.ts b/library/agent/Agent.ts index 2846a4d6f..da4e8ce8f 100644 --- a/library/agent/Agent.ts +++ b/library/agent/Agent.ts @@ -312,6 +312,8 @@ export class Agent { startedAt: stats.startedAt, endedAt: endedAt, requests: stats.requests, + userAgents: stats.userAgents, + ipAddresses: stats.ipAddresses, }, hostnames: outgoingDomains, routes: routes, diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index a654d3d47..4fd30f2ce 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -39,16 +39,16 @@ t.test("it resets stats", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -64,16 +64,16 @@ t.test("it resets stats", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -100,16 +100,16 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -142,16 +142,16 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -184,16 +184,16 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -220,16 +220,16 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -262,16 +262,16 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -304,16 +304,16 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -365,16 +365,16 @@ t.test("it keeps track of amount of calls", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -422,16 +422,16 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -447,16 +447,16 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -473,16 +473,16 @@ t.test("it keeps track of requests", async () => { total: 1, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -499,16 +499,16 @@ t.test("it keeps track of requests", async () => { total: 2, blocked: 1, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -526,16 +526,16 @@ t.test("it keeps track of requests", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -560,16 +560,16 @@ t.test("it force compresses stats", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -612,16 +612,16 @@ t.test("it keeps track of aborted requests", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }); @@ -649,20 +649,20 @@ t.test("it keeps track of blocked IPs and user agents", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 1, - blocked: 1, - breakdown: { - // eslint-disable-next-line camelcase - ai_bots: { total: 1, blocked: 1 }, - }, + }, + userAgents: { + total: 1, + blocked: 1, + breakdown: { + // eslint-disable-next-line camelcase + ai_bots: { total: 1, blocked: 1 }, }, - ipAddresses: { - total: 1, - blocked: 1, - breakdown: { - "known_threat_actors/public_scanners": { total: 1, blocked: 1 }, - }, + }, + ipAddresses: { + total: 1, + blocked: 1, + breakdown: { + "known_threat_actors/public_scanners": { total: 1, blocked: 1 }, }, }, }); @@ -691,20 +691,20 @@ t.test("it keeps track of monitored IPs and user agents", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 1, - blocked: 0, - breakdown: { - // eslint-disable-next-line camelcase - ai_data_scrapers: { total: 1, blocked: 0 }, - }, + }, + userAgents: { + total: 1, + blocked: 0, + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 1, blocked: 0 }, }, - ipAddresses: { - total: 1, - blocked: 0, - breakdown: { - "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, - }, + }, + ipAddresses: { + total: 1, + blocked: 0, + breakdown: { + "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, }, }, }); @@ -723,20 +723,20 @@ t.test("it keeps track of monitored IPs and user agents", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 2, - blocked: 0, - breakdown: { - // eslint-disable-next-line camelcase - ai_data_scrapers: { total: 2, blocked: 0 }, - }, + }, + userAgents: { + total: 2, + blocked: 0, + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 2, blocked: 0 }, }, - ipAddresses: { - total: 2, - blocked: 0, - breakdown: { - "known_threat_actors/public_scanners": { total: 2, blocked: 0 }, - }, + }, + ipAddresses: { + total: 2, + blocked: 0, + breakdown: { + "known_threat_actors/public_scanners": { total: 2, blocked: 0 }, }, }, }); diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index 9afa6bf1f..573ba80a9 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -48,22 +48,20 @@ export class InspectionStatistics { total: number; blocked: number; }; - userAgents: UserAgentStats; - ipAddresses: IPAddressStats; } = { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }; + private userAgents: UserAgentStats = { + total: 0, + blocked: 0, + breakdown: {}, + }; + private ipAddresses: IPAddressStats = { + total: 0, + blocked: 0, + breakdown: {}, }; constructor({ @@ -97,16 +95,16 @@ export class InspectionStatistics { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }; + this.userAgents = { + total: 0, + blocked: 0, + breakdown: {}, + }; + this.ipAddresses = { + total: 0, + blocked: 0, + breakdown: {}, }; this.startedAt = Date.now(); } @@ -121,16 +119,16 @@ export class InspectionStatistics { total: number; blocked: number; }; - userAgents: { - total: number; - blocked: number; - breakdown: Record; - }; - ipAddresses: { - total: number; - blocked: number; - breakdown: Record; - }; + }; + userAgents: { + total: number; + blocked: number; + breakdown: Record; + }; + ipAddresses: { + total: number; + blocked: number; + breakdown: Record; }; } { const sinks: Record = {}; @@ -152,6 +150,8 @@ export class InspectionStatistics { sinks: sinks, startedAt: this.startedAt, requests: this.requests, + userAgents: this.userAgents, + ipAddresses: this.ipAddresses, }; } @@ -227,47 +227,47 @@ export class InspectionStatistics { } onBlockedIPAddress(key: string) { - this.requests.ipAddresses.total += 1; - this.requests.ipAddresses.blocked += 1; + this.ipAddresses.total += 1; + this.ipAddresses.blocked += 1; - if (!this.requests.ipAddresses.breakdown[key]) { - this.requests.ipAddresses.breakdown[key] = { total: 0, blocked: 0 }; + if (!this.ipAddresses.breakdown[key]) { + this.ipAddresses.breakdown[key] = { total: 0, blocked: 0 }; } - this.requests.ipAddresses.breakdown[key].total += 1; - this.requests.ipAddresses.breakdown[key].blocked += 1; + this.ipAddresses.breakdown[key].total += 1; + this.ipAddresses.breakdown[key].blocked += 1; } onBlockedUserAgent(key: string) { - this.requests.userAgents.total += 1; - this.requests.userAgents.blocked += 1; + this.userAgents.total += 1; + this.userAgents.blocked += 1; - if (!this.requests.userAgents.breakdown[key]) { - this.requests.userAgents.breakdown[key] = { total: 0, blocked: 0 }; + if (!this.userAgents.breakdown[key]) { + this.userAgents.breakdown[key] = { total: 0, blocked: 0 }; } - this.requests.userAgents.breakdown[key].total += 1; - this.requests.userAgents.breakdown[key].blocked += 1; + this.userAgents.breakdown[key].total += 1; + this.userAgents.breakdown[key].blocked += 1; } detectedMonitoredIPAddress(key: IPListKey) { - this.requests.ipAddresses.total += 1; + this.ipAddresses.total += 1; - if (!this.requests.ipAddresses.breakdown[key]) { - this.requests.ipAddresses.breakdown[key] = { total: 0, blocked: 0 }; + if (!this.ipAddresses.breakdown[key]) { + this.ipAddresses.breakdown[key] = { total: 0, blocked: 0 }; } - this.requests.ipAddresses.breakdown[key].total += 1; + this.ipAddresses.breakdown[key].total += 1; } detectedMonitoredUserAgent(key: UserAgentBotKey) { - this.requests.userAgents.total += 1; + this.userAgents.total += 1; - if (!this.requests.userAgents.breakdown[key]) { - this.requests.userAgents.breakdown[key] = { total: 0, blocked: 0 }; + if (!this.userAgents.breakdown[key]) { + this.userAgents.breakdown[key] = { total: 0, blocked: 0 }; } - this.requests.userAgents.breakdown[key].total += 1; + this.userAgents.breakdown[key].total += 1; } onAbortedRequest() { diff --git a/library/agent/api/Event.ts b/library/agent/api/Event.ts index 11d80755d..77137a4f8 100644 --- a/library/agent/api/Event.ts +++ b/library/agent/api/Event.ts @@ -92,16 +92,16 @@ type Heartbeat = { total: number; blocked: number; }; - userAgents: { - total: number; - blocked: number; - breakdown: Record; - }; - ipAddresses: { - total: number; - blocked: number; - breakdown: Record; - }; + }; + userAgents: { + total: number; + blocked: number; + breakdown: Record; + }; + ipAddresses: { + total: number; + blocked: number; + breakdown: Record; }; }; hostnames: { hostname: string; port: number | undefined; hits: number }[]; diff --git a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts index 58bef221b..a5a662b81 100644 --- a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts +++ b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts @@ -151,20 +151,20 @@ function generateHeartbeatEvent(): Event { total: 0, aborted: 0, attacksDetected: { - blocked: 0, - total: 0, - }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { total: 0, blocked: 0, - breakdown: {}, }, }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, + }, }, agent: { version: "1.0.0", diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts index 710bd2bc1..816b15a57 100644 --- a/library/sources/HTTPServer.stats.test.ts +++ b/library/sources/HTTPServer.stats.test.ts @@ -53,6 +53,10 @@ t.setTimeout(30 * 1000); const http = require("http") as typeof import("http"); +t.beforeEach(() => { + agent.getInspectionStatistics().reset(); +}); + t.test("it tracks monitored user agents", async () => { const server = http.createServer((req, res) => { res.setHeader("Content-Type", "text/plain"); @@ -91,7 +95,7 @@ t.test("it tracks monitored user agents", async () => { t.equal(response2.statusCode, 200); t.equal(response3.statusCode, 200); const stats = agent.getInspectionStatistics().getStats(); - t.same(stats.requests.userAgents, { + t.same(stats.userAgents, { total: 2, blocked: 0, breakdown: { @@ -99,6 +103,11 @@ t.test("it tracks monitored user agents", async () => { ai_data_scrapers: { total: 2, blocked: 0 }, }, }); + t.same(stats.ipAddresses, { + total: 0, + blocked: 0, + breakdown: {}, + }); server.close(); resolve(); }); @@ -135,7 +144,12 @@ t.test("it tracks monitored IP addresses", async () => { t.equal(response1.statusCode, 200); t.equal(response2.statusCode, 200); const stats = agent.getInspectionStatistics().getStats(); - t.same(stats.requests.ipAddresses, { + t.same(stats.userAgents, { + total: 0, + blocked: 0, + breakdown: {}, + }); + t.same(stats.ipAddresses, { total: 1, blocked: 0, breakdown: { From 00f2b763959e74e7cf3e04e4f8029ebbca17e5e5 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 16 Apr 2025 12:12:46 +0200 Subject: [PATCH 36/48] Move monitoring logic to checkIfRequestIsBlocked We moved the monitoring logic from createRequestListener to checkIfRequestIsBlocked to handle multiple matches for both IP addresses and user agents. This change allows us to track statistics for all matches while using the first non-monitored match for blocking. --- library/agent/ServiceConfig.ts | 63 ++++----------- .../http-server/checkIfRequestIsBlocked.ts | 77 +++++++++++-------- .../http-server/createRequestListener.ts | 23 ------ 3 files changed, 62 insertions(+), 101 deletions(-) diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index 54ea40eaa..dcef27937 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -117,20 +117,14 @@ export class ServiceConfig { isIPAddressBlocked( ip: string - ): { blocked: true; reason: string; key: string } | { blocked: false } { - const blocklist = this.blockedIPAddresses - .filter((list) => !list.monitor) - .find((blocklist) => blocklist.blocklist.has(ip)); - - if (blocklist) { - return { - blocked: true, - reason: blocklist.description, - key: blocklist.key, - }; - } - - return { blocked: false }; + ): Array<{ key: string; monitor: boolean; reason: string }> { + return this.blockedIPAddresses + .filter((list) => list.blocklist.has(ip)) + .map((list) => ({ + key: list.key, + monitor: list.monitor, + reason: list.description, + })); } private setBlockedIPAddresses(blockedIPAddresses: IPList[]) { @@ -162,21 +156,13 @@ export class ServiceConfig { this.setBlockedUserAgents(blockedUserAgents); } - isUserAgentBlocked( - ua: string - ): { blocked: boolean; key: string } | { blocked: false } { - const match = this.blockedUserAgents - .filter((list) => !list.monitor) - .find((list) => list.pattern.test(ua)); - - if (match) { - return { - blocked: true, - key: match.key, - }; - } - - return { blocked: false }; + isUserAgentBlocked(ua: string): Array<{ key: string; monitor: boolean }> { + return this.blockedUserAgents + .filter((list) => list.pattern.test(ua)) + .map((list) => ({ + key: list.key, + monitor: list.monitor, + })); } private setAllowedIPAddresses(ipAddresses: IPList[]) { @@ -215,25 +201,6 @@ export class ServiceConfig { return { allowed: !!allowlist }; } - isMonitoredIPAddress(ip: string): { key: string } | undefined { - const blocklist = this.blockedIPAddresses - .filter((list) => list.monitor) - .find((list) => list.blocklist.has(ip)); - if (blocklist) { - return { key: blocklist.key }; - } - - return undefined; - } - - isMonitoredUserAgent(ua: string): { key: string } | undefined { - const match = this.blockedUserAgents - .filter((list) => list.monitor) - .find((list) => list.pattern.test(ua)); - - return match ? { key: match.key } : undefined; - } - updateConfig( endpoints: EndpointConfig[], lastUpdatedAt: number, diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index f0609dc91..bdd465091 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -65,42 +65,59 @@ export function checkIfRequestIsBlocked( return true; } - const result = context.remoteAddress + const blockedIPs = context.remoteAddress ? agent.getConfig().isIPAddressBlocked(context.remoteAddress) - : ({ blocked: false } as const); - - if (result.blocked) { - res.statusCode = 403; - res.setHeader("Content-Type", "text/plain"); - - let message = `Your IP address is blocked due to ${escapeHTML(result.reason)}.`; - if (context.remoteAddress) { - message += ` (Your IP: ${escapeHTML(context.remoteAddress)})`; + : []; + + if (blockedIPs.length > 0) { + // The same IP address can be blocked by multiple lists + blockedIPs.forEach((match) => { + if (match.monitor) { + agent.getInspectionStatistics().detectedMonitoredIPAddress(match.key); + } else { + agent.getInspectionStatistics().onBlockedIPAddress(match.key); + } + }); + + const blockingMatch = blockedIPs.find((match) => !match.monitor); + if (blockingMatch) { + res.statusCode = 403; + res.setHeader("Content-Type", "text/plain"); + + let message = `Your IP address is blocked due to ${escapeHTML(blockingMatch.reason)}.`; + if (context.remoteAddress) { + message += ` (Your IP: ${escapeHTML(context.remoteAddress)})`; + } + + res.end(message); + return true; } - - res.end(message); - - agent.getInspectionStatistics().onBlockedIPAddress(result.key); - - return true; } - const isUserAgentBlocked = + const blockedUserAgents = context.headers && typeof context.headers["user-agent"] === "string" ? agent.getConfig().isUserAgentBlocked(context.headers["user-agent"]) - : ({ blocked: false } as const); - - if (isUserAgentBlocked.blocked) { - res.statusCode = 403; - res.setHeader("Content-Type", "text/plain"); - - res.end( - "You are not allowed to access this resource because you have been identified as a bot." - ); - - agent.getInspectionStatistics().onBlockedUserAgent(isUserAgentBlocked.key); - - return true; + : []; + + if (blockedUserAgents.length > 0) { + // The same user agent can be blocked by multiple lists + blockedUserAgents.forEach((match) => { + if (match.monitor) { + agent.getInspectionStatistics().detectedMonitoredUserAgent(match.key); + } else { + agent.getInspectionStatistics().onBlockedUserAgent(match.key); + } + }); + + if (blockedUserAgents.find((match) => !match.monitor)) { + res.statusCode = 403; + res.setHeader("Content-Type", "text/plain"); + + res.end( + "You are not allowed to access this resource because you have been identified as a bot." + ); + return true; + } } return false; diff --git a/library/sources/http-server/createRequestListener.ts b/library/sources/http-server/createRequestListener.ts index f59a72912..70cee60bd 100644 --- a/library/sources/http-server/createRequestListener.ts +++ b/library/sources/http-server/createRequestListener.ts @@ -105,28 +105,5 @@ function createOnFinishRequestHandler( // Only count the request if the route is discovered agent.getInspectionStatistics().onRequest(); } - - if (context) { - if ( - context.headers && - typeof context.headers["user-agent"] === "string" - ) { - const match = agent - .getConfig() - .isMonitoredUserAgent(context.headers["user-agent"]); - if (match) { - agent.getInspectionStatistics().detectedMonitoredUserAgent(match.key); - } - } - - if (context.remoteAddress) { - const match = agent - .getConfig() - .isMonitoredIPAddress(context.remoteAddress); - if (match) { - agent.getInspectionStatistics().detectedMonitoredIPAddress(match.key); - } - } - } }; } From 65162e38ba32b0f7bd5e7be6de926a79bbe78297 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 16 Apr 2025 14:20:56 +0200 Subject: [PATCH 37/48] Fixes --- library/agent/Agent.test.ts | 81 ++++++------ library/agent/ServiceConfig.test.ts | 138 ++++++++++++--------- library/agent/ServiceConfig.ts | 4 +- library/sources/FunctionsFramework.test.ts | 30 ----- library/sources/Lambda.test.ts | 30 ++--- 5 files changed, 135 insertions(+), 148 deletions(-) diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 76b1cbbe5..326def8e6 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -1074,16 +1074,20 @@ t.test("it fetches blocked lists", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { - blocked: true, - reason: "Description", - key: "some/key", - }); - t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), { - blocked: true, - reason: "Description", - key: "some/key", - }); + t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), [ + { + key: "some/key", + monitor: false, + reason: "Description", + }, + ]); + t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), [ + { + key: "some/key", + monitor: false, + reason: "Description", + }, + ]); t.same( agent @@ -1091,23 +1095,25 @@ t.test("it fetches blocked lists", async () => { .isUserAgentBlocked( "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" ), - { - blocked: true, - key: "ai_bots", - } + [ + { + key: "ai_bots", + monitor: false, + }, + ] ); t.same( agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible) Bytespider"), - { - blocked: true, - key: "ai_bots", - } + [ + { + key: "ai_bots", + monitor: false, + }, + ] ); - t.same(agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible)"), { - blocked: false, - }); + t.same(agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible)"), []); }); t.test("it does not fetch blocked IPs if serverless", async () => { @@ -1121,10 +1127,7 @@ t.test("it does not fetch blocked IPs if serverless", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { - blocked: false, - }); - + t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), []); t.same(agent.getConfig().isAllowedIPAddress("1.3.2.4"), { allowed: true, }); @@ -1135,9 +1138,7 @@ t.test("it does not fetch blocked IPs if serverless", async () => { .isUserAgentBlocked( "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" ), - { - blocked: false, - } + [] ); }); @@ -1152,16 +1153,20 @@ t.test("it only allows some IP addresses", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { - blocked: true, - reason: "Description", - key: "some/key", - }); - t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), { - blocked: true, - reason: "Description", - key: "some/key", - }); + t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), [ + { + key: "some/key", + monitor: false, + reason: "Description", + }, + ]); + t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), [ + { + key: "some/key", + monitor: false, + reason: "Description", + }, + ]); t.same(agent.getConfig().isAllowedIPAddress("1.2.3.4"), { allowed: false, diff --git a/library/agent/ServiceConfig.test.ts b/library/agent/ServiceConfig.test.ts index 967177a3e..589c2ab77 100644 --- a/library/agent/ServiceConfig.test.ts +++ b/library/agent/ServiceConfig.test.ts @@ -112,41 +112,51 @@ t.test("ip blocking works", async () => { ], [] ); - t.same(config.isIPAddressBlocked("1.2.3.4"), { - blocked: true, - reason: "description", - key: "geoip/Belgium;BE", - }); - t.same(config.isIPAddressBlocked("2.3.4.5"), { blocked: false }); - t.same(config.isIPAddressBlocked("192.168.2.2"), { - blocked: true, - reason: "description", - key: "geoip/Belgium;BE", - }); - t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::1"), { - blocked: true, - reason: "description", - key: "geoip/Belgium;BE", - }); - t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::2"), { - blocked: false, - }); - t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::1"), { - blocked: true, - reason: "description", - key: "geoip/Belgium;BE", - }); - t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::2"), { - blocked: true, - reason: "description", - key: "geoip/Belgium;BE", - }); - t.same(config.isIPAddressBlocked("5.6.7.8"), { - blocked: true, - reason: "description", - key: "geoip/Belgium;BE", - }); - t.same(config.isIPAddressBlocked("1.2"), { blocked: false }); + t.same(config.isIPAddressBlocked("1.2.3.4"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.isIPAddressBlocked("2.3.4.5"), []); + t.same(config.isIPAddressBlocked("192.168.2.2"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::1"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::2"), []); + t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::1"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::2"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.isIPAddressBlocked("5.6.7.8"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.isIPAddressBlocked("1.2"), []); }); t.test("it blocks bots", async () => { @@ -159,19 +169,23 @@ t.test("it blocks bots", async () => { }, ]); - t.same(config.isUserAgentBlocked("googlebot"), { - blocked: true, - key: "test", - }); - t.same(config.isUserAgentBlocked("123 bingbot abc"), { - blocked: true, - key: "test", - }); - t.same(config.isUserAgentBlocked("bing"), { blocked: false }); + t.same(config.isUserAgentBlocked("googlebot"), [ + { + key: "test", + monitor: false, + }, + ]); + t.same(config.isUserAgentBlocked("123 bingbot abc"), [ + { + key: "test", + monitor: false, + }, + ]); + t.same(config.isUserAgentBlocked("bing"), []); config.updateBlockedUserAgents([]); - t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); + t.same(config.isUserAgentBlocked("googlebot"), []); }); t.test("restricting access to some ips", async () => { @@ -289,17 +303,19 @@ t.test("it updates blocked user agents", async (t) => { monitor: false, }, ]); - t.same(config.isUserAgentBlocked("googlebot"), { - blocked: true, - key: "bots", - }); - t.same(config.isUserAgentBlocked("firefox"), { blocked: false }); + t.same(config.isUserAgentBlocked("googlebot"), [ + { + key: "bots", + monitor: false, + }, + ]); + t.same(config.isUserAgentBlocked("firefox"), []); }); t.test("it updates blocked user agents with empty list", async (t) => { const config = new ServiceConfig([], 0, [], [], false, [], []); config.updateBlockedUserAgents([]); - t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); + t.same(config.isUserAgentBlocked("googlebot"), []); }); t.test("it updates blocked user agents with invalid pattern", async (t) => { @@ -311,16 +327,18 @@ t.test("it updates blocked user agents with invalid pattern", async (t) => { monitor: false, }, ]); - t.same(config.isUserAgentBlocked("googlebot"), { - blocked: true, - key: "bots", - }); + t.same(config.isUserAgentBlocked("googlebot"), [ + { + key: "bots", + monitor: false, + }, + ]); }); t.test("it updates blocked user agents with empty pattern", async (t) => { const config = new ServiceConfig([], 0, [], [], false, [], []); config.updateBlockedUserAgents([]); - t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); + t.same(config.isUserAgentBlocked("googlebot"), []); }); t.test("it updates blocked user agents with monitor flag", async (t) => { @@ -332,6 +350,10 @@ t.test("it updates blocked user agents with monitor flag", async (t) => { monitor: true, }, ]); - t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); - t.same(config.isMonitoredUserAgent("googlebot"), { key: "bots" }); + t.same(config.isUserAgentBlocked("googlebot"), [ + { + key: "bots", + monitor: true, + }, + ]); }); diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index dcef27937..36a22e35a 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -117,7 +117,7 @@ export class ServiceConfig { isIPAddressBlocked( ip: string - ): Array<{ key: string; monitor: boolean; reason: string }> { + ): { key: string; monitor: boolean; reason: string }[] { return this.blockedIPAddresses .filter((list) => list.blocklist.has(ip)) .map((list) => ({ @@ -156,7 +156,7 @@ export class ServiceConfig { this.setBlockedUserAgents(blockedUserAgents); } - isUserAgentBlocked(ua: string): Array<{ key: string; monitor: boolean }> { + isUserAgentBlocked(ua: string): { key: string; monitor: boolean }[] { return this.blockedUserAgents .filter((list) => list.pattern.test(ua)) .map((list) => ({ diff --git a/library/sources/FunctionsFramework.test.ts b/library/sources/FunctionsFramework.test.ts index 6ac0a96e3..ebb70f071 100644 --- a/library/sources/FunctionsFramework.test.ts +++ b/library/sources/FunctionsFramework.test.ts @@ -121,16 +121,6 @@ t.test("it counts requests", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, }); }); @@ -147,16 +137,6 @@ t.test("it counts attacks", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 1, blocked: 1 }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, }); }); @@ -173,16 +153,6 @@ t.test("it counts request if error", async (t) => { total: 1, aborted: 0, attacksDetected: { total: 0, blocked: 0 }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, }); }); diff --git a/library/sources/Lambda.test.ts b/library/sources/Lambda.test.ts index 1470cf130..354ccddec 100644 --- a/library/sources/Lambda.test.ts +++ b/library/sources/Lambda.test.ts @@ -303,16 +303,16 @@ t.test("it sends heartbeat after first and every 10 minutes", async () => { total: 0, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, + }, + userAgents: { + total: 0, + blocked: 0, + breakdown: {}, + }, + ipAddresses: { + total: 0, + blocked: 0, + breakdown: {}, }, }, middlewareInstalled: false, @@ -492,16 +492,6 @@ t.test("it counts attacks", async () => { total: 1, blocked: 0, }, - userAgents: { - total: 0, - blocked: 0, - breakdown: {}, - }, - ipAddresses: { - total: 0, - blocked: 0, - breakdown: {}, - }, }, }); }); From a1c97d181569aba2fe9baa2cbc8eafbfb0ea27ba Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 16 Apr 2025 14:33:01 +0200 Subject: [PATCH 38/48] Make sure to increment total only once --- library/agent/InspectionStatistics.test.ts | 54 ++++++++++++++-- library/agent/InspectionStatistics.ts | 64 +++++++++---------- .../http-server/checkIfRequestIsBlocked.ts | 16 +---- 3 files changed, 82 insertions(+), 52 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index 4fd30f2ce..9e3e6a000 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -636,8 +636,10 @@ t.test("it keeps track of blocked IPs and user agents", async () => { maxCompressedStatsInMemory: 5, }); - stats.onBlockedIPAddress("known_threat_actors/public_scanners"); - stats.onBlockedUserAgent("ai_bots"); + stats.onIPAddressMatches([ + { key: "known_threat_actors/public_scanners", monitor: false }, + ]); + stats.onUserAgentMatches([{ key: "ai_bots", monitor: false }]); t.same(stats.getStats(), { sinks: {}, @@ -678,8 +680,10 @@ t.test("it keeps track of monitored IPs and user agents", async () => { maxCompressedStatsInMemory: 5, }); - stats.detectedMonitoredIPAddress("known_threat_actors/public_scanners"); - stats.detectedMonitoredUserAgent("ai_data_scrapers"); + stats.onIPAddressMatches([ + { key: "known_threat_actors/public_scanners", monitor: true }, + ]); + stats.onUserAgentMatches([{ key: "ai_data_scrapers", monitor: true }]); t.same(stats.getStats(), { sinks: {}, @@ -710,8 +714,10 @@ t.test("it keeps track of monitored IPs and user agents", async () => { }); // Test multiple occurrences - stats.detectedMonitoredIPAddress("known_threat_actors/public_scanners"); - stats.detectedMonitoredUserAgent("ai_data_scrapers"); + stats.onIPAddressMatches([ + { key: "known_threat_actors/public_scanners", monitor: true }, + ]); + stats.onUserAgentMatches([{ key: "ai_data_scrapers", monitor: true }]); t.same(stats.getStats(), { sinks: {}, @@ -743,3 +749,39 @@ t.test("it keeps track of monitored IPs and user agents", async () => { clock.uninstall(); }); + +t.test("should track multiple matches for the same key", (t) => { + const stats = new InspectionStatistics({ + maxPerfSamplesInMemory: 100, + maxCompressedStatsInMemory: 10, + }); + + stats.onIPAddressMatches([ + { key: "known_threat_actors/public_scanners", monitor: true }, + { key: "known_threat_actors/public_scanners", monitor: false }, + ]); + stats.onUserAgentMatches([ + { key: "ai_data_scrapers", monitor: true }, + { key: "ai_data_scrapers", monitor: false }, + ]); + + const result = stats.getStats(); + + t.equal(result.ipAddresses.total, 1); + t.equal(result.ipAddresses.blocked, 1); + t.equal( + result.ipAddresses.breakdown["known_threat_actors/public_scanners"].total, + 2 + ); + t.equal( + result.ipAddresses.breakdown["known_threat_actors/public_scanners"].blocked, + 1 + ); + + t.equal(result.userAgents.total, 1); + t.equal(result.userAgents.blocked, 1); + t.equal(result.userAgents.breakdown["ai_data_scrapers"].total, 2); + t.equal(result.userAgents.breakdown["ai_data_scrapers"].blocked, 1); + + t.end(); +}); diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index 573ba80a9..d7a63fa50 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -226,48 +226,48 @@ export class InspectionStatistics { } } - onBlockedIPAddress(key: string) { - this.ipAddresses.total += 1; - this.ipAddresses.blocked += 1; - - if (!this.ipAddresses.breakdown[key]) { - this.ipAddresses.breakdown[key] = { total: 0, blocked: 0 }; + onIPAddressMatches(matches: { key: IPListKey; monitor: boolean }[]) { + if (matches.length > 0) { + this.ipAddresses.total += 1; } - this.ipAddresses.breakdown[key].total += 1; - this.ipAddresses.breakdown[key].blocked += 1; - } - - onBlockedUserAgent(key: string) { - this.userAgents.total += 1; - this.userAgents.blocked += 1; - - if (!this.userAgents.breakdown[key]) { - this.userAgents.breakdown[key] = { total: 0, blocked: 0 }; + const blockingMatches = matches.filter((match) => !match.monitor); + if (blockingMatches.length > 0) { + this.ipAddresses.blocked += 1; } - this.userAgents.breakdown[key].total += 1; - this.userAgents.breakdown[key].blocked += 1; - } - - detectedMonitoredIPAddress(key: IPListKey) { - this.ipAddresses.total += 1; - - if (!this.ipAddresses.breakdown[key]) { - this.ipAddresses.breakdown[key] = { total: 0, blocked: 0 }; - } + matches.forEach((match) => { + if (!this.ipAddresses.breakdown[match.key]) { + this.ipAddresses.breakdown[match.key] = { total: 0, blocked: 0 }; + } + this.ipAddresses.breakdown[match.key].total += 1; - this.ipAddresses.breakdown[key].total += 1; + if (!match.monitor) { + this.ipAddresses.breakdown[match.key].blocked += 1; + } + }); } - detectedMonitoredUserAgent(key: UserAgentBotKey) { - this.userAgents.total += 1; + onUserAgentMatches(matches: { key: UserAgentBotKey; monitor: boolean }[]) { + if (matches.length > 0) { + this.userAgents.total += 1; + } - if (!this.userAgents.breakdown[key]) { - this.userAgents.breakdown[key] = { total: 0, blocked: 0 }; + const blockingMatches = matches.filter((match) => !match.monitor); + if (blockingMatches.length > 0) { + this.userAgents.blocked += 1; } - this.userAgents.breakdown[key].total += 1; + matches.forEach((match) => { + if (!this.userAgents.breakdown[match.key]) { + this.userAgents.breakdown[match.key] = { total: 0, blocked: 0 }; + } + this.userAgents.breakdown[match.key].total += 1; + + if (!match.monitor) { + this.userAgents.breakdown[match.key].blocked += 1; + } + }); } onAbortedRequest() { diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index bdd465091..3d7e3fbe2 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -71,13 +71,7 @@ export function checkIfRequestIsBlocked( if (blockedIPs.length > 0) { // The same IP address can be blocked by multiple lists - blockedIPs.forEach((match) => { - if (match.monitor) { - agent.getInspectionStatistics().detectedMonitoredIPAddress(match.key); - } else { - agent.getInspectionStatistics().onBlockedIPAddress(match.key); - } - }); + agent.getInspectionStatistics().onIPAddressMatches(blockedIPs); const blockingMatch = blockedIPs.find((match) => !match.monitor); if (blockingMatch) { @@ -101,13 +95,7 @@ export function checkIfRequestIsBlocked( if (blockedUserAgents.length > 0) { // The same user agent can be blocked by multiple lists - blockedUserAgents.forEach((match) => { - if (match.monitor) { - agent.getInspectionStatistics().detectedMonitoredUserAgent(match.key); - } else { - agent.getInspectionStatistics().onBlockedUserAgent(match.key); - } - }); + agent.getInspectionStatistics().onUserAgentMatches(blockedUserAgents); if (blockedUserAgents.find((match) => !match.monitor)) { res.statusCode = 403; From 9914a3aba6f6693407d0c66d2b447403ee8f25fe Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 16 Apr 2025 14:37:21 +0200 Subject: [PATCH 39/48] Reduce diff --- library/agent/api/ReportingAPIRateLimitedClientSide.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts index a5a662b81..522f6102a 100644 --- a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts +++ b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts @@ -151,8 +151,8 @@ function generateHeartbeatEvent(): Event { total: 0, aborted: 0, attacksDetected: { - total: 0, blocked: 0, + total: 0, }, }, userAgents: { From 1b8bb579850b1649084752ba06d32154d98c2b60 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 16 Apr 2025 14:52:16 +0200 Subject: [PATCH 40/48] Add try/catch around new RegExp --- library/agent/ServiceConfig.ts | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index 36a22e35a..5d8c20cbd 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -145,11 +145,21 @@ export class ServiceConfig { } private setBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { - this.blockedUserAgents = blockedUserAgents.map((list) => ({ - key: list.key, - pattern: new RegExp(list.pattern, "i"), - monitor: list.monitor, - })); + this.blockedUserAgents = []; + + for (const list of blockedUserAgents) { + if (list.pattern.length > 0) { + try { + this.blockedUserAgents.push({ + key: list.key, + pattern: new RegExp(list.pattern, "i"), + monitor: list.monitor, + }); + } catch { + // Invalid regex, ignore this entry + } + } + } } updateBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { From 94f49d0a0a49dec60439d4d9abb7e2ddc9e7710a Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Wed, 16 Apr 2025 15:05:58 +0200 Subject: [PATCH 41/48] Improve tests --- library/agent/ServiceConfig.test.ts | 75 +++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/library/agent/ServiceConfig.test.ts b/library/agent/ServiceConfig.test.ts index 589c2ab77..f987da3ff 100644 --- a/library/agent/ServiceConfig.test.ts +++ b/library/agent/ServiceConfig.test.ts @@ -157,6 +157,23 @@ t.test("ip blocking works", async () => { }, ]); t.same(config.isIPAddressBlocked("1.2"), []); + + config.updateBlockedIPAddresses([]); + t.same(config.isIPAddressBlocked("1.2.3.4"), []); +}); + +t.test("update blocked IPs contains empty IPs", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedIPAddresses([ + { + key: "geoip/Belgium;BE", + source: "geoip", + description: "description", + ips: [], + monitor: false, + }, + ]); + t.same(config.isIPAddressBlocked("1.2.3.4"), []); }); t.test("it blocks bots", async () => { @@ -294,7 +311,7 @@ t.test("bypassed ips support cidr", async () => { t.same(config.isBypassedIP("999.999.999.999"), false); }); -t.test("it updates blocked user agents", async (t) => { +t.test("should return all matching user agent patterns", async (t) => { const config = new ServiceConfig([], 0, [], [], false, [], []); config.updateBlockedUserAgents([ { @@ -312,13 +329,7 @@ t.test("it updates blocked user agents", async (t) => { t.same(config.isUserAgentBlocked("firefox"), []); }); -t.test("it updates blocked user agents with empty list", async (t) => { - const config = new ServiceConfig([], 0, [], [], false, [], []); - config.updateBlockedUserAgents([]); - t.same(config.isUserAgentBlocked("googlebot"), []); -}); - -t.test("it updates blocked user agents with invalid pattern", async (t) => { +t.test("it returns and updates blocked user agents", async (t) => { const config = new ServiceConfig([], 0, [], [], false, [], []); config.updateBlockedUserAgents([ { @@ -326,34 +337,58 @@ t.test("it updates blocked user agents with invalid pattern", async (t) => { pattern: "googlebot|bingbot", monitor: false, }, + { + key: "crawlers", + pattern: "googlebot", + monitor: true, + }, ]); t.same(config.isUserAgentBlocked("googlebot"), [ { key: "bots", monitor: false, }, + { + key: "crawlers", + monitor: true, + }, + ]); + t.same(config.isUserAgentBlocked("bingbot"), [ + { + key: "bots", + monitor: false, + }, ]); -}); -t.test("it updates blocked user agents with empty pattern", async (t) => { - const config = new ServiceConfig([], 0, [], [], false, [], []); config.updateBlockedUserAgents([]); t.same(config.isUserAgentBlocked("googlebot"), []); + t.same(config.isUserAgentBlocked("bingbot"), []); + t.same(config.isUserAgentBlocked("firefox"), []); }); -t.test("it updates blocked user agents with monitor flag", async (t) => { +t.test("it ignores user agent lists with empty patterns", async (t) => { const config = new ServiceConfig([], 0, [], [], false, [], []); config.updateBlockedUserAgents([ { key: "bots", - pattern: "googlebot|bingbot", - monitor: true, - }, - ]); - t.same(config.isUserAgentBlocked("googlebot"), [ - { - key: "bots", - monitor: true, + pattern: "", + monitor: false, }, ]); + t.same(config.isUserAgentBlocked("googlebot"), []); }); + +t.test( + "it does not throw error when updating user agent lists with invalid patterns", + async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([ + { + key: "bots", + pattern: "[", + monitor: false, + }, + ]); + t.same(config.isUserAgentBlocked("googlebot"), []); + } +); From 519a39d930b0c1630205b40a6f9ae18d466a2093 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 17 Apr 2025 14:07:45 +0200 Subject: [PATCH 42/48] Rename AgentBlocklist to BotBlocklist --- library/agent/ServiceConfig.ts | 6 +++--- library/agent/api/fetchBlockedLists.ts | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index 5d8c20cbd..22c88cbe0 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -2,7 +2,7 @@ import { IPMatcher } from "../helpers/ip-matcher/IPMatcher"; import { LimitedContext, matchEndpoints } from "../helpers/matchEndpoints"; import { isPrivateIP } from "../vulnerabilities/ssrf/isPrivateIP"; import type { Endpoint, EndpointConfig } from "./Config"; -import { AgentBlockList, IPList } from "./api/fetchBlockedLists"; +import { BotBlocklist, IPList } from "./api/fetchBlockedLists"; export class ServiceConfig { private blockedUserIds: Map = new Map(); @@ -144,7 +144,7 @@ export class ServiceConfig { this.setBlockedIPAddresses(blockedIPAddresses); } - private setBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { + private setBlockedUserAgents(blockedUserAgents: BotBlocklist[]) { this.blockedUserAgents = []; for (const list of blockedUserAgents) { @@ -162,7 +162,7 @@ export class ServiceConfig { } } - updateBlockedUserAgents(blockedUserAgents: AgentBlockList[]) { + updateBlockedUserAgents(blockedUserAgents: BotBlocklist[]) { this.setBlockedUserAgents(blockedUserAgents); } diff --git a/library/agent/api/fetchBlockedLists.ts b/library/agent/api/fetchBlockedLists.ts index f99e28b9e..b6c7ee305 100644 --- a/library/agent/api/fetchBlockedLists.ts +++ b/library/agent/api/fetchBlockedLists.ts @@ -10,7 +10,7 @@ export type IPList = { monitor: boolean; }; -export type AgentBlockList = { +export type BotBlocklist = { key: string; pattern: string; // e.g. "Googlebot|Bingbot" monitor: boolean; @@ -19,7 +19,7 @@ export type AgentBlockList = { export type Response = { blockedIPAddresses: IPList[]; allowedIPAddresses: IPList[]; - blockedUserAgents: AgentBlockList[]; + blockedUserAgents: BotBlocklist[]; }; export async function fetchBlockedLists(token: Token): Promise { From 2fa9007447fd85386d50db632c3677ce800c294e Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 17 Apr 2025 15:21:57 +0200 Subject: [PATCH 43/48] Use .find(...) for performance --- library/agent/InspectionStatistics.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index d7a63fa50..f972ad083 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -231,8 +231,8 @@ export class InspectionStatistics { this.ipAddresses.total += 1; } - const blockingMatches = matches.filter((match) => !match.monitor); - if (blockingMatches.length > 0) { + const blockingMatch = matches.find((match) => !match.monitor); + if (blockingMatch) { this.ipAddresses.blocked += 1; } @@ -240,6 +240,7 @@ export class InspectionStatistics { if (!this.ipAddresses.breakdown[match.key]) { this.ipAddresses.breakdown[match.key] = { total: 0, blocked: 0 }; } + this.ipAddresses.breakdown[match.key].total += 1; if (!match.monitor) { @@ -253,8 +254,8 @@ export class InspectionStatistics { this.userAgents.total += 1; } - const blockingMatches = matches.filter((match) => !match.monitor); - if (blockingMatches.length > 0) { + const blockingMatch = matches.find((match) => !match.monitor); + if (blockingMatch) { this.userAgents.blocked += 1; } @@ -262,6 +263,7 @@ export class InspectionStatistics { if (!this.userAgents.breakdown[match.key]) { this.userAgents.breakdown[match.key] = { total: 0, blocked: 0 }; } + this.userAgents.breakdown[match.key].total += 1; if (!match.monitor) { From 3f58756024a6584ea8d226b12c2c6929a7255639 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 17 Apr 2025 15:24:33 +0200 Subject: [PATCH 44/48] Rename methods (we return a list of matches now) --- library/agent/Agent.test.ts | 23 +++++---- library/agent/ServiceConfig.test.ts | 48 +++++++++---------- library/agent/ServiceConfig.ts | 4 +- .../http-server/checkIfRequestIsBlocked.ts | 4 +- 4 files changed, 42 insertions(+), 37 deletions(-) diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 326def8e6..22e6e5fb6 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -1074,14 +1074,14 @@ t.test("it fetches blocked lists", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), [ + t.same(agent.getConfig().getBlockedIPAddresses("1.3.2.4"), [ { key: "some/key", monitor: false, reason: "Description", }, ]); - t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), [ + t.same(agent.getConfig().getBlockedIPAddresses("fe80::1234:5678:abcd:ef12"), [ { key: "some/key", monitor: false, @@ -1092,7 +1092,7 @@ t.test("it fetches blocked lists", async () => { t.same( agent .getConfig() - .isUserAgentBlocked( + .getBlockedUserAgents( "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" ), [ @@ -1104,7 +1104,9 @@ t.test("it fetches blocked lists", async () => { ); t.same( - agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible) Bytespider"), + agent + .getConfig() + .getBlockedUserAgents("Mozilla/5.0 (compatible) Bytespider"), [ { key: "ai_bots", @@ -1113,7 +1115,10 @@ t.test("it fetches blocked lists", async () => { ] ); - t.same(agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible)"), []); + t.same( + agent.getConfig().getBlockedUserAgents("Mozilla/5.0 (compatible)"), + [] + ); }); t.test("it does not fetch blocked IPs if serverless", async () => { @@ -1127,7 +1132,7 @@ t.test("it does not fetch blocked IPs if serverless", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), []); + t.same(agent.getConfig().getBlockedIPAddresses("1.3.2.4"), []); t.same(agent.getConfig().isAllowedIPAddress("1.3.2.4"), { allowed: true, }); @@ -1135,7 +1140,7 @@ t.test("it does not fetch blocked IPs if serverless", async () => { t.same( agent .getConfig() - .isUserAgentBlocked( + .getBlockedUserAgents( "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" ), [] @@ -1153,14 +1158,14 @@ t.test("it only allows some IP addresses", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), [ + t.same(agent.getConfig().getBlockedIPAddresses("1.3.2.4"), [ { key: "some/key", monitor: false, reason: "Description", }, ]); - t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), [ + t.same(agent.getConfig().getBlockedIPAddresses("fe80::1234:5678:abcd:ef12"), [ { key: "some/key", monitor: false, diff --git a/library/agent/ServiceConfig.test.ts b/library/agent/ServiceConfig.test.ts index f987da3ff..4e366b11b 100644 --- a/library/agent/ServiceConfig.test.ts +++ b/library/agent/ServiceConfig.test.ts @@ -112,54 +112,54 @@ t.test("ip blocking works", async () => { ], [] ); - t.same(config.isIPAddressBlocked("1.2.3.4"), [ + t.same(config.getBlockedIPAddresses("1.2.3.4"), [ { key: "geoip/Belgium;BE", monitor: false, reason: "description", }, ]); - t.same(config.isIPAddressBlocked("2.3.4.5"), []); - t.same(config.isIPAddressBlocked("192.168.2.2"), [ + t.same(config.getBlockedIPAddresses("2.3.4.5"), []); + t.same(config.getBlockedIPAddresses("192.168.2.2"), [ { key: "geoip/Belgium;BE", monitor: false, reason: "description", }, ]); - t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::1"), [ + t.same(config.getBlockedIPAddresses("fd00:1234:5678:9abc::1"), [ { key: "geoip/Belgium;BE", monitor: false, reason: "description", }, ]); - t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::2"), []); - t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::1"), [ + t.same(config.getBlockedIPAddresses("fd00:1234:5678:9abc::2"), []); + t.same(config.getBlockedIPAddresses("fd00:3234:5678:9abc::1"), [ { key: "geoip/Belgium;BE", monitor: false, reason: "description", }, ]); - t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::2"), [ + t.same(config.getBlockedIPAddresses("fd00:3234:5678:9abc::2"), [ { key: "geoip/Belgium;BE", monitor: false, reason: "description", }, ]); - t.same(config.isIPAddressBlocked("5.6.7.8"), [ + t.same(config.getBlockedIPAddresses("5.6.7.8"), [ { key: "geoip/Belgium;BE", monitor: false, reason: "description", }, ]); - t.same(config.isIPAddressBlocked("1.2"), []); + t.same(config.getBlockedIPAddresses("1.2"), []); config.updateBlockedIPAddresses([]); - t.same(config.isIPAddressBlocked("1.2.3.4"), []); + t.same(config.getBlockedIPAddresses("1.2.3.4"), []); }); t.test("update blocked IPs contains empty IPs", async (t) => { @@ -173,7 +173,7 @@ t.test("update blocked IPs contains empty IPs", async (t) => { monitor: false, }, ]); - t.same(config.isIPAddressBlocked("1.2.3.4"), []); + t.same(config.getBlockedIPAddresses("1.2.3.4"), []); }); t.test("it blocks bots", async () => { @@ -186,23 +186,23 @@ t.test("it blocks bots", async () => { }, ]); - t.same(config.isUserAgentBlocked("googlebot"), [ + t.same(config.getBlockedUserAgents("googlebot"), [ { key: "test", monitor: false, }, ]); - t.same(config.isUserAgentBlocked("123 bingbot abc"), [ + t.same(config.getBlockedUserAgents("123 bingbot abc"), [ { key: "test", monitor: false, }, ]); - t.same(config.isUserAgentBlocked("bing"), []); + t.same(config.getBlockedUserAgents("bing"), []); config.updateBlockedUserAgents([]); - t.same(config.isUserAgentBlocked("googlebot"), []); + t.same(config.getBlockedUserAgents("googlebot"), []); }); t.test("restricting access to some ips", async () => { @@ -320,13 +320,13 @@ t.test("should return all matching user agent patterns", async (t) => { monitor: false, }, ]); - t.same(config.isUserAgentBlocked("googlebot"), [ + t.same(config.getBlockedUserAgents("googlebot"), [ { key: "bots", monitor: false, }, ]); - t.same(config.isUserAgentBlocked("firefox"), []); + t.same(config.getBlockedUserAgents("firefox"), []); }); t.test("it returns and updates blocked user agents", async (t) => { @@ -343,7 +343,7 @@ t.test("it returns and updates blocked user agents", async (t) => { monitor: true, }, ]); - t.same(config.isUserAgentBlocked("googlebot"), [ + t.same(config.getBlockedUserAgents("googlebot"), [ { key: "bots", monitor: false, @@ -353,7 +353,7 @@ t.test("it returns and updates blocked user agents", async (t) => { monitor: true, }, ]); - t.same(config.isUserAgentBlocked("bingbot"), [ + t.same(config.getBlockedUserAgents("bingbot"), [ { key: "bots", monitor: false, @@ -361,9 +361,9 @@ t.test("it returns and updates blocked user agents", async (t) => { ]); config.updateBlockedUserAgents([]); - t.same(config.isUserAgentBlocked("googlebot"), []); - t.same(config.isUserAgentBlocked("bingbot"), []); - t.same(config.isUserAgentBlocked("firefox"), []); + t.same(config.getBlockedUserAgents("googlebot"), []); + t.same(config.getBlockedUserAgents("bingbot"), []); + t.same(config.getBlockedUserAgents("firefox"), []); }); t.test("it ignores user agent lists with empty patterns", async (t) => { @@ -375,7 +375,7 @@ t.test("it ignores user agent lists with empty patterns", async (t) => { monitor: false, }, ]); - t.same(config.isUserAgentBlocked("googlebot"), []); + t.same(config.getBlockedUserAgents("googlebot"), []); }); t.test( @@ -389,6 +389,6 @@ t.test( monitor: false, }, ]); - t.same(config.isUserAgentBlocked("googlebot"), []); + t.same(config.getBlockedUserAgents("googlebot"), []); } ); diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index 22c88cbe0..beb35cf00 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -115,7 +115,7 @@ export class ServiceConfig { return this.blockedUserIds.has(userId); } - isIPAddressBlocked( + getBlockedIPAddresses( ip: string ): { key: string; monitor: boolean; reason: string }[] { return this.blockedIPAddresses @@ -166,7 +166,7 @@ export class ServiceConfig { this.setBlockedUserAgents(blockedUserAgents); } - isUserAgentBlocked(ua: string): { key: string; monitor: boolean }[] { + getBlockedUserAgents(ua: string): { key: string; monitor: boolean }[] { return this.blockedUserAgents .filter((list) => list.pattern.test(ua)) .map((list) => ({ diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index 3d7e3fbe2..c8565d6b0 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -66,7 +66,7 @@ export function checkIfRequestIsBlocked( } const blockedIPs = context.remoteAddress - ? agent.getConfig().isIPAddressBlocked(context.remoteAddress) + ? agent.getConfig().getBlockedIPAddresses(context.remoteAddress) : []; if (blockedIPs.length > 0) { @@ -90,7 +90,7 @@ export function checkIfRequestIsBlocked( const blockedUserAgents = context.headers && typeof context.headers["user-agent"] === "string" - ? agent.getConfig().isUserAgentBlocked(context.headers["user-agent"]) + ? agent.getConfig().getBlockedUserAgents(context.headers["user-agent"]) : []; if (blockedUserAgents.length > 0) { From 8580d8e95ea15cad7bb61169e94bca0fecdf4c8d Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 18 Apr 2025 14:05:10 +0200 Subject: [PATCH 45/48] Remove total and blocked Simplifies the implementation and we don't need it in the dashboard --- library/agent/InspectionStatistics.test.ts | 80 ------------------- library/agent/InspectionStatistics.ts | 34 -------- library/agent/api/Event.ts | 4 - .../ReportingAPIRateLimitedClientSide.test.ts | 4 - library/sources/HTTPServer.stats.test.ts | 8 -- library/sources/Lambda.test.ts | 4 - 6 files changed, 134 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index 9e3e6a000..b5a17b368 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -41,13 +41,9 @@ t.test("it resets stats", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -66,13 +62,9 @@ t.test("it resets stats", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -102,13 +94,9 @@ t.test("it keeps track of amount of calls", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -144,13 +132,9 @@ t.test("it keeps track of amount of calls", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -186,13 +170,9 @@ t.test("it keeps track of amount of calls", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -222,13 +202,9 @@ t.test("it keeps track of amount of calls", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -264,13 +240,9 @@ t.test("it keeps track of amount of calls", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -306,13 +278,9 @@ t.test("it keeps track of amount of calls", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -367,13 +335,9 @@ t.test("it keeps track of amount of calls", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -424,13 +388,9 @@ t.test("it keeps track of requests", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -449,13 +409,9 @@ t.test("it keeps track of requests", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -475,13 +431,9 @@ t.test("it keeps track of requests", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -501,13 +453,9 @@ t.test("it keeps track of requests", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -528,13 +476,9 @@ t.test("it keeps track of requests", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -562,13 +506,9 @@ t.test("it force compresses stats", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -614,13 +554,9 @@ t.test("it keeps track of aborted requests", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }); @@ -653,16 +589,12 @@ t.test("it keeps track of blocked IPs and user agents", async () => { }, }, userAgents: { - total: 1, - blocked: 1, breakdown: { // eslint-disable-next-line camelcase ai_bots: { total: 1, blocked: 1 }, }, }, ipAddresses: { - total: 1, - blocked: 1, breakdown: { "known_threat_actors/public_scanners": { total: 1, blocked: 1 }, }, @@ -697,16 +629,12 @@ t.test("it keeps track of monitored IPs and user agents", async () => { }, }, userAgents: { - total: 1, - blocked: 0, breakdown: { // eslint-disable-next-line camelcase ai_data_scrapers: { total: 1, blocked: 0 }, }, }, ipAddresses: { - total: 1, - blocked: 0, breakdown: { "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, }, @@ -731,16 +659,12 @@ t.test("it keeps track of monitored IPs and user agents", async () => { }, }, userAgents: { - total: 2, - blocked: 0, breakdown: { // eslint-disable-next-line camelcase ai_data_scrapers: { total: 2, blocked: 0 }, }, }, ipAddresses: { - total: 2, - blocked: 0, breakdown: { "known_threat_actors/public_scanners": { total: 2, blocked: 0 }, }, @@ -767,8 +691,6 @@ t.test("should track multiple matches for the same key", (t) => { const result = stats.getStats(); - t.equal(result.ipAddresses.total, 1); - t.equal(result.ipAddresses.blocked, 1); t.equal( result.ipAddresses.breakdown["known_threat_actors/public_scanners"].total, 2 @@ -778,8 +700,6 @@ t.test("should track multiple matches for the same key", (t) => { 1 ); - t.equal(result.userAgents.total, 1); - t.equal(result.userAgents.blocked, 1); t.equal(result.userAgents.breakdown["ai_data_scrapers"].total, 2); t.equal(result.userAgents.breakdown["ai_data_scrapers"].blocked, 1); diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index f972ad083..25b4b1667 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -25,14 +25,10 @@ type UserAgentBotKey = string; type IPListKey = string; type UserAgentStats = { - total: number; - blocked: number; breakdown: Record; }; type IPAddressStats = { - total: number; - blocked: number; breakdown: Record; }; @@ -54,13 +50,9 @@ export class InspectionStatistics { attacksDetected: { total: 0, blocked: 0 }, }; private userAgents: UserAgentStats = { - total: 0, - blocked: 0, breakdown: {}, }; private ipAddresses: IPAddressStats = { - total: 0, - blocked: 0, breakdown: {}, }; @@ -97,13 +89,9 @@ export class InspectionStatistics { attacksDetected: { total: 0, blocked: 0 }, }; this.userAgents = { - total: 0, - blocked: 0, breakdown: {}, }; this.ipAddresses = { - total: 0, - blocked: 0, breakdown: {}, }; this.startedAt = Date.now(); @@ -121,13 +109,9 @@ export class InspectionStatistics { }; }; userAgents: { - total: number; - blocked: number; breakdown: Record; }; ipAddresses: { - total: number; - blocked: number; breakdown: Record; }; } { @@ -227,15 +211,6 @@ export class InspectionStatistics { } onIPAddressMatches(matches: { key: IPListKey; monitor: boolean }[]) { - if (matches.length > 0) { - this.ipAddresses.total += 1; - } - - const blockingMatch = matches.find((match) => !match.monitor); - if (blockingMatch) { - this.ipAddresses.blocked += 1; - } - matches.forEach((match) => { if (!this.ipAddresses.breakdown[match.key]) { this.ipAddresses.breakdown[match.key] = { total: 0, blocked: 0 }; @@ -250,15 +225,6 @@ export class InspectionStatistics { } onUserAgentMatches(matches: { key: UserAgentBotKey; monitor: boolean }[]) { - if (matches.length > 0) { - this.userAgents.total += 1; - } - - const blockingMatch = matches.find((match) => !match.monitor); - if (blockingMatch) { - this.userAgents.blocked += 1; - } - matches.forEach((match) => { if (!this.userAgents.breakdown[match.key]) { this.userAgents.breakdown[match.key] = { total: 0, blocked: 0 }; diff --git a/library/agent/api/Event.ts b/library/agent/api/Event.ts index 77137a4f8..3ab35c63c 100644 --- a/library/agent/api/Event.ts +++ b/library/agent/api/Event.ts @@ -94,13 +94,9 @@ type Heartbeat = { }; }; userAgents: { - total: number; - blocked: number; breakdown: Record; }; ipAddresses: { - total: number; - blocked: number; breakdown: Record; }; }; diff --git a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts index 522f6102a..643e7b918 100644 --- a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts +++ b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts @@ -156,13 +156,9 @@ function generateHeartbeatEvent(): Event { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }, diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts index 816b15a57..93d06185b 100644 --- a/library/sources/HTTPServer.stats.test.ts +++ b/library/sources/HTTPServer.stats.test.ts @@ -96,16 +96,12 @@ t.test("it tracks monitored user agents", async () => { t.equal(response3.statusCode, 200); const stats = agent.getInspectionStatistics().getStats(); t.same(stats.userAgents, { - total: 2, - blocked: 0, breakdown: { // eslint-disable-next-line camelcase ai_data_scrapers: { total: 2, blocked: 0 }, }, }); t.same(stats.ipAddresses, { - total: 0, - blocked: 0, breakdown: {}, }); server.close(); @@ -145,13 +141,9 @@ t.test("it tracks monitored IP addresses", async () => { t.equal(response2.statusCode, 200); const stats = agent.getInspectionStatistics().getStats(); t.same(stats.userAgents, { - total: 0, - blocked: 0, breakdown: {}, }); t.same(stats.ipAddresses, { - total: 1, - blocked: 0, breakdown: { "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, }, diff --git a/library/sources/Lambda.test.ts b/library/sources/Lambda.test.ts index 354ccddec..72ea6a4e9 100644 --- a/library/sources/Lambda.test.ts +++ b/library/sources/Lambda.test.ts @@ -305,13 +305,9 @@ t.test("it sends heartbeat after first and every 10 minutes", async () => { }, }, userAgents: { - total: 0, - blocked: 0, breakdown: {}, }, ipAddresses: { - total: 0, - blocked: 0, breakdown: {}, }, }, From cf35e65bd74f718af37983fae183175fc25fd1e5 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 18 Apr 2025 16:32:07 +0200 Subject: [PATCH 46/48] Prevent double counting monitored lists --- library/sources/HTTPServer.stats.test.ts | 56 +++++++++++++++++++ .../http-server/checkIfRequestIsBlocked.ts | 56 ++++++++++--------- 2 files changed, 86 insertions(+), 26 deletions(-) diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts index 93d06185b..2c0911df1 100644 --- a/library/sources/HTTPServer.stats.test.ts +++ b/library/sources/HTTPServer.stats.test.ts @@ -154,3 +154,59 @@ t.test("it tracks monitored IP addresses", async () => { }); }); }); + +t.test("it only counts once if multiple listeners", async () => { + const server = http.createServer((req, res) => { + res.setHeader("Content-Type", "text/plain"); + res.end("OK"); + }); + + server.on("request", (req, res) => { + if (res.headersSent) { + return; + } + + res.setHeader("Content-Type", "text/plain"); + res.end("OK"); + }); + + await new Promise((resolve) => { + server.listen(3329, () => { + Promise.all([ + fetch({ + url: new URL("http://localhost:3329/test"), + method: "GET", + headers: { + "user-agent": "GPTBot", + }, + timeoutInMS: 500, + }), + fetch({ + url: new URL("http://localhost:3329/test"), + method: "GET", + headers: { + "x-forwarded-for": "1.2.3.4", + }, + timeoutInMS: 500, + }), + ]).then(() => { + const { userAgents, ipAddresses } = agent + .getInspectionStatistics() + .getStats(); + t.same(userAgents, { + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 1, blocked: 0 }, + }, + }); + t.same(ipAddresses, { + breakdown: { + "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, + }, + }); + server.close(); + resolve(); + }); + }); + }); +}); diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index c8565d6b0..dc678aa8b 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -5,13 +5,15 @@ import { getContext } from "../../agent/Context"; import { escapeHTML } from "../../helpers/escapeHTML"; import { ipAllowedToAccessRoute } from "./ipAllowedToAccessRoute"; +const checkedBlocks = Symbol("__zen_checked_blocks__"); + /** * Inspects the IP address of the request: * - Whether the IP address is blocked by an IP blocklist (e.g. Geo restrictions) * - Whether the IP address is allowed to access the current route (e.g. Admin panel) */ export function checkIfRequestIsBlocked( - res: ServerResponse, + res: ServerResponse & { [checkedBlocks]?: boolean }, agent: Agent ): boolean { if (res.headersSent) { @@ -26,6 +28,14 @@ export function checkIfRequestIsBlocked( return false; } + if (res[checkedBlocks]) { + return false; + } + + // We don't need to check again if the request has already been checked + // Also ensures that the statistics are only counted once + // res[checkedBlocklist] = true; + if (!ipAllowedToAccessRoute(context, agent)) { res.statusCode = 403; res.setHeader("Content-Type", "text/plain"); @@ -69,23 +79,20 @@ export function checkIfRequestIsBlocked( ? agent.getConfig().getBlockedIPAddresses(context.remoteAddress) : []; - if (blockedIPs.length > 0) { - // The same IP address can be blocked by multiple lists - agent.getInspectionStatistics().onIPAddressMatches(blockedIPs); + agent.getInspectionStatistics().onIPAddressMatches(blockedIPs); + const blockingMatch = blockedIPs.find((match) => !match.monitor); - const blockingMatch = blockedIPs.find((match) => !match.monitor); - if (blockingMatch) { - res.statusCode = 403; - res.setHeader("Content-Type", "text/plain"); - - let message = `Your IP address is blocked due to ${escapeHTML(blockingMatch.reason)}.`; - if (context.remoteAddress) { - message += ` (Your IP: ${escapeHTML(context.remoteAddress)})`; - } + if (blockingMatch) { + res.statusCode = 403; + res.setHeader("Content-Type", "text/plain"); - res.end(message); - return true; + let message = `Your IP address is blocked due to ${escapeHTML(blockingMatch.reason)}.`; + if (context.remoteAddress) { + message += ` (Your IP: ${escapeHTML(context.remoteAddress)})`; } + + res.end(message); + return true; } const blockedUserAgents = @@ -93,19 +100,16 @@ export function checkIfRequestIsBlocked( ? agent.getConfig().getBlockedUserAgents(context.headers["user-agent"]) : []; - if (blockedUserAgents.length > 0) { - // The same user agent can be blocked by multiple lists - agent.getInspectionStatistics().onUserAgentMatches(blockedUserAgents); + agent.getInspectionStatistics().onUserAgentMatches(blockedUserAgents); - if (blockedUserAgents.find((match) => !match.monitor)) { - res.statusCode = 403; - res.setHeader("Content-Type", "text/plain"); + if (blockedUserAgents.find((match) => !match.monitor)) { + res.statusCode = 403; + res.setHeader("Content-Type", "text/plain"); - res.end( - "You are not allowed to access this resource because you have been identified as a bot." - ); - return true; - } + res.end( + "You are not allowed to access this resource because you have been identified as a bot." + ); + return true; } return false; From 6f84655cdd784a14cf1f13c0dbbc19c00add7705 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 18 Apr 2025 16:59:44 +0200 Subject: [PATCH 47/48] Set symbol to true and add comment --- library/sources/HTTPServer.stats.test.ts | 15 ++++++++------- .../http-server/checkIfRequestIsBlocked.ts | 5 +++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts index 2c0911df1..7c066527b 100644 --- a/library/sources/HTTPServer.stats.test.ts +++ b/library/sources/HTTPServer.stats.test.ts @@ -162,12 +162,11 @@ t.test("it only counts once if multiple listeners", async () => { }); server.on("request", (req, res) => { - if (res.headersSent) { - return; - } + // This is a second listener + }); - res.setHeader("Content-Type", "text/plain"); - res.end("OK"); + server.on("request", (req, res) => { + // This is a third listener }); await new Promise((resolve) => { @@ -178,6 +177,7 @@ t.test("it only counts once if multiple listeners", async () => { method: "GET", headers: { "user-agent": "GPTBot", + "x-forwarded-for": "1.2.3.4", }, timeoutInMS: 500, }), @@ -185,6 +185,7 @@ t.test("it only counts once if multiple listeners", async () => { url: new URL("http://localhost:3329/test"), method: "GET", headers: { + "user-agent": "GPTBot", "x-forwarded-for": "1.2.3.4", }, timeoutInMS: 500, @@ -196,12 +197,12 @@ t.test("it only counts once if multiple listeners", async () => { t.same(userAgents, { breakdown: { // eslint-disable-next-line camelcase - ai_data_scrapers: { total: 1, blocked: 0 }, + ai_data_scrapers: { total: 2, blocked: 0 }, }, }); t.same(ipAddresses, { breakdown: { - "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, + "known_threat_actors/public_scanners": { total: 2, blocked: 0 }, }, }); server.close(); diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index dc678aa8b..de6959a18 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -8,9 +8,10 @@ import { ipAllowedToAccessRoute } from "./ipAllowedToAccessRoute"; const checkedBlocks = Symbol("__zen_checked_blocks__"); /** - * Inspects the IP address of the request: + * Inspects the IP address and user agent of the request: * - Whether the IP address is blocked by an IP blocklist (e.g. Geo restrictions) * - Whether the IP address is allowed to access the current route (e.g. Admin panel) + * - Whether the user agent is blocked by a user agent blocklist */ export function checkIfRequestIsBlocked( res: ServerResponse & { [checkedBlocks]?: boolean }, @@ -34,7 +35,7 @@ export function checkIfRequestIsBlocked( // We don't need to check again if the request has already been checked // Also ensures that the statistics are only counted once - // res[checkedBlocklist] = true; + res[checkedBlocks] = true; if (!ipAllowedToAccessRoute(context, agent)) { res.statusCode = 403; From b3f888b03256877ab8454e64f921a4d36a76ae2d Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 29 Apr 2025 16:08:33 +0200 Subject: [PATCH 48/48] Fix test --- library/agent/InspectionStatistics.test.ts | 26 +++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index 6676cdb8c..43d76f807 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -595,7 +595,7 @@ t.test("it keeps track of blocked IPs and user agents", async () => { stats.onUserAgentMatches([{ key: "ai_bots", monitor: false }]); t.same(stats.getStats(), { - sinks: {}, + operations: {}, startedAt: 0, requests: { total: 0, @@ -635,7 +635,7 @@ t.test("it keeps track of monitored IPs and user agents", async () => { stats.onUserAgentMatches([{ key: "ai_data_scrapers", monitor: true }]); t.same(stats.getStats(), { - sinks: {}, + operations: {}, startedAt: 0, requests: { total: 0, @@ -665,7 +665,7 @@ t.test("it keeps track of monitored IPs and user agents", async () => { stats.onUserAgentMatches([{ key: "ai_data_scrapers", monitor: true }]); t.same(stats.getStats(), { - sinks: {}, + operations: {}, startedAt: 0, requests: { total: 0, @@ -692,6 +692,8 @@ t.test("it keeps track of monitored IPs and user agents", async () => { }); t.test("should track multiple matches for the same key", (t) => { + const clock = FakeTimers.install(); + const stats = new InspectionStatistics({ maxPerfSamplesInMemory: 100, maxCompressedStatsInMemory: 10, @@ -785,6 +787,12 @@ t.test("it keeps track of multiple operations of the same kind", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); // Test that each operation maintains its own stats @@ -840,6 +848,12 @@ t.test("it keeps track of multiple operations of the same kind", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); clock.uninstall(); @@ -878,6 +892,12 @@ t.test("it handles empty operation strings", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); clock.uninstall();