diff --git a/end2end/server/src/handlers/lists.js b/end2end/server/src/handlers/lists.js index 617ac2ff0..484245249 100644 --- a/end2end/server/src/handlers/lists.js +++ b/end2end/server/src/handlers/lists.js @@ -20,22 +20,37 @@ module.exports = function lists(req, res) { blockedIps.length > 0 ? [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: blockedIps, + monitor: false, + }, + ] + : [], + blockedUserAgents: + blockedUserAgents.length > 0 + ? [ + { + key: "hackers", + pattern: blockedUserAgents, + monitor: false, }, ] : [], - blockedUserAgents: blockedUserAgents, allowedIPAddresses: allowedIps.length > 0 ? [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: allowedIps, + monitor: false, }, ] : [], + monitoredIPAddresses: [], + monitoredUserAgents: [], }); }; diff --git a/end2end/server/src/zen/config.js b/end2end/server/src/zen/config.js index 87797b8e3..101b75b0c 100644 --- a/end2end/server/src/zen/config.js +++ b/end2end/server/src/zen/config.js @@ -90,7 +90,7 @@ function getAllowedIPAddresses(app) { } function updateBlockedUserAgents(app, uas) { - let entry = blockedUserAgents.find((e) => e.serviceId === e.serviceId); + let entry = blockedUserAgents.find((e) => e.serviceId === app.serviceId); if (entry) { entry.userAgents = uas; @@ -104,13 +104,13 @@ function updateBlockedUserAgents(app, uas) { } function getBlockedUserAgents(app) { - const entry = blockedUserAgents.find((e) => e.serviceId === e.serviceId); + const entry = blockedUserAgents.find((e) => e.serviceId === app.serviceId); if (entry) { return entry.userAgents; } - return ""; + return { serviceId: app.serviceId, userAgents: [] }; } module.exports = { diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 6466b91d7..97efd634e 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -18,6 +18,7 @@ import { Wrapper } from "./Wrapper"; import { Context } from "./Context"; import { 
createTestAgent } from "../helpers/createTestAgent"; import { setTimeout } from "node:timers/promises"; +import type { Response } from "./api/fetchBlockedLists"; let shouldOnlyAllowSomeIPAddresses = false; @@ -28,22 +29,32 @@ wrap(fetch, "fetch", function mock() { body: JSON.stringify({ blockedIPAddresses: [ { + key: "some/key", source: "name", description: "Description", ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], + monitor: false, + }, + ], + blockedUserAgents: [ + { + key: "ai_bots", + pattern: "AI2Bot|Bytespider", + monitor: false, }, ], - blockedUserAgents: "AI2Bot|Bytespider", allowedIPAddresses: shouldOnlyAllowSomeIPAddresses ? [ { + key: "some/key", source: "name", description: "Description", ips: ["4.3.2.1"], + monitor: false, }, ] : [], - }), + } satisfies Response), }; }; }); @@ -1065,36 +1076,51 @@ t.test("it fetches blocked lists", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { - blocked: true, - reason: "Description", - }); - t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), { - blocked: true, - reason: "Description", - }); + t.same(agent.getConfig().getBlockedIPAddresses("1.3.2.4"), [ + { + key: "some/key", + monitor: false, + reason: "Description", + }, + ]); + t.same(agent.getConfig().getBlockedIPAddresses("fe80::1234:5678:abcd:ef12"), [ + { + key: "some/key", + monitor: false, + reason: "Description", + }, + ]); t.same( agent .getConfig() - .isUserAgentBlocked( + .getBlockedUserAgents( "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" ), - { - blocked: true, - } + [ + { + key: "ai_bots", + monitor: false, + }, + ] ); t.same( - agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible) Bytespider"), - { - blocked: true, - } + agent + .getConfig() + .getBlockedUserAgents("Mozilla/5.0 (compatible) Bytespider"), + [ + { + key: "ai_bots", + monitor: false, + }, + ] ); - t.same(agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible)"), { - 
blocked: false, - }); + t.same( + agent.getConfig().getBlockedUserAgents("Mozilla/5.0 (compatible)"), + [] + ); }); t.test("it does not fetch blocked IPs if serverless", async () => { @@ -1108,10 +1134,7 @@ t.test("it does not fetch blocked IPs if serverless", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { - blocked: false, - }); - + t.same(agent.getConfig().getBlockedIPAddresses("1.3.2.4"), []); t.same(agent.getConfig().isAllowedIPAddress("1.3.2.4"), { allowed: true, }); @@ -1119,12 +1142,10 @@ t.test("it does not fetch blocked IPs if serverless", async () => { t.same( agent .getConfig() - .isUserAgentBlocked( + .getBlockedUserAgents( "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" ), - { - blocked: false, - } + [] ); }); @@ -1139,14 +1160,20 @@ t.test("it only allows some IP addresses", async () => { await setTimeout(0); - t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { - blocked: true, - reason: "Description", - }); - t.same(agent.getConfig().isIPAddressBlocked("fe80::1234:5678:abcd:ef12"), { - blocked: true, - reason: "Description", - }); + t.same(agent.getConfig().getBlockedIPAddresses("1.3.2.4"), [ + { + key: "some/key", + monitor: false, + reason: "Description", + }, + ]); + t.same(agent.getConfig().getBlockedIPAddresses("fe80::1234:5678:abcd:ef12"), [ + { + key: "some/key", + monitor: false, + reason: "Description", + }, + ]); t.same(agent.getConfig().isAllowedIPAddress("1.2.3.4"), { allowed: false, diff --git a/library/agent/Agent.ts b/library/agent/Agent.ts index ad04480b8..de1f9827b 100644 --- a/library/agent/Agent.ts +++ b/library/agent/Agent.ts @@ -312,6 +312,8 @@ export class Agent { startedAt: stats.startedAt, endedAt: endedAt, requests: stats.requests, + userAgents: stats.userAgents, + ipAddresses: stats.ipAddresses, }, hostnames: outgoingDomains, routes: routes, diff --git a/library/agent/InspectionStatistics.test.ts b/library/agent/InspectionStatistics.test.ts index 
a9bf897c2..43d76f807 100644 --- a/library/agent/InspectionStatistics.test.ts +++ b/library/agent/InspectionStatistics.test.ts @@ -42,6 +42,12 @@ t.test("it resets stats", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); clock.tick(1000); @@ -57,6 +63,12 @@ t.test("it resets stats", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); clock.uninstall(); @@ -83,6 +95,12 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.onInspectedCall({ @@ -117,6 +135,12 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.onInspectedCall({ @@ -151,6 +175,12 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.interceptorThrewError("mongodb.query", "nosql_op"); @@ -178,6 +208,12 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.onInspectedCall({ @@ -212,6 +248,12 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.onInspectedCall({ @@ -246,6 +288,12 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); t.same(stats.hasCompressedStats(), false); @@ -299,6 +347,12 @@ t.test("it keeps track of amount of calls", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); t.ok( @@ -349,6 +403,12 @@ t.test("it keeps track of requests", async () => { 
blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.onRequest(); @@ -364,6 +424,12 @@ t.test("it keeps track of requests", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.onRequest(); @@ -380,6 +446,12 @@ t.test("it keeps track of requests", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.onRequest(); @@ -396,6 +468,12 @@ t.test("it keeps track of requests", async () => { blocked: 1, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); clock.tick(1000); @@ -413,6 +491,12 @@ t.test("it keeps track of requests", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); clock.uninstall(); @@ -437,6 +521,12 @@ t.test("it force compresses stats", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); stats.onRequest(); @@ -480,11 +570,163 @@ t.test("it keeps track of aborted requests", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, + }); + + clock.uninstall(); +}); + +t.test("it keeps track of blocked IPs and user agents", async () => { + const clock = FakeTimers.install(); + + const stats = new InspectionStatistics({ + maxPerfSamplesInMemory: 50, + maxCompressedStatsInMemory: 5, + }); + + stats.onIPAddressMatches([ + { key: "known_threat_actors/public_scanners", monitor: false }, + ]); + stats.onUserAgentMatches([{ key: "ai_bots", monitor: false }]); + + t.same(stats.getStats(), { + operations: {}, + startedAt: 0, + requests: { + total: 0, + aborted: 0, + attacksDetected: { + total: 0, + blocked: 0, + }, + }, + userAgents: { + breakdown: { + // eslint-disable-next-line camelcase + ai_bots: { total: 1, blocked: 1 }, + }, + }, + ipAddresses: { + breakdown: { + 
"known_threat_actors/public_scanners": { total: 1, blocked: 1 }, + }, + }, }); clock.uninstall(); }); +t.test("it keeps track of monitored IPs and user agents", async () => { + const clock = FakeTimers.install(); + + const stats = new InspectionStatistics({ + maxPerfSamplesInMemory: 50, + maxCompressedStatsInMemory: 5, + }); + + stats.onIPAddressMatches([ + { key: "known_threat_actors/public_scanners", monitor: true }, + ]); + stats.onUserAgentMatches([{ key: "ai_data_scrapers", monitor: true }]); + + t.same(stats.getStats(), { + operations: {}, + startedAt: 0, + requests: { + total: 0, + aborted: 0, + attacksDetected: { + total: 0, + blocked: 0, + }, + }, + userAgents: { + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 1, blocked: 0 }, + }, + }, + ipAddresses: { + breakdown: { + "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, + }, + }, + }); + + // Test multiple occurrences + stats.onIPAddressMatches([ + { key: "known_threat_actors/public_scanners", monitor: true }, + ]); + stats.onUserAgentMatches([{ key: "ai_data_scrapers", monitor: true }]); + + t.same(stats.getStats(), { + operations: {}, + startedAt: 0, + requests: { + total: 0, + aborted: 0, + attacksDetected: { + total: 0, + blocked: 0, + }, + }, + userAgents: { + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 2, blocked: 0 }, + }, + }, + ipAddresses: { + breakdown: { + "known_threat_actors/public_scanners": { total: 2, blocked: 0 }, + }, + }, + }); + + clock.uninstall(); +}); + +t.test("should track multiple matches for the same key", (t) => { + const clock = FakeTimers.install(); + + const stats = new InspectionStatistics({ + maxPerfSamplesInMemory: 100, + maxCompressedStatsInMemory: 10, + }); + + stats.onIPAddressMatches([ + { key: "known_threat_actors/public_scanners", monitor: true }, + { key: "known_threat_actors/public_scanners", monitor: false }, + ]); + stats.onUserAgentMatches([ + { key: "ai_data_scrapers", 
monitor: true }, + { key: "ai_data_scrapers", monitor: false }, + ]); + + const result = stats.getStats(); + + t.equal( + result.ipAddresses.breakdown["known_threat_actors/public_scanners"].total, + 2 + ); + t.equal( + result.ipAddresses.breakdown["known_threat_actors/public_scanners"].blocked, + 1 + ); + + t.equal(result.userAgents.breakdown["ai_data_scrapers"].total, 2); + t.equal(result.userAgents.breakdown["ai_data_scrapers"].blocked, 1); + + t.end(); + + clock.uninstall(); +}); + t.test("it keeps track of multiple operations of the same kind", async () => { const clock = FakeTimers.install(); @@ -545,6 +787,12 @@ t.test("it keeps track of multiple operations of the same kind", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); // Test that each operation maintains its own stats @@ -600,6 +848,12 @@ t.test("it keeps track of multiple operations of the same kind", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); clock.uninstall(); @@ -638,6 +892,12 @@ t.test("it handles empty operation strings", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }); clock.uninstall(); diff --git a/library/agent/InspectionStatistics.ts b/library/agent/InspectionStatistics.ts index e86346d54..220a0bbde 100644 --- a/library/agent/InspectionStatistics.ts +++ b/library/agent/InspectionStatistics.ts @@ -23,6 +23,16 @@ type OperationStats = { }; type OperationStatsWithoutTimings = Omit; +type UserAgentBotKey = string; +type IPListKey = string; + +type UserAgentStats = { + breakdown: Record; +}; + +type IPAddressStats = { + breakdown: Record; +}; export class InspectionStatistics { private startedAt = Date.now(); @@ -36,7 +46,17 @@ export class InspectionStatistics { total: number; blocked: number; }; - } = { total: 0, aborted: 0, attacksDetected: { total: 0, blocked: 0 } }; + } = { + total: 0, + aborted: 
0, + attacksDetected: { total: 0, blocked: 0 }, + }; + private userAgents: UserAgentStats = { + breakdown: {}, + }; + private ipAddresses: IPAddressStats = { + breakdown: {}, + }; constructor({ maxPerfSamplesInMemory, @@ -70,6 +90,12 @@ export class InspectionStatistics { aborted: 0, attacksDetected: { total: 0, blocked: 0 }, }; + this.userAgents = { + breakdown: {}, + }; + this.ipAddresses = { + breakdown: {}, + }; this.startedAt = Date.now(); } @@ -84,6 +110,12 @@ export class InspectionStatistics { blocked: number; }; }; + userAgents: { + breakdown: Record; + }; + ipAddresses: { + breakdown: Record; + }; } { const operations: Record = {}; for (const operation in this.operations) { @@ -105,6 +137,8 @@ export class InspectionStatistics { operations: operations, startedAt: this.startedAt, requests: this.requests, + userAgents: this.userAgents, + ipAddresses: this.ipAddresses, }; } @@ -188,6 +222,34 @@ export class InspectionStatistics { } } + onIPAddressMatches(matches: { key: IPListKey; monitor: boolean }[]) { + matches.forEach((match) => { + if (!this.ipAddresses.breakdown[match.key]) { + this.ipAddresses.breakdown[match.key] = { total: 0, blocked: 0 }; + } + + this.ipAddresses.breakdown[match.key].total += 1; + + if (!match.monitor) { + this.ipAddresses.breakdown[match.key].blocked += 1; + } + }); + } + + onUserAgentMatches(matches: { key: UserAgentBotKey; monitor: boolean }[]) { + matches.forEach((match) => { + if (!this.userAgents.breakdown[match.key]) { + this.userAgents.breakdown[match.key] = { total: 0, blocked: 0 }; + } + + this.userAgents.breakdown[match.key].total += 1; + + if (!match.monitor) { + this.userAgents.breakdown[match.key].blocked += 1; + } + }); + } + onAbortedRequest() { this.requests.aborted += 1; } diff --git a/library/agent/ServiceConfig.test.ts b/library/agent/ServiceConfig.test.ts index 7cf648999..4e366b11b 100644 --- a/library/agent/ServiceConfig.test.ts +++ b/library/agent/ServiceConfig.test.ts @@ -97,6 +97,7 @@ t.test("ip blocking 
works", async () => { false, [ { + key: "geoip/Belgium;BE", source: "geoip", description: "description", ips: [ @@ -106,52 +107,102 @@ t.test("ip blocking works", async () => { "fd00:3234:5678:9abc::1/64", "5.6.7.8/32", ], + monitor: false, }, ], [] ); - t.same(config.isIPAddressBlocked("1.2.3.4"), { - blocked: true, - reason: "description", - }); - t.same(config.isIPAddressBlocked("2.3.4.5"), { blocked: false }); - t.same(config.isIPAddressBlocked("192.168.2.2"), { - blocked: true, - reason: "description", - }); - t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::1"), { - blocked: true, - reason: "description", - }); - t.same(config.isIPAddressBlocked("fd00:1234:5678:9abc::2"), { - blocked: false, - }); - t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::1"), { - blocked: true, - reason: "description", - }); - t.same(config.isIPAddressBlocked("fd00:3234:5678:9abc::2"), { - blocked: true, - reason: "description", - }); - t.same(config.isIPAddressBlocked("5.6.7.8"), { - blocked: true, - reason: "description", - }); - t.same(config.isIPAddressBlocked("1.2"), { blocked: false }); + t.same(config.getBlockedIPAddresses("1.2.3.4"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.getBlockedIPAddresses("2.3.4.5"), []); + t.same(config.getBlockedIPAddresses("192.168.2.2"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.getBlockedIPAddresses("fd00:1234:5678:9abc::1"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.getBlockedIPAddresses("fd00:1234:5678:9abc::2"), []); + t.same(config.getBlockedIPAddresses("fd00:3234:5678:9abc::1"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.getBlockedIPAddresses("fd00:3234:5678:9abc::2"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + 
t.same(config.getBlockedIPAddresses("5.6.7.8"), [ + { + key: "geoip/Belgium;BE", + monitor: false, + reason: "description", + }, + ]); + t.same(config.getBlockedIPAddresses("1.2"), []); + + config.updateBlockedIPAddresses([]); + t.same(config.getBlockedIPAddresses("1.2.3.4"), []); +}); + +t.test("update blocked IPs contains empty IPs", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedIPAddresses([ + { + key: "geoip/Belgium;BE", + source: "geoip", + description: "description", + ips: [], + monitor: false, + }, + ]); + t.same(config.getBlockedIPAddresses("1.2.3.4"), []); }); t.test("it blocks bots", async () => { const config = new ServiceConfig([], 0, [], [], true, [], []); - config.updateBlockedUserAgents("googlebot|bingbot"); + config.updateBlockedUserAgents([ + { + key: "test", + pattern: "googlebot|bingbot", + monitor: false, + }, + ]); - t.same(config.isUserAgentBlocked("googlebot"), { blocked: true }); - t.same(config.isUserAgentBlocked("123 bingbot abc"), { blocked: true }); - t.same(config.isUserAgentBlocked("bing"), { blocked: false }); + t.same(config.getBlockedUserAgents("googlebot"), [ + { + key: "test", + monitor: false, + }, + ]); + t.same(config.getBlockedUserAgents("123 bingbot abc"), [ + { + key: "test", + monitor: false, + }, + ]); + t.same(config.getBlockedUserAgents("bing"), []); - config.updateBlockedUserAgents(""); + config.updateBlockedUserAgents([]); - t.same(config.isUserAgentBlocked("googlebot"), { blocked: false }); + t.same(config.getBlockedUserAgents("googlebot"), []); }); t.test("restricting access to some ips", async () => { @@ -164,9 +215,11 @@ t.test("restricting access to some ips", async () => { [], [ { + key: "geoip/Belgium;BE", source: "geoip", description: "description", ips: ["1.2.3.4"], + monitor: false, }, ] ); @@ -191,9 +244,11 @@ t.test("only allow some ips: empty list", async () => { [], [ { + key: "geoip/Belgium;BE", source: "geoip", description: "description", ips: 
[], + monitor: false, }, ] ); @@ -255,3 +310,85 @@ t.test("bypassed ips support cidr", async () => { t.same(config.isBypassedIP("123.123.123.1"), false); t.same(config.isBypassedIP("999.999.999.999"), false); }); + +t.test("should return all matching user agent patterns", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([ + { + key: "bots", + pattern: "googlebot|bingbot", + monitor: false, + }, + ]); + t.same(config.getBlockedUserAgents("googlebot"), [ + { + key: "bots", + monitor: false, + }, + ]); + t.same(config.getBlockedUserAgents("firefox"), []); +}); + +t.test("it returns and updates blocked user agents", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([ + { + key: "bots", + pattern: "googlebot|bingbot", + monitor: false, + }, + { + key: "crawlers", + pattern: "googlebot", + monitor: true, + }, + ]); + t.same(config.getBlockedUserAgents("googlebot"), [ + { + key: "bots", + monitor: false, + }, + { + key: "crawlers", + monitor: true, + }, + ]); + t.same(config.getBlockedUserAgents("bingbot"), [ + { + key: "bots", + monitor: false, + }, + ]); + + config.updateBlockedUserAgents([]); + t.same(config.getBlockedUserAgents("googlebot"), []); + t.same(config.getBlockedUserAgents("bingbot"), []); + t.same(config.getBlockedUserAgents("firefox"), []); +}); + +t.test("it ignores user agent lists with empty patterns", async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([ + { + key: "bots", + pattern: "", + monitor: false, + }, + ]); + t.same(config.getBlockedUserAgents("googlebot"), []); +}); + +t.test( + "it does not throw error when updating user agent lists with invalid patterns", + async (t) => { + const config = new ServiceConfig([], 0, [], [], false, [], []); + config.updateBlockedUserAgents([ + { + key: "bots", + pattern: "[", + monitor: false, + }, + ]); + 
t.same(config.getBlockedUserAgents("googlebot"), []); + } +); diff --git a/library/agent/ServiceConfig.ts b/library/agent/ServiceConfig.ts index b1984480d..beb35cf00 100644 --- a/library/agent/ServiceConfig.ts +++ b/library/agent/ServiceConfig.ts @@ -2,7 +2,7 @@ import { IPMatcher } from "../helpers/ip-matcher/IPMatcher"; import { LimitedContext, matchEndpoints } from "../helpers/matchEndpoints"; import { isPrivateIP } from "../vulnerabilities/ssrf/isPrivateIP"; import type { Endpoint, EndpointConfig } from "./Config"; -import { IPList } from "./api/fetchBlockedLists"; +import { BotBlocklist, IPList } from "./api/fetchBlockedLists"; export class ServiceConfig { private blockedUserIds: Map = new Map(); @@ -10,9 +10,17 @@ export class ServiceConfig { private bypassedIPAddresses: IPMatcher | undefined; private nonGraphQLEndpoints: Endpoint[] = []; private graphqlFields: Endpoint[] = []; - private blockedIPAddresses: { blocklist: IPMatcher; description: string }[] = - []; - private blockedUserAgentRegex: RegExp | undefined; + private blockedIPAddresses: { + key: string; + blocklist: IPMatcher; + description: string; + monitor: boolean; + }[] = []; + private blockedUserAgents: { + key: string; + pattern: RegExp; + monitor: boolean; + }[] = []; // If not empty, only ips in this list are allowed to access the service // e.g. 
for country allowlists private allowedIPAddresses: { @@ -107,18 +115,16 @@ export class ServiceConfig { return this.blockedUserIds.has(userId); } - isIPAddressBlocked( + getBlockedIPAddresses( ip: string - ): { blocked: true; reason: string } | { blocked: false } { - const blocklist = this.blockedIPAddresses.find((blocklist) => - blocklist.blocklist.has(ip) - ); - - if (blocklist) { - return { blocked: true, reason: blocklist.description }; - } - - return { blocked: false }; + ): { key: string; monitor: boolean; reason: string }[] { + return this.blockedIPAddresses + .filter((list) => list.blocklist.has(ip)) + .map((list) => ({ + key: list.key, + monitor: list.monitor, + reason: list.description, + })); } private setBlockedIPAddresses(blockedIPAddresses: IPList[]) { @@ -126,8 +132,10 @@ export class ServiceConfig { for (const source of blockedIPAddresses) { this.blockedIPAddresses.push({ + key: source.key, blocklist: new IPMatcher(source.ips), description: source.description, + monitor: source.monitor, }); } } @@ -136,19 +144,35 @@ export class ServiceConfig { this.setBlockedIPAddresses(blockedIPAddresses); } - updateBlockedUserAgents(blockedUserAgents: string) { - if (!blockedUserAgents) { - this.blockedUserAgentRegex = undefined; - return; + private setBlockedUserAgents(blockedUserAgents: BotBlocklist[]) { + this.blockedUserAgents = []; + + for (const list of blockedUserAgents) { + if (list.pattern.length > 0) { + try { + this.blockedUserAgents.push({ + key: list.key, + pattern: new RegExp(list.pattern, "i"), + monitor: list.monitor, + }); + } catch { + // Invalid regex, ignore this entry + } + } } - this.blockedUserAgentRegex = new RegExp(blockedUserAgents, "i"); } - isUserAgentBlocked(ua: string): { blocked: boolean } { - if (this.blockedUserAgentRegex) { - return { blocked: this.blockedUserAgentRegex.test(ua) }; - } - return { blocked: false }; + updateBlockedUserAgents(blockedUserAgents: BotBlocklist[]) { + this.setBlockedUserAgents(blockedUserAgents); + } + 
+ getBlockedUserAgents(ua: string): { key: string; monitor: boolean }[] { + return this.blockedUserAgents + .filter((list) => list.pattern.test(ua)) + .map((list) => ({ + key: list.key, + monitor: list.monitor, + })); } private setAllowedIPAddresses(ipAddresses: IPList[]) { diff --git a/library/agent/api/Event.ts b/library/agent/api/Event.ts index 0579818d8..824c81f54 100644 --- a/library/agent/api/Event.ts +++ b/library/agent/api/Event.ts @@ -104,6 +104,12 @@ type Heartbeat = { blocked: number; }; }; + userAgents: { + breakdown: Record; + }; + ipAddresses: { + breakdown: Record; + }; }; hostnames: { hostname: string; port: number | undefined; hits: number }[]; routes: { diff --git a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts index 5929de592..98e5ca32a 100644 --- a/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts +++ b/library/agent/api/ReportingAPIRateLimitedClientSide.test.ts @@ -155,6 +155,12 @@ function generateHeartbeatEvent(): Event { total: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }, agent: { version: "1.0.0", diff --git a/library/agent/api/fetchBlockedLists.ts b/library/agent/api/fetchBlockedLists.ts index 745c8df4c..b6c7ee305 100644 --- a/library/agent/api/fetchBlockedLists.ts +++ b/library/agent/api/fetchBlockedLists.ts @@ -3,16 +3,26 @@ import { getAPIURL } from "../getAPIURL"; import { Token } from "./Token"; export type IPList = { + key: string; source: string; description: string; ips: string[]; + monitor: boolean; }; -export async function fetchBlockedLists(token: Token): Promise<{ +export type BotBlocklist = { + key: string; + pattern: string; // e.g. 
"Googlebot|Bingbot" + monitor: boolean; +}; + +export type Response = { blockedIPAddresses: IPList[]; allowedIPAddresses: IPList[]; - blockedUserAgents: string; -}> { + blockedUserAgents: BotBlocklist[]; +}; + +export async function fetchBlockedLists(token: Token): Promise { const baseUrl = getAPIURL(); const { body, statusCode } = await fetch({ url: new URL(`${baseUrl.toString()}api/runtime/firewall/lists`), @@ -20,6 +30,8 @@ export async function fetchBlockedLists(token: Token): Promise<{ headers: { // We need to set the Accept-Encoding header to "gzip" to receive the response in gzip format "Accept-Encoding": "gzip", + // Indicates to the server that this agent supports the new format with monitoring + "x-supports-monitoring": "true", Authorization: token.asString(), }, timeoutInMS: 60 * 1000, @@ -34,25 +46,16 @@ export async function fetchBlockedLists(token: Token): Promise<{ throw new Error(`Failed to fetch blocked lists: ${statusCode}`); } - const result: { - blockedIPAddresses: IPList[]; - allowedIPAddresses: IPList[]; - blockedUserAgents: string; - } = JSON.parse(body); - - return { - blockedIPAddresses: - result && Array.isArray(result.blockedIPAddresses) - ? result.blockedIPAddresses - : [], - allowedIPAddresses: - result && Array.isArray(result.allowedIPAddresses) - ? result.allowedIPAddresses - : [], - // Blocked user agents are stored as a string pattern for usage in a regex (e.g. "Googlebot|Bingbot") - blockedUserAgents: - result && typeof result.blockedUserAgents === "string" - ? 
result.blockedUserAgents - : "", - }; + const result: Response = JSON.parse(body); + + const validResponse = + Array.isArray(result.blockedIPAddresses) && + Array.isArray(result.allowedIPAddresses) && + Array.isArray(result.blockedUserAgents); + + if (!validResponse) { + throw new Error("Invalid response from fetchBlockedLists"); + } + + return result; } diff --git a/library/sources/HTTPServer.stats.test.ts b/library/sources/HTTPServer.stats.test.ts new file mode 100644 index 000000000..7c066527b --- /dev/null +++ b/library/sources/HTTPServer.stats.test.ts @@ -0,0 +1,213 @@ +import { Token } from "../agent/api/Token"; +import * as t from "tap"; +import { ReportingAPIForTesting } from "../agent/api/ReportingAPIForTesting"; +import { fetch } from "../helpers/fetch"; +import { wrap } from "../helpers/wrap"; +import { HTTPServer } from "./HTTPServer"; +import { createTestAgent } from "../helpers/createTestAgent"; +import type { Response } from "../agent/api/fetchBlockedLists"; +import * as fetchBlockedLists from "../agent/api/fetchBlockedLists"; + +// Before require("http") +const api = new ReportingAPIForTesting({ + success: true, + configUpdatedAt: 0, + allowedIPAddresses: [], + blockedUserIds: [], + endpoints: [], + heartbeatIntervalInMS: 10 * 60 * 1000, +}); + +const agent = createTestAgent({ + token: new Token("123"), + api, +}); + +agent.start([new HTTPServer()]); + +wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { + return async function fetchBlockedLists(): Promise { + return { + allowedIPAddresses: [], + blockedIPAddresses: [ + { + key: "known_threat_actors/public_scanners", + monitor: true, + ips: ["1.2.3.4/32"], + source: "test", + description: "Test IP list", + }, + ], + blockedUserAgents: [ + { + key: "ai_data_scrapers", + monitor: true, + pattern: "GPTBot|Google-Extended", + }, + ], + } satisfies Response; + }; +}); + +t.setTimeout(30 * 1000); + +const http = require("http") as typeof import("http"); + +t.beforeEach(() => { + 
agent.getInspectionStatistics().reset(); +}); + +t.test("it tracks monitored user agents", async () => { + const server = http.createServer((req, res) => { + res.setHeader("Content-Type", "text/plain"); + res.end("OK"); + }); + + await new Promise((resolve) => { + server.listen(3327, () => { + Promise.all([ + fetch({ + url: new URL("http://localhost:3327/test"), + method: "GET", + headers: { + "user-agent": "GPTBot", + }, + timeoutInMS: 500, + }), + fetch({ + url: new URL("http://localhost:3327/test"), + method: "GET", + headers: { + "user-agent": "Google-Extended", + }, + timeoutInMS: 500, + }), + fetch({ + url: new URL("http://localhost:3327/test"), + method: "GET", + headers: { + "user-agent": "Regular Browser", + }, + timeoutInMS: 500, + }), + ]).then(([response1, response2, response3]) => { + t.equal(response1.statusCode, 200); + t.equal(response2.statusCode, 200); + t.equal(response3.statusCode, 200); + const stats = agent.getInspectionStatistics().getStats(); + t.same(stats.userAgents, { + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 2, blocked: 0 }, + }, + }); + t.same(stats.ipAddresses, { + breakdown: {}, + }); + server.close(); + resolve(); + }); + }); + }); +}); + +t.test("it tracks monitored IP addresses", async () => { + const server = http.createServer((req, res) => { + res.setHeader("Content-Type", "text/plain"); + res.end("OK"); + }); + + await new Promise((resolve) => { + server.listen(3328, () => { + Promise.all([ + fetch({ + url: new URL("http://localhost:3328/test"), + method: "GET", + headers: { + "x-forwarded-for": "1.2.3.4", + }, + timeoutInMS: 500, + }), + fetch({ + url: new URL("http://localhost:3328/test"), + method: "GET", + headers: { + "x-forwarded-for": "5.6.7.8", + }, + timeoutInMS: 500, + }), + ]).then(([response1, response2]) => { + t.equal(response1.statusCode, 200); + t.equal(response2.statusCode, 200); + const stats = agent.getInspectionStatistics().getStats(); + t.same(stats.userAgents, { + 
breakdown: {}, + }); + t.same(stats.ipAddresses, { + breakdown: { + "known_threat_actors/public_scanners": { total: 1, blocked: 0 }, + }, + }); + server.close(); + resolve(); + }); + }); + }); +}); + +t.test("it only counts once if multiple listeners", async () => { + const server = http.createServer((req, res) => { + res.setHeader("Content-Type", "text/plain"); + res.end("OK"); + }); + + server.on("request", (req, res) => { + // This is a second listener + }); + + server.on("request", (req, res) => { + // This is a third listener + }); + + await new Promise<void>((resolve) => { + server.listen(3329, () => { + Promise.all([ + fetch({ + url: new URL("http://localhost:3329/test"), + method: "GET", + headers: { + "user-agent": "GPTBot", + "x-forwarded-for": "1.2.3.4", + }, + timeoutInMS: 500, + }), + fetch({ + url: new URL("http://localhost:3329/test"), + method: "GET", + headers: { + "user-agent": "GPTBot", + "x-forwarded-for": "1.2.3.4", + }, + timeoutInMS: 500, + }), + ]).then(() => { + const { userAgents, ipAddresses } = agent + .getInspectionStatistics() + .getStats(); + t.same(userAgents, { + breakdown: { + // eslint-disable-next-line camelcase + ai_data_scrapers: { total: 2, blocked: 0 }, + }, + }); + t.same(ipAddresses, { + breakdown: { + "known_threat_actors/public_scanners": { total: 2, blocked: 0 }, + }, + }); + server.close(); + resolve(); + }); + }); + }); +}); diff --git a/library/sources/HTTPServer.test.ts b/library/sources/HTTPServer.test.ts index 659cd72d0..de099d93a 100644 --- a/library/sources/HTTPServer.test.ts +++ b/library/sources/HTTPServer.test.ts @@ -8,7 +8,7 @@ import { wrap } from "../helpers/wrap"; import { HTTPServer } from "./HTTPServer"; import { join } from "path"; import { createTestAgent } from "../helpers/createTestAgent"; -import type { IPList } from "../agent/api/fetchBlockedLists"; +import type { Response } from "../agent/api/fetchBlockedLists"; import * as fetchBlockedLists from "../agent/api/fetchBlockedLists"; import { mkdtemp,
writeFile, unlink } from "fs/promises"; import { exec } from "child_process"; @@ -53,20 +53,20 @@ const agent = createTestAgent({ agent.start([new HTTPServer(), new FileSystem(), new Path()]); wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() { - return async function fetchBlockedLists(): Promise<{ - blockedIPAddresses: IPList[]; - blockedUserAgents: string; - }> { + return async function fetchBlockedLists(): Promise<Response> { return { + allowedIPAddresses: [], blockedIPAddresses: [ { + key: "geoip/Belgium;BE", source: "geoip", ips: ["9.9.9.9"], description: "geo restrictions", + monitor: false, }, ], - blockedUserAgents: "", - }; + blockedUserAgents: [], + } satisfies Response; }; }); diff --git a/library/sources/Hono.allowedIPAddresses.test.ts b/library/sources/Hono.allowedIPAddresses.test.ts index f51ce81b4..55c7e7f90 100644 --- a/library/sources/Hono.allowedIPAddresses.test.ts +++ b/library/sources/Hono.allowedIPAddresses.test.ts @@ -8,6 +8,7 @@ import { HTTPServer } from "./HTTPServer"; import { getMajorNodeVersion } from "../helpers/getNodeVersion"; import { createTestAgent } from "../helpers/createTestAgent"; import * as fetch from "../helpers/fetch"; +import { Response } from "../agent/api/fetchBlockedLists"; wrap(fetch, "fetch", function mock(original) { return async function mock(this: typeof fetch) { @@ -21,20 +22,30 @@ wrap(fetch, "fetch", function mock(original) { body: JSON.stringify({ blockedIPAddresses: [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], + monitor: false, + }, + ], + blockedUserAgents: [ + { + key: "hacker", + monitor: false, + pattern: "hacker|attacker", }, ], - blockedUserAgents: "hacker|attacker", allowedIPAddresses: [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: ["4.3.2.1"], + monitor: false, }, ], - }), + } satisfies Response), }; } diff --git a/library/sources/Hono.test.ts
b/library/sources/Hono.test.ts index 02aad4e60..cc386eb6e 100644 --- a/library/sources/Hono.test.ts +++ b/library/sources/Hono.test.ts @@ -1,5 +1,6 @@ /* eslint-disable prefer-rest-params */ import * as t from "tap"; +import type { Response } from "../agent/api/fetchBlockedLists"; import { ReportingAPIForTesting } from "../agent/api/ReportingAPIForTesting"; import { Token } from "../agent/api/Token"; import { setUser } from "../agent/context/user"; @@ -25,14 +26,22 @@ wrap(fetch, "fetch", function mock(original) { body: JSON.stringify({ blockedIPAddresses: [ { + key: "geoip/Belgium;BE", source: "geoip", description: "geo restrictions", ips: ["1.3.2.0/24", "e98c:a7ba:2329:8c69::/64"], + monitor: false, }, ], - blockedUserAgents: "hacker|attacker", allowedIPAddresses: [], - }), + blockedUserAgents: [ + { + key: "hackers", + pattern: "hacker|attacker", + monitor: false, + }, + ], + } satisfies Response), }; } diff --git a/library/sources/Lambda.test.ts b/library/sources/Lambda.test.ts index 2b216f5c6..b7bb2dd42 100644 --- a/library/sources/Lambda.test.ts +++ b/library/sources/Lambda.test.ts @@ -308,6 +308,12 @@ t.test("it sends heartbeat after first and every 10 minutes", async () => { blocked: 0, }, }, + userAgents: { + breakdown: {}, + }, + ipAddresses: { + breakdown: {}, + }, }, middlewareInstalled: false, }, diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index 527808c4c..de6959a18 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -5,13 +5,16 @@ import { getContext } from "../../agent/Context"; import { escapeHTML } from "../../helpers/escapeHTML"; import { ipAllowedToAccessRoute } from "./ipAllowedToAccessRoute"; +const checkedBlocks = Symbol("__zen_checked_blocks__"); + /** - * Inspects the IP address of the request: + * Inspects the IP address and user agent of the request: * - Whether the IP address is 
blocked by an IP blocklist (e.g. Geo restrictions) * - Whether the IP address is allowed to access the current route (e.g. Admin panel) + * - Whether the user agent is blocked by a user agent blocklist */ export function checkIfRequestIsBlocked( - res: ServerResponse, + res: ServerResponse & { [checkedBlocks]?: boolean }, agent: Agent ): boolean { if (res.headersSent) { @@ -26,6 +29,14 @@ export function checkIfRequestIsBlocked( return false; } + if (res[checkedBlocks]) { + return false; + } + + // We don't need to check again if the request has already been checked + // Also ensures that the statistics are only counted once + res[checkedBlocks] = true; + if (!ipAllowedToAccessRoute(context, agent)) { res.statusCode = 403; res.setHeader("Content-Type", "text/plain"); @@ -65,37 +76,40 @@ export function checkIfRequestIsBlocked( return true; } - const result = context.remoteAddress - ? agent.getConfig().isIPAddressBlocked(context.remoteAddress) - : ({ blocked: false } as const); + const blockedIPs = context.remoteAddress + ? agent.getConfig().getBlockedIPAddresses(context.remoteAddress) + : []; - if (result.blocked) { + agent.getInspectionStatistics().onIPAddressMatches(blockedIPs); + const blockingMatch = blockedIPs.find((match) => !match.monitor); + + if (blockingMatch) { res.statusCode = 403; res.setHeader("Content-Type", "text/plain"); - let message = `Your IP address is blocked due to ${escapeHTML(result.reason)}.`; + let message = `Your IP address is blocked due to ${escapeHTML(blockingMatch.reason)}.`; if (context.remoteAddress) { message += ` (Your IP: ${escapeHTML(context.remoteAddress)})`; } res.end(message); - return true; } - const isUserAgentBlocked = + const blockedUserAgents = context.headers && typeof context.headers["user-agent"] === "string" - ? agent.getConfig().isUserAgentBlocked(context.headers["user-agent"]) - : ({ blocked: false } as const); + ? 
agent.getConfig().getBlockedUserAgents(context.headers["user-agent"]) + : []; - if (isUserAgentBlocked.blocked) { + agent.getInspectionStatistics().onUserAgentMatches(blockedUserAgents); + + if (blockedUserAgents.find((match) => !match.monitor)) { res.statusCode = 403; res.setHeader("Content-Type", "text/plain"); res.end( "You are not allowed to access this resource because you have been identified as a bot." ); - return true; } diff --git a/library/sources/http-server/createRequestListener.ts b/library/sources/http-server/createRequestListener.ts index 5bf2693d2..70cee60bd 100644 --- a/library/sources/http-server/createRequestListener.ts +++ b/library/sources/http-server/createRequestListener.ts @@ -75,19 +75,19 @@ function callListenerWithContext( const countedRequest = Symbol("__zen_request_counted__"); function createOnFinishRequestHandler( - req: IncomingMessage, + req: IncomingMessage & { [countedRequest]?: boolean }, res: ServerResponse, agent: Agent ) { return function onFinishRequest() { - if ((req as any)[countedRequest]) { + if (req[countedRequest]) { // The request has already been counted // This might happen if the server has multiple listeners return; } // Mark the request as counted - (req as any)[countedRequest] = true; + req[countedRequest] = true; const context = getContext();