Skip to content

Commit 4899569

Browse files
committed
Prepare bot spoofing protection
1 parent 916e44a commit 4899569

File tree

7 files changed

+213
-3
lines changed

7 files changed

+213
-3
lines changed

library/agent/Agent.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,11 +378,16 @@ export class Agent {
378378
}
379379

380380
try {
381-
const { blockedIPAddresses, blockedUserAgents, allowedIPAddresses } =
382-
await fetchBlockedLists(this.token);
381+
const {
382+
blockedIPAddresses,
383+
blockedUserAgents,
384+
allowedIPAddresses,
385+
botSpoofingData,
386+
} = await fetchBlockedLists(this.token);
383387
this.serviceConfig.updateBlockedIPAddresses(blockedIPAddresses);
384388
this.serviceConfig.updateBlockedUserAgents(blockedUserAgents);
385389
this.serviceConfig.updateAllowedIPAddresses(allowedIPAddresses);
390+
this.serviceConfig.updateBotSpoofingData(botSpoofingData);
386391
} catch (error: any) {
387392
console.error(`Aikido: Failed to update blocked lists: ${error.message}`);
388393
}

library/agent/ServiceConfig.ts

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,14 @@ import { IPMatcher } from "../helpers/ip-matcher/IPMatcher";
22
import { LimitedContext, matchEndpoints } from "../helpers/matchEndpoints";
33
import { isPrivateIP } from "../vulnerabilities/ssrf/isPrivateIP";
44
import type { Endpoint, EndpointConfig } from "./Config";
5-
import { IPList } from "./api/fetchBlockedLists";
5+
import { BotSpoofingData, IPList } from "./api/fetchBlockedLists";
6+
7+
/**
 * A bot spoofing rule in the form the agent keeps in memory.
 *
 * - `key`: identifier of the rule, copied as-is from the fetched rule.
 * - `uaPattern`: pattern tested against the request's user agent; ServiceConfig
 *   compiles it from the fetched pattern string with the `i` flag.
 * - `ips`: matcher built from the rule's IP list; the type also allows `undefined`.
 * - `hostnames`: domains that a reverse-DNS name of the request IP has to end with
 *   (as `.<hostname>`) for the DNS based verification to succeed.
 */
export type ServiceConfigBotSpoofingData = {
8+
key: string;
9+
uaPattern: RegExp;
10+
ips: IPMatcher | undefined;
11+
hostnames: string[];
12+
};
613

714
export class ServiceConfig {
815
private blockedUserIds: Map<string, string> = new Map();
@@ -19,6 +26,7 @@ export class ServiceConfig {
1926
allowlist: IPMatcher;
2027
description: string;
2128
}[] = [];
29+
private botSpoofingData: ServiceConfigBotSpoofingData[] = [];
2230

2331
constructor(
2432
endpoints: EndpointConfig[],
@@ -208,4 +216,26 @@ export class ServiceConfig {
208216
hasReceivedAnyStats() {
209217
return this.receivedAnyStats;
210218
}
219+
220+
/**
 * Replaces the stored bot spoofing rules with the ones in `data`.
 *
 * Entries that carry neither IPs nor hostnames are skipped. Each remaining
 * entry gets its user agent pattern compiled (case-insensitive) and its IP
 * list wrapped in an IPMatcher.
 *
 * The new list is built completely before it is stored: if compiling an entry
 * throws (e.g. `new RegExp` on a malformed pattern), the error propagates to
 * the caller and the previously stored rules stay in place instead of being
 * replaced by a truncated list.
 *
 * @param data Rules as returned by the blocked lists API.
 */
updateBotSpoofingData(data: BotSpoofingData[]) {
  const next: ServiceConfigBotSpoofingData[] = [];

  for (const source of data) {
    // Skip empty
    if (source.ips.length === 0 && source.hostnames.length === 0) {
      continue;
    }

    next.push({
      key: source.key,
      uaPattern: new RegExp(source.uaPattern, "i"),
      ips: new IPMatcher(source.ips),
      hostnames: source.hostnames,
    });
  }

  // Swap only after every entry was processed successfully
  this.botSpoofingData = next;
}
237+
238+
/**
 * Returns the bot spoofing rules currently stored on this config: the list set by
 * the last `updateBotSpoofingData` call, or an empty array before the first call.
 * The stored array itself is returned, not a copy.
 */
getBotSpoofingData() {
239+
return this.botSpoofingData;
240+
}
211241
}

library/agent/api/fetchBlockedLists.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,18 @@ export type IPList = {
88
ips: string[];
99
};
1010

11+
/**
 * One bot spoofing rule as it appears in the `botSpoofingData` array of the
 * blocked lists API response.
 *
 * - `key`: identifier of the rule.
 * - `uaPattern`: regular expression source; ServiceConfig compiles it with the `i` flag.
 * - `ips`: strings handed to the IPMatcher constructor
 *   (presumably IP addresses and CIDR ranges — confirm against IPMatcher).
 * - `hostnames`: domains used for the reverse-DNS based verification.
 */
export type BotSpoofingData = {
12+
key: string;
13+
uaPattern: string;
14+
ips: string[];
15+
hostnames: string[];
16+
};
17+
1118
export async function fetchBlockedLists(token: Token): Promise<{
1219
blockedIPAddresses: IPList[];
1320
allowedIPAddresses: IPList[];
1421
blockedUserAgents: string;
22+
botSpoofingData: BotSpoofingData[];
1523
}> {
1624
const baseUrl = getAPIURL();
1725
const { body, statusCode } = await fetch({
@@ -38,6 +46,7 @@ export async function fetchBlockedLists(token: Token): Promise<{
3846
blockedIPAddresses: IPList[];
3947
allowedIPAddresses: IPList[];
4048
blockedUserAgents: string;
49+
botSpoofingData: BotSpoofingData[];
4150
} = JSON.parse(body);
4251

4352
return {
@@ -54,5 +63,9 @@ export async function fetchBlockedLists(token: Token): Promise<{
5463
result && typeof result.blockedUserAgents === "string"
5564
? result.blockedUserAgents
5665
: "",
66+
botSpoofingData:
67+
result && Array.isArray(result.botSpoofingData)
68+
? result.botSpoofingData
69+
: [],
5770
};
5871
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import type { Agent } from "../../agent/Agent";
2+
import type { Context } from "../../agent/Context";
3+
import { verifyBotAuthenticity } from "./verifyBotAuthenticity";
4+
5+
/**
 * Looks up the first bot spoofing rule whose user agent pattern matches the
 * request and, if there is one, verifies the request IP against that rule.
 *
 * Returns `false` synchronously when there are no rules, when the request has
 * no remote address, when the `user-agent` header is missing / empty / not a
 * string, or when no rule matches the user agent. Otherwise returns the
 * promise produced by `verifyBotAuthenticity`.
 *
 * NOTE(review): the early exits return `false`, while the final result is
 * `true` for a bot that passed verification — confirm with the callers which
 * meaning `true` is supposed to carry.
 */
export function checkRequestForBotSpoofing(context: Context, agent: Agent) {
  const rules = agent.getConfig().getBotSpoofingData();
  if (!rules || rules.length === 0) {
    return false;
  }

  const userAgent = context.headers["user-agent"];
  const ip = context.remoteAddress;

  // Without an IP and a usable user agent there is nothing to verify
  if (!ip || typeof userAgent !== "string" || userAgent.length === 0) {
    return false;
  }

  // Only the first rule that matches the user agent is considered
  for (const rule of rules) {
    if (rule.uaPattern.test(userAgent)) {
      return verifyBotAuthenticity(ip, rule);
    }
  }

  // The request is not from a protected bot
  return false;
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import * as t from "tap";
2+
import { verifyBotAuthenticity } from "./verifyBotAuthenticity";
3+
import { IPMatcher } from "../../helpers/ip-matcher/IPMatcher";
4+
5+
// IP list only: no hostnames are configured, so the DNS based check is never reached.
t.test("it works with a matching IP", async (t) => {
  const matchingBot = {
    key: "bot",
    uaPattern: /bot/i,
    ips: new IPMatcher(["123.123.0.0/16"]),
    hostnames: [],
  };

  t.same(await verifyBotAuthenticity("", matchingBot), false);
  t.same(await verifyBotAuthenticity("1.2.3.4", matchingBot), false);
  t.same(await verifyBotAuthenticity("123.123.1.2", matchingBot), true);
  t.same(await verifyBotAuthenticity("123.123.123.123", matchingBot), true);
});

// NOTE: the hostname tests below go through the real `dns/promises` lookups,
// so they depend on network access and on the current public DNS records of
// the addresses used.
t.test("it works with hostnames (googlebot)", async (t) => {
  const matchingBot = {
    key: "google_test",
    uaPattern: /Googlebot/i,
    ips: new IPMatcher(),
    hostnames: ["google.com", "googlebot.com"],
  };

  t.same(await verifyBotAuthenticity("1.1.1.1", matchingBot), false);
  t.same(await verifyBotAuthenticity("66.249.90.77", matchingBot), true);
});

t.test("it works with hostnames (bingbot)", async (t) => {
  const matchingBot = {
    key: "bing_test",
    // The user agent pattern is not evaluated by verifyBotAuthenticity, but it
    // should still describe the bot rather than repeat the tested IP address.
    uaPattern: /bingbot/i,
    ips: new IPMatcher(),
    hostnames: ["search.msn.com", "bing.com"],
  };

  t.same(await verifyBotAuthenticity("1.1.1.1", matchingBot), false);
  t.same(await verifyBotAuthenticity("207.46.13.14", matchingBot), true);
});
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import type { ServiceConfigBotSpoofingData } from "../../agent/ServiceConfig";
2+
import { verifyBotAuthenticityWithDNS } from "./verifyBotAuthenticityWithDNS";
3+
4+
/**
 * Decides whether a request IP belongs to the bot described by `matchingBot`.
 *
 * The IP list of the rule is consulted first; only when it does not contain
 * the IP and the rule has hostnames is the DNS based verification used.
 *
 * @param requestIp IP address the request came from.
 * @param matchingBot Rule whose user agent pattern matched the request.
 * @returns `true` when either check accepts the IP, `false` otherwise.
 */
export async function verifyBotAuthenticity(
  requestIp: string,
  matchingBot: ServiceConfigBotSpoofingData
) {
  // Cheap check first: is the IP one of the whitelisted addresses?
  const whitelistedIps = matchingBot.ips;
  if (whitelistedIps && whitelistedIps.has(requestIp)) {
    return true;
  }

  // No hostnames means there is nothing a DNS lookup could be compared to
  if (matchingBot.hostnames.length === 0) {
    return false;
  }

  // Todo cache
  const verifiedByDns = await verifyBotAuthenticityWithDNS(
    requestIp,
    matchingBot
  );

  return verifiedByDns ? true : false;
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { resolve, reverse } from "dns/promises";
2+
import type { ServiceConfigBotSpoofingData } from "../../agent/ServiceConfig";
3+
import { getInstance } from "../../agent/AgentSingleton";
4+
5+
/**
6+
* Checks the authenticity of a bot by performing a reverse DNS lookup on the request IP address.
7+
* Returns true if the bot is authentic, false otherwise.
8+
*
9+
* Example:
10+
*
11+
* 1. Do a reverse DNS lookup of the request ip
12+
* e.g. `54.236.1.12` → crawl-54-236-1-12.pinterest.com
13+
* 2. Check if the domain matches the expected origin
14+
* e.g. Domain is pinterest.com or pinterestcrawler.com for `Pinterestbot`
15+
* 3. Because PTR records can be spoofed, also lookup the A and AAAA record and compare them with the request ip:
16+
* e.g. `crawl-54-236-1-12.pinterest.com` → `54.236.1.12`
17+
*/
18+
export async function verifyBotAuthenticityWithDNS(
19+
requestIp: string,
20+
matchingBot: ServiceConfigBotSpoofingData
21+
) {
22+
try {
23+
// Send a reverse DNS lookup request
24+
const hostnames = await reverse(requestIp);
25+
if (!Array.isArray(hostnames)) {
26+
// No PTR record found
27+
return false;
28+
}
29+
30+
// Filter out hostnames that don't end with any of the whitelisted hostnames
31+
const matchingHostnames = hostnames.filter((hostname) =>
32+
matchingBot.hostnames.some((whitelistedHostname) =>
33+
// Check if the hostname ends with the whitelisted hostname
34+
hostname.endsWith(`.${whitelistedHostname}`)
35+
)
36+
);
37+
38+
if (matchingHostnames.length === 0) {
39+
// No matching hostnames found, so the bot is not authentic
40+
return false;
41+
}
42+
43+
const rrType = requestIp.includes(":") ? "AAAA" : "A";
44+
45+
// Check if the IP address matches any of the A or AAAA records for the matching hostnames
46+
for (const hostname of matchingHostnames) {
47+
const addresses = await resolve(hostname, rrType);
48+
if (!Array.isArray(addresses)) {
49+
// No A or AAAA records found
50+
continue;
51+
}
52+
if (addresses.some((address) => address === requestIp)) {
53+
// The IP address matches the A or AAAA record for the hostname
54+
return true;
55+
}
56+
}
57+
58+
return false;
59+
} catch (error) {
60+
getInstance()?.log(`Bot Spoofing Protection: DNS check failed: ${error}`);
61+
return true; // Fallback to true on error to prevent blocking legitimate requests
62+
}
63+
}

0 commit comments

Comments
 (0)