Skip to content

Commit da4caab

Browse files
authored
try to detect external sources without using host (#57033)
1 parent 8e524ba commit da4caab

File tree

2 files changed

+76
-23
lines changed

2 files changed

+76
-23
lines changed

src/search/components/helpers/execute-search-actions.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ export async function executeAISearch(version: string, query: string, debug = fa
4949
const body = {
5050
query,
5151
version,
52+
client_name: 'docs.github.com-client',
5253
...(debug && { debug: '1' }),
5354
}
5455

@@ -80,6 +81,9 @@ export async function executeCombinedSearch(
8081
params.set('debug', '1')
8182
}
8283

84+
// Add client_name to identify requests from our frontend
85+
params.set('client_name', 'docs.github.com-client')
86+
8387
// Always fetch 4 results for autocomplete
8488
params.set('size', '4')
8589

src/search/lib/helpers/external-search-analytics.ts

Lines changed: 72 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -12,27 +12,40 @@ export async function handleExternalSearchAnalytics(
1212
const host = req.headers['x-host'] || req.headers.host
1313
const normalizedHost = stripPort(host as string)
1414

15-
// Skip analytics entirely for production and internal staging environments
16-
if (
17-
normalizedHost === 'docs.github.com' ||
18-
normalizedHost.endsWith('.github.net') ||
19-
normalizedHost.endsWith('.githubapp.com')
20-
) {
21-
return null
22-
}
15+
// Check if this is likely an external API call rather than a browser request
16+
const isLikelyExternalAPI = isExternalAPIRequest(req)
2317

24-
// For localhost, send analytics but auto-set client_name if not provided
18+
// Get client_name from query or body
2519
let client_name = req.query.client_name || req.body?.client_name
26-
if (normalizedHost === 'localhost' && !client_name) {
27-
client_name = 'localhost'
20+
21+
// Rule 1: Skip analytics for browser requests from our own frontend
22+
if (!isLikelyExternalAPI && client_name === 'docs.github.com-client') {
23+
return null
2824
}
2925

30-
// For all other external requests, require explicit client_name
31-
if (!client_name) {
32-
return {
33-
status: 400,
34-
error: "Missing required parameter 'client_name' for external requests",
26+
// Rule 2: Send analytics for any request with a client_name that's not 'docs.github.com-client'
27+
// (This includes partner APIs and other external clients)
28+
if (client_name && client_name !== 'docs.github.com-client') {
29+
// Analytics will be sent at the end of this function
30+
}
31+
// Rule 3: For requests without client_name, require it for external API requests
32+
else if (!client_name) {
33+
if (isLikelyExternalAPI) {
34+
return {
35+
status: 400,
36+
error: "Missing required parameter 'client_name' for external requests",
37+
}
3538
}
39+
// For browser requests without client_name to internal environments, skip analytics
40+
else if (normalizedHost.endsWith('.github.net') || normalizedHost.endsWith('.githubapp.com')) {
41+
return null
42+
}
43+
// Any other browser request without client_name (e.g. localhost development) falls through and still sends analytics below
44+
}
45+
46+
// For localhost, ensure we have a client_name for analytics
47+
if (normalizedHost === 'localhost' && !client_name) {
48+
client_name = 'localhost'
3649
}
3750

3851
// Send search event with client identifier
@@ -71,19 +84,16 @@ export async function handleExternalSearchAnalytics(
7184

7285
/**
7386
* Determines if a host should bypass client_name requirement for analytics
74-
* Returns true if the host is docs.github.com or ends with github.net or githubapp.com
75-
* (for production and internal staging environments)
87+
* Returns true if the host ends with .github.net or .githubapp.com
88+
* (for internal staging environments)
89+
* Note: docs.github.com is intentionally no longer listed here. In production normalizedHost is always docs.github.com, so the host alone cannot tell our own frontend apart from external callers.
7690
* Note: localhost is NOT included here as it should send analytics with auto-set client_name
7791
*/
7892
export function shouldBypassClientNameRequirement(host: string | undefined): boolean {
7993
if (!host) return false
8094

8195
const normalizedHost = stripPort(host)
82-
return (
83-
normalizedHost === 'docs.github.com' ||
84-
normalizedHost.endsWith('.github.net') ||
85-
normalizedHost.endsWith('.githubapp.com')
86-
)
96+
return normalizedHost.endsWith('.github.net') || normalizedHost.endsWith('.githubapp.com')
8797
}
8898

8999
/**
@@ -93,3 +103,42 @@ function stripPort(host: string): string {
93103
const [hostname] = host.split(':')
94104
return hostname
95105
}
106+
107+
interface ExternalAPIRequestLike {
108+
headers: Record<string, string | undefined>
109+
}
110+
111+
/**
112+
* Determines if a request is likely from an external API client rather than a browser
113+
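* Heuristics: Sec-Fetch-* or client-hint (sec-ch-ua*) headers mean a browser; otherwise a JSON-only Accept header or a known HTTP-library User-Agent means an API client; anything else is treated as a browser.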
114+
*/
115+
const userAgentRegex = /^(curl|wget|python-requests|axios|node-fetch|Go-http-client|okhttp)/i
116+
function isExternalAPIRequest(req: ExternalAPIRequestLike): boolean {
117+
const headers = req.headers
118+
119+
// Browser security headers that APIs typically don't send
120+
const hasSecFetchHeaders = headers['sec-fetch-site'] || headers['sec-fetch-mode']
121+
const hasClientHints = headers['sec-ch-ua'] || headers['sec-ch-ua-mobile']
122+
123+
// Browsers typically request HTML, APIs typically request JSON
124+
const acceptHeader = headers.accept || ''
125+
const prefersJson =
126+
acceptHeader.includes('application/json') && !acceptHeader.includes('text/html')
127+
128+
// Common API user agents (not exhaustive, but catches common cases)
129+
const userAgent = headers['user-agent'] || ''
130+
const hasAPIUserAgent = userAgentRegex.test(userAgent)
131+
132+
// If it has browser-specific headers, it's likely a browser
133+
if (hasSecFetchHeaders || hasClientHints) {
134+
return false
135+
}
136+
137+
// If it prefers JSON or has a common API user agent, it's likely an API
138+
if (prefersJson || hasAPIUserAgent) {
139+
return true
140+
}
141+
142+
// Default to treating it as a browser request to be conservative
143+
return false
144+
}

0 commit comments

Comments
 (0)