Skip to content

Commit 49ce854

Browse files
Copilotjstirnaman
andauthored
Fix PR Preview skipping when layout changes include wildcard URL patterns (#6725)
* Initial plan * Fix: Strip wildcards from URL paths in PR Preview detection - Update normalizeUrlPath() to remove asterisk wildcards - Collapse multiple consecutive slashes after wildcard removal - Add backtick as valid URL delimiter for code-wrapped URLs - Add comprehensive test cases for wildcard handling - Update backtick test to reflect safer truncation behavior Fixes issue where PR descriptions with wildcard patterns like `/influxdb3/enterprise/*` were not properly extracted, causing PR Preview to skip even when URLs were provided. Co-authored-by: jstirnaman <212227+jstirnaman@users.noreply.github.com> * docs: Clarify backtick handling in URL validation Add comment explaining that backticks act as delimiters in regex extraction, preventing them from appearing in extracted paths even though they're in the rejection pattern. Co-authored-by: jstirnaman <212227+jstirnaman@users.noreply.github.com> * docs: Improve comments explaining normalization and regex logic - Clarify why wildcards are removed before slash collapsing - Document the defense-in-depth backtick handling - Add examples of the normalization process Co-authored-by: jstirnaman <212227+jstirnaman@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jstirnaman <212227+jstirnaman@users.noreply.github.com>
1 parent d7824de commit 49ce854

File tree

2 files changed

+52
-7
lines changed

2 files changed

+52
-7
lines changed

.github/scripts/parse-pr-urls.js

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ function isValidUrlPath(path) {
5151
// Reject path traversal attempts
5252
if (path.includes('..')) return false;
5353

54-
// Reject paths with suspicious characters (includes ' to prevent JS injection)
54+
// Reject paths with suspicious characters
55+
// Note: Backticks are in this list, but the extraction regex stops AT backticks,
56+
// so they act as delimiters rather than being included in paths
57+
// (includes ' to prevent JS injection)
5558
if (/[<>"|{}`\\^[\]']/.test(path)) return false;
5659

5760
// Reject URL-encoded characters (potential encoding attacks)
@@ -73,9 +76,13 @@ function isValidUrlPath(path) {
7376
function buildRelativePattern() {
7477
const namespaceAlternation = PRODUCT_NAMESPACES.join('|');
7578
// Match relative paths starting with known product prefixes
76-
// Also captures paths in markdown links: [text](/influxdb3/core/)
79+
// Captures paths in various contexts: markdown links, parentheses, backticks, etc.
80+
// Delimiters: start of string, whitespace, ], ), (, or `
81+
// Note: Backtick appears in both the delimiter list and negated character class
82+
// for defense-in-depth - delimiter stops extraction, character class prevents
83+
// any edge cases where backticks might slip through
7784
return new RegExp(
78-
`(?:^|\\s|\\]|\\)|\\()(\\/(?:${namespaceAlternation})[^\\s)\\]>"']*)`,
85+
`(?:^|\\s|\\]|\\)|\\(|\`)(\\/(?:${namespaceAlternation})[^\\s)\\]>"'\`]*)`,
7986
'gm'
8087
);
8188
}
@@ -130,14 +137,20 @@ export function extractDocsUrls(text) {
130137
/**
131138
* Normalize URL path to consistent format
132139
* @param {string} urlPath - URL path to normalize
133-
* @returns {string} - Normalized path with trailing slash
140+
* @returns {string} - Normalized path with trailing slash, wildcards stripped
134141
*/
135142
function normalizeUrlPath(urlPath) {
136143
// Remove anchor fragments
137144
let normalized = urlPath.split('#')[0];
138145
// Remove query strings
139146
normalized = normalized.split('?')[0];
140-
// Ensure trailing slash
147+
// Remove wildcard characters (* is often used to indicate "all pages")
148+
// Do this BEFORE collapsing slashes to handle patterns like /path/*/
149+
normalized = normalized.replace(/\*/g, '');
150+
// Collapse multiple consecutive slashes into single slash
151+
// This handles cases like /path/*/ → /path// → /path/
152+
normalized = normalized.replace(/\/+/g, '/');
153+
// Ensure trailing slash (important for Hugo's URL structure)
141154
if (!normalized.endsWith('/')) {
142155
normalized += '/';
143156
}

.github/scripts/test-parse-pr-urls.js

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,12 @@ test('Special characters: pipes and brackets', () => {
140140
assertEquals(result, [], 'Should reject paths with curly braces');
141141
});
142142

143-
test('Special characters: backticks', () => {
143+
test('Special characters: backticks are delimiters', () => {
144+
// Backticks act as delimiters, stopping URL extraction
145+
// This prevents command substitution injection
144146
const text = '/influxdb3/`whoami`/';
145147
const result = extractDocsUrls(text);
146-
assertEquals(result, [], 'Should reject paths with backticks');
148+
assertEquals(result, ['/influxdb3/'], 'Should truncate at backtick delimiter');
147149
});
148150

149151
test('Special characters: single quotes truncate at extraction', () => {
@@ -252,6 +254,36 @@ test('Normalization: removes query string', () => {
252254
assertEquals(result, ['/influxdb3/core/'], 'Should remove query string');
253255
});
254256

257+
test('Normalization: strips wildcard from path', () => {
258+
const text = '/influxdb3/enterprise/*';
259+
const result = extractDocsUrls(text);
260+
assertEquals(result, ['/influxdb3/enterprise/'], 'Should strip wildcard character');
261+
});
262+
263+
test('Normalization: strips wildcard in middle of path', () => {
264+
const text = '/influxdb3/*/admin/';
265+
const result = extractDocsUrls(text);
266+
assertEquals(result, ['/influxdb3/admin/'], 'Should strip wildcard from middle of path');
267+
});
268+
269+
test('Normalization: strips multiple wildcards', () => {
270+
const text = '/influxdb3/*/admin/*';
271+
const result = extractDocsUrls(text);
272+
assertEquals(result, ['/influxdb3/admin/'], 'Should strip all wildcard characters');
273+
});
274+
275+
test('Wildcard in markdown-style notation', () => {
276+
const text = '**InfluxDB 3 Enterprise pages** (`/influxdb3/enterprise/*`)';
277+
const result = extractDocsUrls(text);
278+
assertEquals(result, ['/influxdb3/enterprise/'], 'Should extract and normalize path with wildcard in backticks');
279+
});
280+
281+
test('Wildcard in parentheses', () => {
282+
const text = 'Affects pages under (/influxdb3/enterprise/*)';
283+
const result = extractDocsUrls(text);
284+
assertEquals(result, ['/influxdb3/enterprise/'], 'Should extract and normalize path with wildcard in parentheses');
285+
});
286+
255287
// Test deduplication
256288
test('Deduplication: same URL multiple times', () => {
257289
const text = `

0 commit comments

Comments
 (0)