Fix PR Preview skipping when layout changes include wildcard URL patterns (#6725)

Copilot · jstirnaman · web-flow · commit 49ce854fc1eb · 2026-01-13T21:51:28.000-06:00
* Initial plan

* Fix: Strip wildcards from URL paths in PR Preview detection

- Update normalizeUrlPath() to remove asterisk wildcards
- Collapse multiple consecutive slashes after wildcard removal
- Add backtick as valid URL delimiter for code-wrapped URLs
- Add comprehensive test cases for wildcard handling
- Update backtick test to reflect safer truncation behavior

Fixes an issue where wildcard patterns in PR descriptions, such as
`/influxdb3/enterprise/*`, were not extracted properly, causing
PR Preview to be skipped even when URLs were provided.

Co-authored-by: jstirnaman <212227+jstirnaman@users.noreply.github.com>

* docs: Clarify backtick handling in URL validation

Add comment explaining that backticks act as delimiters in regex
extraction, preventing them from appearing in extracted paths even
though they're in the rejection pattern.

Co-authored-by: jstirnaman <212227+jstirnaman@users.noreply.github.com>

* docs: Improve comments explaining normalization and regex logic

- Clarify why wildcards are removed before slash collapsing
- Document the defense-in-depth backtick handling
- Add examples of the normalization process

Co-authored-by: jstirnaman <212227+jstirnaman@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: jstirnaman <212227+jstirnaman@users.noreply.github.com>
diff --git a/.github/scripts/parse-pr-urls.js b/.github/scripts/parse-pr-urls.js
@@ -51,7 +51,10 @@ function isValidUrlPath(path) {
   // Reject path traversal attempts
   if (path.includes('..')) return false;
 
-  // Reject paths with suspicious characters (includes ' to prevent JS injection)
+  // Reject paths with suspicious characters
+  // Note: Backticks are in this list, but the extraction regex stops AT backticks,
+  // so they act as delimiters rather than being included in paths
+  // (includes ' to prevent JS injection)
   if (/[<>"|{}`\\^[\]']/.test(path)) return false;
 
   // Reject URL-encoded characters (potential encoding attacks)
@@ -73,9 +76,13 @@ function isValidUrlPath(path) {
 function buildRelativePattern() {
   const namespaceAlternation = PRODUCT_NAMESPACES.join('|');
   // Match relative paths starting with known product prefixes
-  // Also captures paths in markdown links: [text](/influxdb3/core/)
+  // Captures paths in various contexts: markdown links, parentheses, backticks, etc.
+  // Delimiters: start of string, whitespace, ], ), (, or `
+  // Note: Backtick appears in both the delimiter list and negated character class
+  // for defense-in-depth - delimiter stops extraction, character class prevents
+  // any edge cases where backticks might slip through
   return new RegExp(
-    `(?:^|\\s|\\]|\\)|\\()(\\/(?:${namespaceAlternation})[^\\s)\\]>"']*)`,
+    `(?:^|\\s|\\]|\\)|\\(|\`)(\\/(?:${namespaceAlternation})[^\\s)\\]>"'\`]*)`,
     'gm'
   );
 }
@@ -130,14 +137,20 @@ export function extractDocsUrls(text) {
 /**
  * Normalize URL path to consistent format
  * @param {string} urlPath - URL path to normalize
- * @returns {string} - Normalized path with trailing slash
+ * @returns {string} - Normalized path with trailing slash, wildcards stripped
  */
 function normalizeUrlPath(urlPath) {
   // Remove anchor fragments
   let normalized = urlPath.split('#')[0];
   // Remove query strings
   normalized = normalized.split('?')[0];
-  // Ensure trailing slash
+  // Remove wildcard characters (* is often used to indicate "all pages")
+  // Do this BEFORE collapsing slashes to handle patterns like /path/*/
+  normalized = normalized.replace(/\*/g, '');
+  // Collapse multiple consecutive slashes into single slash
+  // This handles cases like /path/*/ → /path// → /path/
+  normalized = normalized.replace(/\/+/g, '/');
+  // Ensure trailing slash (important for Hugo's URL structure)
   if (!normalized.endsWith('/')) {
     normalized += '/';
   }
diff --git a/.github/scripts/test-parse-pr-urls.js b/.github/scripts/test-parse-pr-urls.js
@@ -140,10 +140,12 @@ test('Special characters: pipes and brackets', () => {
   assertEquals(result, [], 'Should reject paths with curly braces');
 });
 
-test('Special characters: backticks', () => {
+test('Special characters: backticks are delimiters', () => {
+  // Backticks act as delimiters, stopping URL extraction
+  // This prevents command substitution injection
   const text = '/influxdb3/`whoami`/';
   const result = extractDocsUrls(text);
-  assertEquals(result, [], 'Should reject paths with backticks');
+  assertEquals(result, ['/influxdb3/'], 'Should truncate at backtick delimiter');
 });
 
 test('Special characters: single quotes truncate at extraction', () => {
@@ -252,6 +254,36 @@ test('Normalization: removes query string', () => {
   assertEquals(result, ['/influxdb3/core/'], 'Should remove query string');
 });
 
+test('Normalization: strips wildcard from path', () => {
+  const text = '/influxdb3/enterprise/*';
+  const result = extractDocsUrls(text);
+  assertEquals(result, ['/influxdb3/enterprise/'], 'Should strip wildcard character');
+});
+
+test('Normalization: strips wildcard in middle of path', () => {
+  const text = '/influxdb3/*/admin/';
+  const result = extractDocsUrls(text);
+  assertEquals(result, ['/influxdb3/admin/'], 'Should strip wildcard from middle of path');
+});
+
+test('Normalization: strips multiple wildcards', () => {
+  const text = '/influxdb3/*/admin/*';
+  const result = extractDocsUrls(text);
+  assertEquals(result, ['/influxdb3/admin/'], 'Should strip all wildcard characters');
+});
+
+test('Wildcard in markdown-style notation', () => {
+  const text = '**InfluxDB 3 Enterprise pages** (`/influxdb3/enterprise/*`)';
+  const result = extractDocsUrls(text);
+  assertEquals(result, ['/influxdb3/enterprise/'], 'Should extract and normalize path with wildcard in backticks');
+});
+
+test('Wildcard in parentheses', () => {
+  const text = 'Affects pages under (/influxdb3/enterprise/*)';
+  const result = extractDocsUrls(text);
+  assertEquals(result, ['/influxdb3/enterprise/'], 'Should extract and normalize path with wildcard in parentheses');
+});
+
 // Test deduplication
 test('Deduplication: same URL multiple times', () => {
   const text = `