Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/enhance/docs-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ const docsPatterns = {
}

// Check for very long lists that could be tables
const longLists = content.match(/(?:^[-*]\s+.+\n){10,}/gm);
const longLists = content.match(/(?:^[-*][ \t]+\S[^\n]*\n){10,}/gm);
if (longLists) {
suggestions.push('Long lists (10+ items) might be more efficient as tables');
}
Expand Down
51 changes: 38 additions & 13 deletions lib/enhance/fixer.js
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ function applyAtPath(obj, pathStr, fixFn) {
const part = parts[i];
if (part.includes('[')) {
// Array access
const match = part.match(/(\w+)\[(\d+)\]/);
const match = part.match(/^((?!__proto__|constructor|prototype)[a-zA-Z_]\w*)\[(\d{1,10})\]$/);
if (match) {
current = current[match[1]][parseInt(match[2], 10)];
}
Expand All @@ -190,7 +190,7 @@ function applyAtPath(obj, pathStr, fixFn) {

const lastPart = parts[parts.length - 1];
if (lastPart.includes('[')) {
const match = lastPart.match(/(\w+)\[(\d+)\]/);
const match = lastPart.match(/^((?!__proto__|constructor|prototype)[a-zA-Z_]\w*)\[(\d{1,10})\]$/);
if (match) {
current[match[1]][parseInt(match[2], 10)] = fixFn(current[match[1]][parseInt(match[2], 10)]);
}
Expand Down Expand Up @@ -403,7 +403,7 @@ function fixInconsistentHeadings(content) {

if (inCodeBlock) continue;

const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
const headingMatch = line.match(/^(#{1,6})[ \t]+(\S.*)$/);
if (headingMatch) {
const currentLevel = headingMatch[1].length;
const headingText = headingMatch[2];
Expand Down Expand Up @@ -535,6 +535,37 @@ Why bad: [explanation]
return content.trim() + exampleSection;
}

/**
 * Wrap a markdown section (heading to next heading/separator) in XML tags.
 * Uses line-by-line scanning to avoid ReDoS from [\s\S]*? with lookaheads.
 *
 * The section starts at the first line matching `headingPattern` and ends just
 * before the next markdown heading (`#` through `######`) or a line starting
 * with `---`. If no terminator follows, the section runs to the end of `text`.
 * Lines inside fenced code blocks (``` or ~~~) are never treated as a section
 * start or terminator, so shell comments such as `# install` or YAML `---`
 * markers inside an example do not split the section.
 *
 * @param {string} text - Markdown content to scan
 * @param {RegExp} headingPattern - Pattern tested against each individual line to find the section heading
 * @param {string} tagName - XML tag name (without angle brackets) to wrap the section in
 * @returns {string} Content with the first matching section wrapped, or `text` unchanged when no heading matches
 */
function wrapSection(text, headingPattern, tagName) {
  const lines = text.split('\n');
  let sectionStart = -1;
  let sectionEnd = lines.length; // Default: section runs to end of content
  let openFence = null; // Fence character ('`' or '~') of the code block we are inside, if any

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // Track fenced code blocks; a fence is only closed by the same fence character
    const fence = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fence) {
      const fenceChar = fence[1][0];
      if (openFence === null) {
        openFence = fenceChar;
      } else if (openFence === fenceChar) {
        openFence = null;
      }
      continue;
    }
    if (openFence !== null) continue;

    if (sectionStart === -1) {
      // Reset state so a reused global/sticky pattern is tested from the line start
      headingPattern.lastIndex = 0;
      if (headingPattern.test(line)) {
        sectionStart = i;
      }
    } else if (/^#{1,6}\s/.test(line) || /^---/.test(line)) {
      // End section at next heading or horizontal rule
      sectionEnd = i;
      break;
    }
  }

  if (sectionStart === -1) return text;

  return [
    ...lines.slice(0, sectionStart),
    `<${tagName}>`,
    ...lines.slice(sectionStart, sectionEnd),
    `</${tagName}>`,
    ...lines.slice(sectionEnd),
  ].join('\n');
}

/**
* Add XML structure tags to complex prompt
* @param {string} content - Prompt content
Expand All @@ -551,17 +582,11 @@ function fixMissingXmlStructure(content) {
// Wrap role section if exists
let result = content;

// Find and wrap role section
result = result.replace(
/^(##\s*(?:your\s+)?role\s*\n)([\s\S]*?)(?=\n##|\n---|\Z)/im,
'<role>\n$1$2</role>\n'
);
// Find and wrap role section (use non-regex approach to avoid ReDoS)
result = wrapSection(result, /^##[ \t]*(?:your[ \t]+)?role[ \t]*$/im, 'role');

// Find and wrap constraints section
result = result.replace(
/^(##\s*(?:constraints?|rules?)\s*\n)([\s\S]*?)(?=\n##|\n---|\Z)/im,
'<constraints>\n$1$2</constraints>\n'
);
result = wrapSection(result, /^##[ \t]*(?:constraints?|rules?)[ \t]*$/im, 'constraints');

return result;
}
Expand Down Expand Up @@ -622,7 +647,7 @@ function fixMissingTriggerPhrase(content) {
// Check if already has trigger phrase
if (!/use when user asks/i.test(descLine)) {
// Extract current description
const match = descLine.match(/^description:\s*(.+)$/);
const match = descLine.match(/^description:[ \t]*(\S.*)$/);
if (match) {
const currentDesc = match[1].trim();
// Add trigger phrase
Expand Down
26 changes: 17 additions & 9 deletions lib/enhance/projectmemory-analyzer.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,25 @@ function extractFileReferences(content) {

const references = [];

// Match markdown links: [text](path)
const linkMatches = content.match(/\[([^\]]+)\]\(([^)]+)\)/g) || [];
for (const match of linkMatches) {
const pathMatch = match.match(/\]\(([^)]+)\)/);
if (pathMatch && pathMatch[1]) {
const href = pathMatch[1];
// Skip URLs and anchors
if (!href.startsWith('http') && !href.startsWith('#') && !href.startsWith('mailto:')) {
references.push(href.split('#')[0]); // Remove anchor
// Extract markdown links [text](path) using indexOf scanning (ReDoS-safe)
let pos = 0;
while (pos < content.length) {
const openBracket = content.indexOf('[', pos);
if (openBracket === -1) break;
const closeBracket = content.indexOf(']', openBracket + 1);
if (closeBracket === -1) break;
if (content[closeBracket + 1] === '(') {
const closeParen = content.indexOf(')', closeBracket + 2);
if (closeParen !== -1 && closeParen - closeBracket - 2 <= 500) {
const href = content.substring(closeBracket + 2, closeParen);
if (!href.startsWith('http') && !href.startsWith('#') && !href.startsWith('mailto:')) {
references.push(href.split('#')[0]); // Remove anchor
}
pos = closeParen + 1;
continue;
}
}
pos = openBracket + 1;
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new link extraction loop can become O(n^2) on large inputs with many [/] pairs (each iteration does indexOf(']') scanning potentially most of the remaining string, and pos only advances by 1 on the fallback path). To keep the ReDoS fix without introducing a different potential CPU hotspot, consider advancing pos to closeBracket + 1 when a ] is found but it isn't a link, and/or otherwise ensure the loop always makes proportional progress based on the furthest delimiter found.

Suggested change
pos = openBracket + 1;
// Advance past the closing bracket to avoid repeatedly rescanning the same region
pos = closeBracket + 1;

Copilot uses AI. Check for mistakes.
}

// Match backtick paths: `path/to/file.ext` or `file.ext` (root files)
Expand Down
4 changes: 2 additions & 2 deletions lib/enhance/projectmemory-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ const projectMemoryPatterns = {
const hasArchitecture = /##\s+architecture/i.test(content);
const hasStructure = /##\s+(?:project\s+)?structure/i.test(content);
const hasOverview = /##\s+overview/i.test(content);
const hasDirectoryTree = /```[\s\S]*?(?:├──|└──|lib\/|src\/)[\s\S]*?```/.test(content);
const hasDirectoryTree = content.includes('```') && /├──|└──|lib\/|src\//.test(content);

Comment on lines +60 to 61
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hasDirectoryTree is now content.includes('```') && /├──|└──|lib\/|src\//.test(content), which no longer checks that the tree markers appear inside a fenced code block. This can incorrectly treat prose that mentions src/ (plus any unrelated code fence) as a directory tree and suppress the intended "missing architecture/structure" finding. Consider a ReDoS-safe approach that still scopes the search to fenced blocks (e.g., scanning for ``` fences and testing the text between them for the tree markers).

Suggested change
const hasDirectoryTree = content.includes('```') && /├──|└──|lib\/|src\//.test(content);
let hasDirectoryTree = false;
if (content.includes('```')) {
const fence = '```';
let searchStart = 0;
while (!hasDirectoryTree) {
const start = content.indexOf(fence, searchStart);
if (start === -1) break;
const end = content.indexOf(fence, start + fence.length);
if (end === -1) break;
const blockContent = content.slice(start + fence.length, end);
if (/├──|└──|lib\/|src\//.test(blockContent)) {
hasDirectoryTree = true;
break;
}
searchStart = end + fence.length;
}
}

Copilot uses AI. Check for mistakes.
if (!hasArchitecture && !hasStructure && !hasOverview && !hasDirectoryTree) {
return {
Expand Down Expand Up @@ -85,7 +85,7 @@ const projectMemoryPatterns = {
const hasCommands = /##\s+(?:key\s+)?commands/i.test(content);
const hasScripts = /##\s+scripts/i.test(content);
const hasUsage = /##\s+usage/i.test(content);
const hasCodeBlocks = /```(?:bash|sh|shell)[\s\S]*?(?:npm|yarn|pnpm|git|make)/i.test(content);
const hasCodeBlocks = /```(?:bash|sh|shell)/i.test(content) && /\b(?:npm|yarn|pnpm|git|make)\b/i.test(content);

if (!hasCommands && !hasScripts && !hasUsage && !hasCodeBlocks) {
return {
Expand Down
43 changes: 25 additions & 18 deletions lib/enhance/prompt-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
const NOT_JSON_KEYWORDS = /(function|const|let|var|if|for|while|class)\b/;
// JS patterns require syntax context (not just keywords that might appear in JSON strings)
const LOOKS_LIKE_JS = /\b(function\s*\(|const\s+\w+\s*=|let\s+\w+\s*=|var\s+\w+\s*=|=>\s*[{(]|async\s+function|await\s+\w|class\s+\w+\s*{|import\s+\{|export\s+(const|function|class|default)|require\s*\()/;
const LOOKS_LIKE_PYTHON = /\b(def\s+\w+|import\s+\w+|from\s+\w+\s+import|class\s+\w+:|if\s+.*:|\s{4}|print\()\b/;
const LOOKS_LIKE_PYTHON = /\b(def\s+\w+|import\s+\w+|from\s+\w+\s+import|class\s+\w+:|if[ \t]+[^\n]*:|\s{4}|print\()\b/;

// Memoization caches for performance (keyed by content hash)
let _lastContent = null;
Expand Down Expand Up @@ -220,7 +220,7 @@
// Skip lines listing vague terms as documentation
if (/vague\s*(instructions?|terms?|language|patterns?)\s*[:"]/.test(trimmed)) return false;
// Skip lines with quoted lists of vague words
if (/["']usually["'].*["']sometimes["']/.test(trimmed)) return false;
if (trimmed.includes('usually') && trimmed.includes('sometimes') && /["']/.test(trimmed)) return false;
return true;
});
const filteredContent = lines.join('\n');
Expand Down Expand Up @@ -331,7 +331,11 @@
if (!content || typeof content !== 'string') return null;

// Skip workflow orchestrators that spawn agents/skills rather than produce output directly
const isOrchestrator = /##\s*Phase\s+\d+|Task\(\{|spawn.*agent|subagent_type|await Task\(|invoke.*skill|Skill\s*tool/i.test(content);
const lc = content.toLowerCase();
const isOrchestrator = /##\s*Phase\s+\d+/i.test(content) || content.includes('Task({') ||
(lc.includes('spawn') && lc.includes('agent')) || content.includes('subagent_type') ||
content.includes('await Task(') || (lc.includes('invoke') && lc.includes('skill')) ||
(/\bSkill\b/.test(content) && lc.includes('tool'));
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In isOrchestrator, the Skill tool detection changed from a case-insensitive regex (Skill\s*tool/i) to (/\bSkill\b/.test(content) && lc.includes('tool')), which is now case-sensitive for “skill”. This can miss orchestrator files that use skill tool/SKILL tool, changing behavior beyond the ReDoS fix. Consider making the Skill check case-insensitive (e.g., test against lc or use /\bskill\b/i).

Suggested change
(/\bSkill\b/.test(content) && lc.includes('tool'));
(/\bskill\b/.test(lc) && lc.includes('tool'));

Copilot uses AI. Check for mistakes.
if (isOrchestrator) return null;

// Skip reference docs and hooks (not prompts that produce conversational output)
Expand Down Expand Up @@ -590,7 +594,9 @@
if (isNonPrompt) return null;

// Skip workflow orchestrators and command files
const isOrchestrator = /##\s*Phase\s+\d+|Task\(\{|spawn.*agent|subagent_type/i.test(content);
const lc2 = content.toLowerCase();
const isOrchestrator = /##\s*Phase\s+\d+/i.test(content) || content.includes('Task({') ||
(lc2.includes('spawn') && lc2.includes('agent')) || content.includes('subagent_type');
if (isOrchestrator) return null;

// Check for example indicators
Expand Down Expand Up @@ -793,12 +799,12 @@
/\b(?:highest|lowest)\s+priority\b/i,
/\b(?:first|second|third)\s+priority\b/i,
// Numbered rules section (implicit priority order)
/##\s*(?:critical|important)\s*rules?\s*\n+\s*1\.\s/i,
/##[ \t]*(?:critical|important)[ \t]*rules?[ \t]*\n[ \t]*1\.\s/i,
// Precedence language
/\btakes?\s+precedence\b/i,
/\boverride[sd]?\b/i,
// Ordered constraint list
/##\s*constraints?\s*\n+\s*1\.\s/i
/##[ \t]*constraints?[ \t]*\n[ \t]*1\.\s/i
];

for (const pattern of priorityIndicators) {
Expand Down Expand Up @@ -846,7 +852,8 @@

// Skip if this is documentation ABOUT CoT (describes the anti-pattern)
// These files explain why step-by-step is redundant, not actually use it
if (/step[- ]by[- ]step.*(?:is\s+)?redundant|redundant.*step[- ]by[- ]step/i.test(content)) {
const lcContent = content.toLowerCase();
if (/step[- ]by[- ]step/i.test(content) && lcContent.includes('redundant')) {
return null;
}

Expand Down Expand Up @@ -961,7 +968,7 @@
// Check if requests JSON (exclude CLI flags and function descriptions)
// Exclude: "--output json", "analyzer returns JSON", "function returns JSON"
const requestsJson = (
(/\b(?:respond|output|return)\s+(?:with|in|as)?\s*JSON\b/i.test(content) &&
(/\b(?:respond|output|return)[ \t]+(?:(?:with|in|as)[ \t]+)?JSON\b/i.test(content) &&
!/--output\s+json/i.test(content) &&
Comment on lines 970 to 972
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The updated requestsJson regex requires exactly one space/tab after with|in|as (because of (?:with|in|as)[ \t]), so it will no longer match cases like respond with  JSON (multiple spaces) or respond with\t\tJSON. Consider using a 1+ quantifier (e.g. [ \t]+) after the keyword to preserve prior behavior while keeping the ReDoS fix.

Copilot uses AI. Check for mistakes.
!/(?:analyzer|function|method)\s+returns?\s+JSON/i.test(content))
) ||
Expand All @@ -971,18 +978,18 @@

// Check if provides schema or example
const hasSchema = /\bproperties\b.{1,200}\btype\b/is.test(content) ||
/```json\s*\n\s*\{/i.test(content) ||
(content.includes('```json') && content.includes('{')) ||
/<json[_-]?schema>/i.test(content) ||
// JSON in JavaScript/TypeScript code blocks (quoted keys)
/```(?:javascript|js|typescript|ts)\s*\n[\s\S]*?\{\s*\n?\s*"[a-zA-Z]+"/i.test(content) ||
(/```(?:javascript|js|typescript|ts)\b/.test(content) && /\{\s*"[a-zA-Z]+"/i.test(content)) ||
// JavaScript object literal assignment (const x = { prop: ... })
/(?:const|let|var)\s+\w+\s*=\s*\{\s*\n\s*[a-zA-Z_]+\s*:/i.test(content) ||
/(?:const|let|var)\s+\w+\s*=\s*\{/.test(content) ||
// JSON example with quoted property names in prose
/\{\s*\n?\s*"[a-zA-Z_]+"\s*:\s*["\[\{]/i.test(content) ||
/\{(?:\n[ \t]*)?"[a-zA-Z_]+"[ \t]*:/i.test(content) ||
// Inline schema description: { prop, prop, prop } or { prop: type, ... }
/\{\s*[a-zA-Z_]+\s*,\s*[a-zA-Z_]+\s*,\s*[a-zA-Z_]+/i.test(content) ||
/\{[ \t]*[a-zA-Z_]+[ \t]*,[ \t]*[a-zA-Z_]+[ \t]*,[ \t]*[a-zA-Z_]+/i.test(content) ||
// Interface-style: { prop: value } patterns with multiple lines
/\{\s*\n\s+[a-zA-Z_]+\s*:\s*[\[\{"']/i.test(content);
/\{\n[ \t]+[a-zA-Z_]+[ \t]*:[ \t]*[\[\{"']/i.test(content);
Comment on lines 979 to +992
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hasSchema detection was changed from a bounded json-code-block regex to (content.includes('```json') && content.includes('{')), which can produce false positives (e.g., any { elsewhere in the file) and cause json_without_schema to miss cases where JSON is requested but no schema/example is actually provided. Consider using a ReDoS-safe, more specific pattern (e.g., matching { near the json fence or scanning within the json code block) to keep the original intent while avoiding catastrophic backtracking.

Copilot uses AI. Check for mistakes.

if (!hasSchema) {
return {
Expand Down Expand Up @@ -1136,10 +1143,10 @@
const patternRefs = [
/\blike\s+\S+\b/i,
/\bsimilar\s+to\b/i,
/\bfollow(?:ing)?\s+(?:the\s+)?(?:same\s+)?pattern\b/i,
/\bsee\s+\S+\s+(?:for|as)\s+(?:an?\s+)?example\b/i,
/\bfollow(?:ing)?[ \t]+(?:the[ \t]+)?(?:same[ \t]+)?pattern\b/i,
/\bsee[ \t]+\S+[ \t]+(?:for|as)[ \t]+(?:an?[ \t]+)?example\b/i,
/\blook\s+at\s+(?:how|the)\b/i,
/\S+\.(?:js|ts|py)\s+(?:is|as)\s+(?:a\s+)?(?:good\s+)?example/i
/\S+\.(?:js|ts|py)[ \t]+(?:is|as)[ \t]+(?:a[ \t]+)?(?:good[ \t]+)?example/i
];

for (const pattern of patternRefs) {
Expand Down Expand Up @@ -1304,7 +1311,7 @@

// JSON detection: starts with { or [ and contains : or ,
const looksLikeJson = LOOKS_LIKE_JSON_START.test(code) &&
LOOKS_LIKE_JSON_CONTENT.test(code) &&

Check failure

Code scanning / CodeQL

Polynomial regular expression used on uncontrolled data High

This
regular expression
that depends on
library input
may run slow on strings with many repetitions of '\t'.
!NOT_JSON_KEYWORDS.test(code);

// JavaScript detection: has JS keywords
Expand Down Expand Up @@ -1386,7 +1393,7 @@
if (codeBlockLines.has(lineNum)) continue;

const line = lines[i];
const match = line.match(/^(#{1,6})\s+(.+)$/);
const match = line.match(/^(#{1,6})[ \t]+(\S.*)$/);
if (match) {
headings.push({
level: match[1].length,
Expand Down
12 changes: 6 additions & 6 deletions lib/enhance/security-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const securityPatterns = {
if (!frontmatterMatch) return null;

const frontmatter = frontmatterMatch[1];
const toolsMatch = frontmatter.match(/^tools:\s*(.*)$/m);
const toolsMatch = frontmatter.match(/^tools:[ \t]*(\S.*)?$/m);
if (!toolsMatch) return null;

const tools = toolsMatch[1];
Expand Down Expand Up @@ -60,8 +60,8 @@ const securityPatterns = {

for (let i = 0; i < lines.length; i++) {
const line = lines[i];
// Look for shell commands with interpolation
if (/(?:exec|spawn|system|shell|`|Bash)\s*[(`].*\$\{/.test(line)) {
// Look for shell commands with interpolation (string checks avoid ReDoS)
if (/\b(?:exec|spawn|system|shell)\b|`|Bash/i.test(line) && /[(`]/.test(line) && line.includes('${')) {
issues.push({
issue: 'Command injection risk via string interpolation',
fix: 'Validate and escape user input before shell execution',
Expand Down Expand Up @@ -91,8 +91,8 @@ const securityPatterns = {

for (let i = 0; i < lines.length; i++) {
const line = lines[i];
// Look for user-controlled paths with ../
if (/(?:path|file|dir).*\$.*\.\.\/|\.\.\/.*\$/.test(line)) {
// Look for user-controlled paths with ../ (string checks avoid ReDoS)
if (/\b(?:path|file|dir)\b/i.test(line) && line.includes('$') && line.includes('../')) {
issues.push({
issue: 'Path traversal risk - user input may contain ../',
fix: 'Validate paths and use path.resolve() with base directory check',
Expand Down Expand Up @@ -181,7 +181,7 @@ const securityPatterns = {
if (!frontmatterMatch) return null;

const frontmatter = frontmatterMatch[1];
const toolsMatch = frontmatter.match(/^tools:\s*(.*)$/m);
const toolsMatch = frontmatter.match(/^tools:[ \t]*(\S.*)?$/m);
if (!toolsMatch) return null;

const tools = toolsMatch[1];
Expand Down
2 changes: 1 addition & 1 deletion lib/patterns/slop-analyzers.js
Original file line number Diff line number Diff line change
Expand Up @@ -1803,7 +1803,7 @@
// Skip if termination is part of a one-line conditional (e.g., "if (x) return;")
// These don't make subsequent code unreachable
if (/^\s*(if|elif|else\s+if)\s*\(/.test(trimmed) ||
/^\s*if\s+.*:/.test(trimmed)) {
/^[ \t]*if[ \t]+[^\n]*:/.test(trimmed)) {
continue;
}

Expand Down
2 changes: 1 addition & 1 deletion plugins/audit-project/lib/enhance/docs-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ const docsPatterns = {
}

// Check for very long lists that could be tables
const longLists = content.match(/(?:^[-*]\s+.+\n){10,}/gm);
const longLists = content.match(/(?:^[-*][ \t]+\S[^\n]*\n){10,}/gm);
if (longLists) {
suggestions.push('Long lists (10+ items) might be more efficient as tables');
}
Expand Down
Loading
Loading