|
15 | 15 | """ |
16 | 16 |
|
17 | 17 | import json |
| 18 | +import re |
18 | 19 | from pathlib import Path |
19 | 20 | from typing import Any |
20 | 21 |
|
21 | 22 | from .base import BaseWorkflow, ModelTier |
22 | 23 | from .step_config import WorkflowStepConfig |
23 | 24 |
|
| 25 | + |
| 26 | +def _is_dangerous_eval_usage(content: str, file_path: str) -> bool: |
| 27 | + """ |
| 28 | + Check if file contains dangerous eval/exec usage, filtering false positives. |
| 29 | +
|
| 30 | + Excludes: |
| 31 | + - String literals used for detection (e.g., 'if "eval(" in content') |
| 32 | + - Comments mentioning eval/exec (e.g., '# SECURITY FIX: Use json.loads() instead of eval()') |
| 33 | + - JavaScript's safe regex.exec() method |
| 34 | + - Pattern definitions for security scanners |
| 35 | +
|
| 36 | + Returns: |
| 37 | + True if dangerous eval/exec usage is found, False otherwise. |
| 38 | + """ |
| 39 | + # Check if file even contains eval or exec |
| 40 | + if "eval(" not in content and "exec(" not in content: |
| 41 | + return False |
| 42 | + |
| 43 | + # For JavaScript/TypeScript files, check for regex.exec() which is safe |
| 44 | + if file_path.endswith((".js", ".ts", ".tsx", ".jsx")): |
| 45 | + # Remove all regex.exec() calls (these are safe) |
| 46 | + content_without_regex_exec = re.sub(r"\.\s*exec\s*\(", ".SAFE_EXEC(", content) |
| 47 | + # If no eval/exec remains, it was all regex.exec() |
| 48 | + if "eval(" not in content_without_regex_exec and "exec(" not in content_without_regex_exec: |
| 49 | + return False |
| 50 | + |
| 51 | + # Check each line for real dangerous usage |
| 52 | + lines = content.splitlines() |
| 53 | + for line in lines: |
| 54 | + # Skip comment lines |
| 55 | + stripped = line.strip() |
| 56 | + if stripped.startswith("#") or stripped.startswith("//") or stripped.startswith("*"): |
| 57 | + continue |
| 58 | + |
| 59 | + # Check for eval( or exec( in this line |
| 60 | + if "eval(" not in line and "exec(" not in line: |
| 61 | + continue |
| 62 | + |
| 63 | + # Skip if it's inside a string literal for detection purposes |
| 64 | + # e.g., 'if "eval(" in content' or "pattern = r'eval\('" |
| 65 | + detection_patterns = [ |
| 66 | + r'["\'].*eval\(.*["\']', # "eval(" or 'eval(' in a string |
| 67 | + r'["\'].*exec\(.*["\']', # "exec(" or 'exec(' in a string |
| 68 | + r"in\s+\w+", # Pattern like 'in content' |
| 69 | + r'r["\'].*eval', # Raw string regex pattern |
| 70 | + r'r["\'].*exec', # Raw string regex pattern |
| 71 | + ] |
| 72 | + |
| 73 | + is_detection_code = False |
| 74 | + for pattern in detection_patterns: |
| 75 | + if re.search(pattern, line): |
| 76 | + # Check if it's really detection code |
| 77 | + if " in " in line and ( |
| 78 | + "content" in line or "text" in line or "code" in line or "source" in line |
| 79 | + ): |
| 80 | + is_detection_code = True |
| 81 | + break |
| 82 | + # Check if it's a string literal being defined (eval or exec) |
| 83 | + if re.search(r'["\'][^"\']*eval\([^"\']*["\']', line): |
| 84 | + is_detection_code = True |
| 85 | + break |
| 86 | + if re.search(r'["\'][^"\']*exec\([^"\']*["\']', line): |
| 87 | + is_detection_code = True |
| 88 | + break |
| 89 | + # Check for raw string regex patterns containing eval/exec |
| 90 | + if re.search(r"r['\"][^'\"]*(?:eval|exec)[^'\"]*['\"]", line): |
| 91 | + is_detection_code = True |
| 92 | + break |
| 93 | + |
| 94 | + if is_detection_code: |
| 95 | + continue |
| 96 | + |
| 97 | + # Skip JavaScript regex.exec() - pattern.exec(text) |
| 98 | + if re.search(r"\w+\.exec\s*\(", line): |
| 99 | + continue |
| 100 | + |
| 101 | + # This looks like real dangerous usage |
| 102 | + return True |
| 103 | + |
| 104 | + return False |
| 105 | + |
| 106 | + |
24 | 107 | # Define step configurations for executor-based execution |
25 | 108 | BUG_PREDICT_STEPS = { |
26 | 109 | "recommend": WorkflowStepConfig( |
@@ -193,7 +276,8 @@ async def _scan(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int |
193 | 276 | "severity": "low", |
194 | 277 | } |
195 | 278 | ) |
196 | | - if "eval(" in content or "exec(" in content: |
| 279 | + # Use smart detection to filter false positives |
| 280 | + if _is_dangerous_eval_usage(content, str(file_path)): |
197 | 281 | patterns_found.append( |
198 | 282 | { |
199 | 283 | "file": str(file_path), |
|
0 commit comments