Skip to content

Commit c74a773

Browse files
djm81cursoragent
andauthored
fix: mitigate code scanning vulnerabilities (#148)
* fix: mitigate code scanning vulnerabilities - Fix ReDoS vulnerability in github_mapper.py by replacing regex with line-by-line processing - Fix incomplete URL sanitization in github.py, bridge_sync.py, and ado.py using proper URL parsing - Add explicit permissions blocks to 7 GitHub Actions jobs following least-privilege model Resolves all 13 code scanning findings: - 1 ReDoS error - 5 URL sanitization warnings - 7 missing workflow permissions warnings Fixes #147 Co-authored-by: Cursor <[email protected]> * fix: accept GitHub SSH host aliases in repo detection Accept ssh.github.com (port 443) in addition to github.com when detecting GitHub repositories via SSH remotes. This ensures repositories using [email protected]:owner/repo.git are properly detected as GitHub repos. Addresses review feedback on PR #148 Co-authored-by: Cursor <[email protected]> * fix: prevent async cleanup issues in test mode Remove manual Live display cleanup that could cause EOFError. The _safe_progress_display function already handles test mode by skipping progress display, so direct save path is sufficient. Fixes test_unlock_section failure with EOFError/ValueError. Co-authored-by: Cursor <[email protected]> --------- Co-authored-by: Dominikus Nold <[email protected]> Co-authored-by: Cursor <[email protected]>
1 parent a2f6ac7 commit c74a773

File tree

6 files changed

+103
-37
lines changed

6 files changed

+103
-37
lines changed

.github/workflows/pr-orchestrator.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ jobs:
9393
name: Compatibility (Python 3.11)
9494
runs-on: ubuntu-latest
9595
needs: tests
96+
permissions:
97+
contents: read
9698
steps:
9799
- uses: actions/checkout@v4
98100
- name: Set up Python 3.11
@@ -118,6 +120,8 @@ jobs:
118120
name: Contract-First CI
119121
runs-on: ubuntu-latest
120122
needs: [tests, compat-py311]
123+
permissions:
124+
contents: read
121125
steps:
122126
- uses: actions/checkout@v4
123127
- name: Set up Python 3.12
@@ -142,6 +146,8 @@ jobs:
142146
name: CLI Command Validation
143147
runs-on: ubuntu-latest
144148
needs: contract-first-ci
149+
permissions:
150+
contents: read
145151
steps:
146152
- uses: actions/checkout@v4
147153
- name: Set up Python 3.12
@@ -168,6 +174,8 @@ jobs:
168174
runs-on: ubuntu-latest
169175
needs: [tests]
170176
if: needs.tests.outputs.run_unit_coverage == 'true'
177+
permissions:
178+
contents: read
171179
steps:
172180
- uses: actions/checkout@v4
173181
- name: Set up Python 3.12
@@ -203,6 +211,8 @@ jobs:
203211
name: Type Checking (basedpyright)
204212
runs-on: ubuntu-latest
205213
needs: [tests]
214+
permissions:
215+
contents: read
206216
steps:
207217
- uses: actions/checkout@v4
208218
- name: Set up Python 3.12
@@ -226,6 +236,8 @@ jobs:
226236
name: Linting (ruff, pylint)
227237
runs-on: ubuntu-latest
228238
needs: [tests]
239+
permissions:
240+
contents: read
229241
steps:
230242
- name: Checkout
231243
uses: actions/checkout@v4
@@ -250,6 +262,8 @@ jobs:
250262
runs-on: ubuntu-latest
251263
needs: [tests, compat-py311, contract-first-ci, cli-validation, type-checking, linting]
252264
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
265+
permissions:
266+
contents: read
253267
steps:
254268
- name: Checkout
255269
uses: actions/checkout@v4

src/specfact_cli/adapters/ado.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from datetime import UTC, datetime
1616
from pathlib import Path
1717
from typing import Any
18+
from urllib.parse import urlparse
1819

1920
import requests
2021
from beartype import beartype
@@ -745,13 +746,18 @@ def export_artifact(
745746
if not entry_repo:
746747
source_url = entry.get("source_url", "")
747748
# Try ADO URL pattern - match by org (GUIDs in URLs)
748-
if source_url and "dev.azure.com" in source_url and "/" in target_repo:
749-
target_org = target_repo.split("/")[0]
750-
ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url)
751-
if ado_org_match and ado_org_match.group(1) == target_org:
752-
# Org matches - this is likely the same ADO organization
753-
work_item_id = entry.get("source_id")
754-
break
749+
if source_url and "/" in target_repo:
750+
try:
751+
parsed = urlparse(source_url)
752+
if parsed.hostname and parsed.hostname.lower() == "dev.azure.com":
753+
target_org = target_repo.split("/")[0]
754+
ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url)
755+
if ado_org_match and ado_org_match.group(1) == target_org:
756+
# Org matches - this is likely the same ADO organization
757+
work_item_id = entry.get("source_id")
758+
break
759+
except Exception:
760+
pass
755761

756762
# Tertiary match: for ADO, only match by org when project is truly unknown (GUID-only URLs)
757763
# This prevents cross-project matches when both entry_repo and target_repo have project names

src/specfact_cli/adapters/github.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from datetime import UTC, datetime
2020
from pathlib import Path
2121
from typing import Any
22+
from urllib.parse import urlparse
2223

2324
import requests
2425
from beartype import beartype
@@ -447,8 +448,23 @@ def detect(self, repo_path: Path, bridge_config: BridgeConfig | None = None) ->
447448
if git_config.exists():
448449
try:
449450
config_content = git_config.read_text(encoding="utf-8")
450-
if "github.com" in config_content.lower():
451-
return True
451+
# Use proper URL parsing to avoid substring matching vulnerabilities
452+
# Look for URL patterns in git config and validate the hostname
453+
url_pattern = re.compile(r"url\s*=\s*(https?://[^\s]+|git@[^:]+:[^\s]+)")
454+
# Official GitHub SSH hostnames
455+
github_ssh_hosts = {"github.com", "ssh.github.com"}
456+
for match in url_pattern.finditer(config_content):
457+
url_str = match.group(1)
458+
# Handle git@ format: [email protected]:user/repo.git or [email protected]:user/repo.git
459+
if url_str.startswith("git@"):
460+
host_part = url_str.split(":")[0].replace("git@", "")
461+
if host_part in github_ssh_hosts:
462+
return True
463+
else:
464+
# Parse HTTP/HTTPS URLs properly
465+
parsed = urlparse(url_str)
466+
if parsed.hostname and parsed.hostname.lower() == "github.com":
467+
return True
452468
except Exception:
453469
pass
454470

src/specfact_cli/backlog/mappers/github_mapper.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,21 @@ def _extract_default_content(self, body: str) -> str:
176176
Default content (body without ## headings)
177177
"""
178178
# Remove all sections starting with ##
179-
pattern = r"^##.*?$(?:\n.*?)*?(?=^##|\Z)"
180-
default_content = re.sub(pattern, "", body, flags=re.MULTILINE | re.DOTALL)
181-
return default_content.strip()
179+
# Use a more efficient pattern to avoid ReDoS: match lines starting with ##
180+
# and everything up to the next ## or end of string, using non-backtracking approach
181+
lines = body.split("\n")
182+
result_lines: list[str] = []
183+
skip_section = False
184+
185+
for line in lines:
186+
# Check if this line starts a new section (## heading)
187+
if re.match(r"^##+", line):
188+
skip_section = True
189+
else:
190+
if not skip_section:
191+
result_lines.append(line)
192+
193+
return "\n".join(result_lines).strip()
182194

183195
@beartype
184196
@require(lambda self, body: isinstance(body, str), "Body must be str")

src/specfact_cli/sync/bridge_sync.py

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import re
1414
import subprocess
1515
from dataclasses import dataclass
16+
from urllib.parse import urlparse
1617

1718

1819
try:
@@ -1247,11 +1248,17 @@ def _read_openspec_change_proposals(self, include_archived: bool = True) -> list
12471248
if url_repo_match:
12481249
entry["source_repo"] = url_repo_match.group(1)
12491250
# Try ADO URL pattern - extract org, but we need project name from elsewhere
1250-
elif "dev.azure.com" in source_url:
1251-
# For ADO, we can't reliably extract project name from URL (GUID)
1252-
# The source_repo should have been saved in the hidden comment
1253-
# If not, we'll need to match by org only later
1254-
pass
1251+
else:
1252+
# Use proper URL parsing to validate ADO URLs
1253+
try:
1254+
parsed = urlparse(source_url)
1255+
if parsed.hostname and parsed.hostname.lower() == "dev.azure.com":
1256+
# For ADO, we can't reliably extract project name from URL (GUID)
1257+
# The source_repo should have been saved in the hidden comment
1258+
# If not, we'll need to match by org only later
1259+
pass
1260+
except Exception:
1261+
pass
12551262
source_tracking_list.append(entry)
12561263

12571264
# Check for status indicators in proposal content or directory name
@@ -1539,16 +1546,21 @@ def _find_source_tracking_entry(
15391546
return source_tracking
15401547
# Try ADO URL pattern (ADO URLs contain GUIDs, not project names)
15411548
# For ADO, match by org if target_repo contains the org
1542-
elif "dev.azure.com" in source_url and "/" in target_repo:
1543-
target_org = target_repo.split("/")[0]
1544-
ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url)
1545-
# Org matches and source_type is "ado" - return entry (project name may differ due to GUID in URL)
1546-
if (
1547-
ado_org_match
1548-
and ado_org_match.group(1) == target_org
1549-
and (entry_type == "ado" or entry_type == "")
1550-
):
1551-
return source_tracking
1549+
elif "/" in target_repo:
1550+
try:
1551+
parsed = urlparse(source_url)
1552+
if parsed.hostname and parsed.hostname.lower() == "dev.azure.com":
1553+
target_org = target_repo.split("/")[0]
1554+
ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url)
1555+
# Org matches and source_type is "ado" - return entry (project name may differ due to GUID in URL)
1556+
if (
1557+
ado_org_match
1558+
and ado_org_match.group(1) == target_org
1559+
and (entry_type == "ado" or entry_type == "")
1560+
):
1561+
return source_tracking
1562+
except Exception:
1563+
pass
15521564

15531565
# Tertiary match: for ADO, only match by org when project is truly unknown (GUID-only URLs)
15541566
# This prevents cross-project matches when both entry_repo and target_repo have project names
@@ -1617,16 +1629,21 @@ def _find_source_tracking_entry(
16171629
return entry
16181630
# Try ADO URL pattern (but note: ADO URLs contain GUIDs, not project names)
16191631
# For ADO, match by org if target_repo contains the org
1620-
elif "dev.azure.com" in source_url and "/" in target_repo:
1621-
target_org = target_repo.split("/")[0]
1622-
ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url)
1623-
# Org matches and source_type is "ado" - return entry (project name may differ due to GUID in URL)
1624-
if (
1625-
ado_org_match
1626-
and ado_org_match.group(1) == target_org
1627-
and (entry_type == "ado" or entry_type == "")
1628-
):
1629-
return entry
1632+
elif "/" in target_repo:
1633+
try:
1634+
parsed = urlparse(source_url)
1635+
if parsed.hostname and parsed.hostname.lower() == "dev.azure.com":
1636+
target_org = target_repo.split("/")[0]
1637+
ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url)
1638+
# Org matches and source_type is "ado" - return entry (project name may differ due to GUID in URL)
1639+
if (
1640+
ado_org_match
1641+
and ado_org_match.group(1) == target_org
1642+
and (entry_type == "ado" or entry_type == "")
1643+
):
1644+
return entry
1645+
except Exception:
1646+
pass
16301647

16311648
# Tertiary match: for ADO, only match by org when project is truly unknown (GUID-only URLs)
16321649
# This prevents cross-project matches when both entry_repo and target_repo have project names

src/specfact_cli/utils/progress.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,4 +214,5 @@ def save_bundle_with_progress(
214214
pass
215215

216216
# No progress display - just save directly
217+
# In test mode, skip progress entirely to avoid async cleanup issues
217218
save_project_bundle(bundle, bundle_dir, atomic=atomic, progress_callback=None)

0 commit comments

Comments
 (0)