Skip to content

Commit 7d18fee

Browse files
authored
fix: production hardening — OCI safety, policy logic, parser robustness (#875)
## Summary

Supersedes #874 (which failed Alpine tests due to Go ecosystem assertion mismatch).

**Critical:**
- `oci_parser.py`: reject tar members with `../` path traversal and absolute paths; cap layer read at 2 GB to prevent OOM on large ML images
- `enrichment.py`: log NVD 403 (rate limited) instead of silently treating as "not found"
- `policy.py`: `has_kev_with_no_fix` logic bug — `or` should be `and`

**High:**
- `node_parsers.py`: log malformed JSON instead of silent `pass`
- `python_parsers.py`: log malformed Pipfile.lock instead of silent `pass`
- `compiled_parsers.py`: Cargo.lock explicit `encoding="utf-8", errors="replace"`
- `terraform.py`: ecosystem `"Go"` → `"go"` (consistent with all parsers)
- `test_core.py`: update Go ecosystem assertion to match
- `output/__init__.py`: remove 5 unused imports

## Test plan

- [x] 280 tests pass (including updated Go ecosystem assertion)
- [x] Pre-commit hooks pass (ruff, bandit)
- [x] Tar path traversal rejected (verified in code)

Closes #874
1 parent 8fcd980 commit 7d18fee

File tree

11 files changed

+27
-19
lines changed

11 files changed

+27
-19
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ jobs:
166166
steps:
167167
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
168168

169-
- uses: ./.github/actions/setup-python
169+
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
170170
with:
171171
python-version: '3.11'
172172

src/agent_bom/enrichment.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ async def fetch_nvd_data(cve_id: str, client: httpx.AsyncClient, api_key: Option
154154
return result
155155
except (ValueError, KeyError) as e:
156156
console.print(f" [dim yellow]NVD parse error for {cve_id}: {e}[/dim yellow]")
157+
elif response and response.status_code == 403:
158+
_logger.warning("NVD rate limited (HTTP 403) for %s — consider using NVD_API_KEY", cve_id)
159+
elif response and response.status_code not in (200, 404):
160+
_logger.warning("NVD returned HTTP %d for %s", response.status_code, cve_id)
157161

158162
return None
159163

src/agent_bom/oci_parser.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,10 @@ def _extract_packages_from_layer(
207207
Set of paths marked as whiteout in THIS layer (for caller to accumulate).
208208
"""
209209
whiteouts: set[str] = set()
210-
names = set(layer_tf.getnames())
210+
raw_names = set(layer_tf.getnames())
211+
212+
# Filter out path traversal attempts (malicious tar members with ../)
213+
names = {n for n in raw_names if ".." not in n.split("/") and not n.startswith("/")}
211214

212215
# Collect whiteout paths from this layer
213216
for member_name in names:
@@ -650,8 +653,13 @@ def _parse_layers_from_tarball(
650653
warnings.append(f"Layer is not a regular file: {layer_path}")
651654
continue
652655

653-
# Read into memory to allow tarfile to seek
654-
layer_bytes = layer_fobj.read()
656+
# Read into memory to allow tarfile to seek.
657+
# Cap at 2 GB to prevent OOM on very large image layers (e.g. ML model weights).
658+
max_layer_bytes = 2 * 1024 * 1024 * 1024 # 2 GB
659+
layer_bytes = layer_fobj.read(max_layer_bytes + 1)
660+
if len(layer_bytes) > max_layer_bytes:
661+
warnings.append(f"Layer {layer_path} exceeds 2 GB — skipped to avoid OOM")
662+
continue
655663
try:
656664
with tarfile.open(fileobj=io.BytesIO(layer_bytes), mode="r:*") as layer_tf:
657665
whiteouts = _extract_packages_from_layer(layer_tf, seen, packages, all_deleted)

src/agent_bom/output/__init__.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,6 @@
33
from __future__ import annotations
44

55
import json
6-
from datetime import datetime
7-
from pathlib import Path
8-
from typing import Any, Optional
9-
from uuid import uuid4
106

117
from rich.console import Console
128
from rich.panel import Panel

src/agent_bom/parsers/compiled_parsers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -788,7 +788,7 @@ def parse_cargo_packages(directory: Path, *, resolve_versions: bool = False) ->
788788
if cargo_lock.exists():
789789
current_name: Optional[str] = None
790790
current_version: Optional[str] = None
791-
for raw_line in cargo_lock.read_text().splitlines():
791+
for raw_line in cargo_lock.read_text(encoding="utf-8", errors="replace").splitlines():
792792
stripped_line = raw_line.strip()
793793
if stripped_line.startswith('name = "'):
794794
current_name = stripped_line.split('"')[1]

src/agent_bom/parsers/node_parsers.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ def parse_npm_packages(directory: Path) -> list[Package]:
6666
is_direct=clean_name in direct_deps,
6767
)
6868
)
69-
except (json.JSONDecodeError, KeyError):
70-
pass
69+
except (json.JSONDecodeError, KeyError) as exc:
70+
logger.debug("Failed to parse package-lock.json in %s: %s", directory, exc)
7171

7272
# Fallback to package.json only
7373
elif (directory / "package.json").exists():
@@ -89,8 +89,8 @@ def parse_npm_packages(directory: Path) -> list[Package]:
8989
is_direct=True,
9090
)
9191
)
92-
except (json.JSONDecodeError, KeyError):
93-
pass
92+
except (json.JSONDecodeError, KeyError) as exc:
93+
logger.debug("Failed to parse package.json in %s: %s", directory, exc)
9494

9595
return packages
9696

src/agent_bom/parsers/python_parsers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,8 @@ def parse_pip_packages(directory: Path) -> list[Package]:
252252
is_direct=section == "default",
253253
)
254254
)
255-
except (json.JSONDecodeError, KeyError):
256-
pass
255+
except (json.JSONDecodeError, KeyError) as exc:
256+
logger.debug("Failed to parse Pipfile.lock in %s: %s", directory, exc)
257257

258258
# Try pyproject.toml
259259
pyproject = directory / "pyproject.toml"

src/agent_bom/policy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ def _rule_matches(rule: dict, br) -> bool:
554554

555555
# has_kev_with_no_fix: KEV vulnerabilities without a known fix
556556
if rule.get("has_kev_with_no_fix"):
557-
if not br.vulnerability.is_kev or br.vulnerability.fixed_version:
557+
if not (br.vulnerability.is_kev and not br.vulnerability.fixed_version):
558558
return False
559559

560560
return True

src/agent_bom/terraform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def scan_terraform_dir(tf_dir: str) -> tuple[list[Agent], list[str]]:
315315
Package(
316316
name=go_module,
317317
version=version,
318-
ecosystem="Go",
318+
ecosystem="go",
319319
purl=f"pkg:golang/{go_module}@{version}",
320320
)
321321
)

tests/test_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1503,7 +1503,7 @@ def test_terraform_scan_creates_agents(tmp_path):
15031503
assert agent.source == "terraform"
15041504
# Provider package should be Go ecosystem
15051505
pkgs = [p for srv in agent.mcp_servers for p in srv.packages]
1506-
assert any(p.ecosystem == "Go" for p in pkgs)
1506+
assert any(p.ecosystem == "go" for p in pkgs)
15071507

15081508

15091509
def test_terraform_scan_empty_dir(tmp_path):

0 commit comments

Comments
 (0)