Skip to content

Commit dc4f24c

Browse files
committed
Fix some tests
1 parent e2a20ce commit dc4f24c

21 files changed

+192
-129
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ dependencies = [
2525
"tomlkit~=0.13.0",
2626
"wrapt~=1.17.0",
2727
"chardet~=5.2.0",
28-
"sarif-pydantic~=0.2.0",
28+
"sarif-pydantic~=0.3.0",
2929
"setuptools~=78.1",
3030
]
3131
keywords = ["codemod", "codemods", "security", "fix", "fixes"]

src/codemodder/codeql.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from pathlib import Path
22

3+
from sarif_pydantic import Location as LocationModel
4+
from sarif_pydantic import Result as ResultModel
35
from sarif_pydantic import Sarif
46
from typing_extensions import Self
57

@@ -14,27 +16,30 @@ def detect(cls, run_data: Run) -> bool:
1416

1517

1618
class CodeQLLocation(SarifLocation):
17-
@staticmethod
18-
def get_snippet(sarif_location) -> str:
19-
return ""
20-
2119
@classmethod
22-
def from_sarif(cls, sarif_location) -> Self:
23-
artifact_location = sarif_location["physicalLocation"]["artifactLocation"]
24-
file = Path(artifact_location["uri"])
20+
def from_sarif(cls, sarif_location: LocationModel) -> Self:
21+
if (physical_location := sarif_location.physical_location) is None:
22+
raise ValueError("Location does not contain a physicalLocation")
23+
if (artifact_location := physical_location.artifact_location) is None:
24+
raise ValueError("PhysicalLocation does not contain an artifactLocation")
25+
if (uri := artifact_location.uri) is None:
26+
raise ValueError("ArtifactLocation does not contain a uri")
27+
28+
file = Path(uri)
2529

26-
try:
27-
region = sarif_location["physicalLocation"]["region"]
28-
except KeyError:
30+
if not (region := physical_location.region):
2931
# A location without a region indicates a result for the entire file.
3032
# Use sentinel values of 0 index for start/end
3133
zero = LineInfo(0)
3234
return cls(file=file, start=zero, end=zero)
3335

34-
start = LineInfo(line=region["startLine"], column=region.get("startColumn"))
36+
if not region.start_line:
37+
raise ValueError("Region does not contain a startLine")
38+
39+
start = LineInfo(line=region.start_line, column=region.start_column or -1)
3540
end = LineInfo(
36-
line=region.get("endLine", start.line),
37-
column=region.get("endColumn", start.column),
41+
line=region.end_line or start.line,
42+
column=region.end_column or start.column,
3843
)
3944
return cls(file=file, start=start, end=end)
4045

@@ -43,7 +48,7 @@ class CodeQLResult(SarifResult):
4348
location_type = CodeQLLocation
4449

4550
@classmethod
46-
def rule_url_from_id(cls, result: dict, run: dict, rule_id: str) -> str:
51+
def rule_url_from_id(cls, result: ResultModel, run: Run, rule_id: str) -> str:
4752
del result, run, rule_id
4853
# TODO: Implement this method to return the specific rule URL
4954
return "https://codeql.github.com/codeql-query-help/"

src/codemodder/result.py

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
import itertools
4-
from abc import abstractmethod
54
from dataclasses import dataclass, field
65
from pathlib import Path
76
from typing import TYPE_CHECKING, Any, ClassVar, Sequence, Type
@@ -39,23 +38,30 @@ class Location(ABCDataclass):
3938
@dataclass(frozen=True)
4039
class SarifLocation(Location):
4140
@staticmethod
42-
@abstractmethod
43-
def get_snippet(sarif_location) -> str:
44-
pass
41+
def get_snippet(sarif_location: LocationModel) -> str | None:
42+
return sarif_location.message.text if sarif_location.message else None
4543

4644
@classmethod
4745
def from_sarif(cls, sarif_location: LocationModel) -> Self:
48-
artifact_location = sarif_location["physicalLocation"]["artifactLocation"]
49-
file = Path(artifact_location["uri"])
46+
if not (physical_location := sarif_location.physical_location):
47+
raise ValueError("Sarif location does not have a physical location")
48+
if not (artifact_location := physical_location.artifact_location):
49+
raise ValueError("Sarif location does not have an artifact location")
50+
if not (region := physical_location.region):
51+
raise ValueError("Sarif location does not have a region")
52+
if not (uri := artifact_location.uri):
53+
raise ValueError("Sarif location does not have a uri")
54+
55+
file = Path(uri)
5056
snippet = cls.get_snippet(sarif_location)
5157
start = LineInfo(
52-
line=sarif_location["physicalLocation"]["region"]["startLine"],
53-
column=sarif_location["physicalLocation"]["region"]["startColumn"],
58+
line=region.start_line or -1,
59+
column=region.start_column or -1,
5460
snippet=snippet,
5561
)
5662
end = LineInfo(
57-
line=sarif_location["physicalLocation"]["region"]["endLine"],
58-
column=sarif_location["physicalLocation"]["region"]["endColumn"],
63+
line=region.end_line or -1,
64+
column=region.end_column or -1,
5965
snippet=snippet,
6066
)
6167
return cls(file=file, start=start, end=end)
@@ -150,14 +156,17 @@ def extract_locations(cls, sarif_result: ResultModel) -> Sequence[Location]:
150156
)
151157

152158
@classmethod
153-
def extract_related_locations(cls, sarif_result) -> Sequence[LocationWithMessage]:
159+
def extract_related_locations(
160+
cls, sarif_result: ResultModel
161+
) -> Sequence[LocationWithMessage]:
154162
return tuple(
155163
[
156164
LocationWithMessage(
157-
message=rel_location.get("message", {}).get("text", ""),
165+
message=rel_location.message.text,
158166
location=cls.location_type.from_sarif(rel_location),
159167
)
160-
for rel_location in sarif_result.get("relatedLocations", [])
168+
for rel_location in sarif_result.related_locations or []
169+
if rel_location.message
161170
]
162171
)
163172

@@ -187,7 +196,12 @@ def extract_rule_id(
187196
return rule_id.split(".")[-1] if truncate_rule_id else rule_id
188197

189198
# it may be contained in the 'rule' field through the tool component in the sarif file
190-
if (rule := result.rule) and sarif_run.tool.extensions and rule.tool_component:
199+
if (
200+
(rule := result.rule)
201+
and sarif_run.tool.extensions
202+
and rule.tool_component
203+
and rule.tool_component.index is not None
204+
):
191205
tool_index = rule.tool_component.index
192206
rule_index = rule.index
193207
return sarif_run.tool.extensions[tool_index].rules[rule_index].id

src/codemodder/semgrep.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,52 @@
11
import itertools
2-
import json
32
import subprocess
43
from pathlib import Path
54
from tempfile import NamedTemporaryFile
65
from typing import Iterable, Optional
76

7+
from sarif_pydantic import Sarif
88
from typing_extensions import Self, override
99

1010
from codemodder.context import CodemodExecutionContext
1111
from codemodder.logging import logger
12-
from codemodder.result import Result, ResultSet, SarifLocation, SarifResult
12+
from codemodder.result import (
13+
LocationModel,
14+
Result,
15+
ResultModel,
16+
ResultSet,
17+
SarifLocation,
18+
SarifResult,
19+
)
1320
from codemodder.sarifs import AbstractSarifToolDetector, Run
1421

1522

1623
class SemgrepSarifToolDetector(AbstractSarifToolDetector):
1724
@classmethod
1825
def detect(cls, run_data: Run) -> bool:
19-
return "semgrep" in run_data.tool.driver.name
26+
return "semgrep" in run_data.tool.driver.name.lower()
2027

2128

2229
class SemgrepLocation(SarifLocation):
2330
@staticmethod
24-
def get_snippet(sarif_location) -> str:
31+
def get_snippet(sarif_location: LocationModel) -> str | None:
2532
return (
26-
sarif_location["physicalLocation"]["region"].get("snippet", {}).get("text")
33+
sarif_location.physical_location.region.snippet.text
34+
if sarif_location.physical_location
35+
and sarif_location.physical_location.region
36+
and sarif_location.physical_location.region.snippet
37+
else SarifLocation.get_snippet(sarif_location)
2738
)
2839

2940

3041
class SemgrepResult(SarifResult):
3142
location_type = SemgrepLocation
3243

44+
@override
3345
@classmethod
34-
def rule_url_from_id(cls, sarif_result, sarif_run, rule_id):
35-
del sarif_result, sarif_run
46+
def rule_url_from_id(
47+
cls, result: ResultModel, run: Run, rule_id: str
48+
) -> str | None:
49+
del result, run
3650
from core_codemods.semgrep.api import semgrep_url_from_id
3751

3852
return semgrep_url_from_id(rule_id)
@@ -41,17 +55,16 @@ def rule_url_from_id(cls, sarif_result, sarif_run, rule_id):
4155
class SemgrepResultSet(ResultSet):
4256
@classmethod
4357
def from_sarif(cls, sarif_file: str | Path, truncate_rule_id: bool = False) -> Self:
44-
with open(sarif_file, "r", encoding="utf-8") as f:
45-
data = json.load(f)
58+
data = Sarif.model_validate_json(Path(sarif_file).read_text())
4659

4760
result_set = cls()
48-
for sarif_run in data["runs"]:
49-
for result in sarif_run["results"]:
61+
for sarif_run in data.runs:
62+
for result in sarif_run.results or []:
5063
sarif_result = SemgrepResult.from_sarif(
5164
result, sarif_run, truncate_rule_id
5265
)
5366
result_set.add_result(sarif_result)
54-
result_set.store_tool_data(sarif_run.get("tool", {}))
67+
result_set.store_tool_data(sarif_run.tool.model_dump())
5568
return result_set
5669

5770

tests/codemods/semgrep/test_semgrep_django_secure_set_cookie.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def index(request, template):
3838
results = {
3939
"runs": [
4040
{
41+
"tool": {"driver": {"name": "Semgrep OSS"}},
4142
"results": [
4243
{
4344
"fingerprints": {"matchBasedId/v1": "123"},
@@ -66,7 +67,7 @@ def index(request, template):
6667
"properties": {},
6768
"ruleId": "python.django.security.audit.secure-cookies.django-secure-set-cookie",
6869
}
69-
]
70+
],
7071
}
7172
]
7273
}

tests/codemods/semgrep/test_semgrep_enable_jinja2_autoescape.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def test_import(self, tmpdir):
2727
results = {
2828
"runs": [
2929
{
30+
"tool": {"driver": {"name": "Semgrep OSS"}},
3031
"results": [
3132
{
3233
"fingerprints": {"matchBasedId/v1": "123"},
@@ -55,7 +56,7 @@ def test_import(self, tmpdir):
5556
"properties": {},
5657
"ruleId": "python.flask.security.xss.audit.direct-use-of-jinja2.direct-use-of-jinja2",
5758
}
58-
]
59+
],
5960
}
6061
]
6162
}

tests/codemods/semgrep/test_semgrep_harden_pyyaml.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def test_pyyaml(self, tmpdir):
2626
results = {
2727
"runs": [
2828
{
29+
"tool": {"driver": {"name": "Semgrep OSS"}},
2930
"results": [
3031
{
3132
"fingerprints": {"matchBasedId/v1": "123"},
@@ -54,7 +55,7 @@ def test_pyyaml(self, tmpdir):
5455
"properties": {},
5556
"ruleId": "python.lang.security.deserialization.avoid-pyyaml-load.avoid-pyyaml-load",
5657
}
57-
]
58+
],
5859
}
5960
]
6061
}
@@ -84,6 +85,7 @@ def index(request):
8485
results = {
8586
"runs": [
8687
{
88+
"tool": {"driver": {"name": "Semgrep OSS"}},
8789
"results": [
8890
{
8991
"fingerprints": {"matchBasedId/v1": "123"},
@@ -112,7 +114,7 @@ def index(request):
112114
"properties": {},
113115
"ruleId": "python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization",
114116
}
115-
]
117+
],
116118
}
117119
]
118120
}

tests/codemods/semgrep/test_semgrep_jwt_decode_verify.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def test_import(self, tmpdir):
2525
results = {
2626
"runs": [
2727
{
28+
"tool": {"driver": {"name": "Semgrep OSS"}},
2829
"results": [
2930
{
3031
"fingerprints": {"matchBasedId/v1": "123"},
@@ -52,7 +53,7 @@ def test_import(self, tmpdir):
5253
},
5354
"ruleId": "python.jwt.security.unverified-jwt-decode.unverified-jwt-decode",
5455
}
55-
]
56+
],
5657
}
5758
]
5859
}

tests/codemods/semgrep/test_semgrep_nan_injection.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def home(request):
3838
results = {
3939
"runs": [
4040
{
41+
"tool": {"driver": {"name": "Semgrep OSS"}},
4142
"results": [
4243
{
4344
"fingerprints": {"matchBasedId/v1": "1932"},
@@ -112,6 +113,7 @@ def view(request):
112113
results = {
113114
"runs": [
114115
{
116+
"tool": {"driver": {"name": "Semgrep OSS"}},
115117
"results": [
116118
{
117119
"fingerprints": {"matchBasedId/v1": "1fdbd5a"},
@@ -247,6 +249,7 @@ def view(request):
247249
results = {
248250
"runs": [
249251
{
252+
"tool": {"driver": {"name": "Semgrep OSS"}},
250253
"results": [
251254
{
252255
"fingerprints": {"matchBasedId/v1": "asdfg"},
@@ -304,6 +307,7 @@ def view(request):
304307
results = {
305308
"runs": [
306309
{
310+
"tool": {"driver": {"name": "Semgrep OSS"}},
307311
"results": [
308312
{
309313
"fingerprints": {"matchBasedId/v1": "q324"},
@@ -359,6 +363,7 @@ def view(request):
359363
results = {
360364
"runs": [
361365
{
366+
"tool": {"driver": {"name": "Semgrep OSS"}},
362367
"results": [
363368
{
364369
"fingerprints": {"matchBasedId/v1": "asdtg"},
@@ -416,6 +421,7 @@ def view(request):
416421
results = {
417422
"runs": [
418423
{
424+
"tool": {"driver": {"name": "Semgrep OSS"}},
419425
"results": [
420426
{
421427
"fingerprints": {"matchBasedId/v1": "asd2"},

tests/codemods/semgrep/test_semgrep_no_csrf_exempt.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def foo():
5252
results = {
5353
"runs": [
5454
{
55+
"tool": {"driver": {"name": "Semgrep OSS"}},
5556
"results": [
5657
{
5758
"fingerprints": {"matchBasedId/v1": "a3ca2"},

0 commit comments

Comments
 (0)