Skip to content

Commit 9861343

Browse files
authored
Handle detection of SARIFs that have UTF-8 BOMs (#713)
1 parent 0d45f59 commit 9861343

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

src/codemodder/sarifs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def detect_sarif_tools(filenames: list[Path]) -> DefaultDict[str, list[str]]:
2323
ent.name: ent.load() for ent in entry_points().select(group="sarif_detectors")
2424
}
2525
for fname in filenames:
26-
data = json.loads(fname.read_text())
26+
data = json.loads(fname.read_text(encoding="utf-8-sig"))
2727
for name, det in detectors.items():
2828
# TODO: handle malformed sarif?
2929
for run in data["runs"]:

tests/test_sarif_processing.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import pytest
66

7+
from codemodder.sarifs import detect_sarif_tools
78
from codemodder.semgrep import SemgrepResult, SemgrepResultSet
89

910

@@ -34,6 +35,17 @@ def test_extract_rule_id_semgrep(self):
3435
)
3536
assert rule_id == "secure-random"
3637

38+
def test_detect_sarif_with_bom_encoding(self, tmpdir):
39+
sarif_file = Path("tests") / "samples" / "semgrep.sarif"
40+
sarif_file_bom = tmpdir / "semgrep_bom.sarif"
41+
42+
with open(sarif_file_bom, "w") as f:
43+
f.write("\ufeff")
44+
f.write(sarif_file.read_text(encoding="utf-8"))
45+
46+
results = detect_sarif_tools([sarif_file_bom])
47+
assert len(results) == 1
48+
3749
@pytest.mark.parametrize("truncate", [True, False])
3850
def test_results_by_rule_id(self, truncate):
3951
sarif_file = Path("tests") / "samples" / "semgrep.sarif"

0 commit comments

Comments
 (0)