Skip to content

Commit a5e5f16

Browse files
committed
Add CodeQL SARIF support to viz
1 parent 279d2a4 commit a5e5f16

File tree

5 files changed

+144
-25
lines changed

5 files changed

+144
-25
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Added
1111

1212
- Official support for Python 3.13
13+
- CodeQL SARIF file support, pass `--codeql` to `viz`
1314

1415
### Removed
1516

routes/commands/viz.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import collections
2-
import json
32
import logging
43
import pathlib
54
import webbrowser
@@ -16,7 +15,7 @@ def get_global(global_results, _global):
1615
return None
1716

1817
global_locations = " ".join(
19-
f"{r.check_id}:{r.path}:{r.start_line}" for r in global_results
18+
f"{r.id}:{r.path}:{r.start_line}" for r in global_results
2019
)
2120

2221
if len(global_results) > 1:
@@ -44,12 +43,7 @@ def d3ify(parts, output, result, _global):
4443
d3ify(parts, new_output, result, _global)
4544
else:
4645
name = f"ln {result.start_line}: {result.first_line}"
47-
48-
if _global:
49-
fill = _global.rd_fill
50-
else:
51-
fill = result.rd_fill
52-
46+
fill = _global.rd_fill if _global else result.rd_fill
5347
check_node = {"name": name, "fill": fill, "title": result.lines}
5448
new_node.setdefault("children", []).append(check_node)
5549

@@ -66,17 +60,18 @@ def merge_d3_results(d1s, d2s):
6660

6761

6862
def main(args):
69-
logger.info("Reading input file %s", args.input.name)
70-
data = json.load(args.input)
63+
logger.info("Processing input file %s", args.input.name)
64+
65+
output_cls = types.CodeQLOutput if args.codeql else types.SemgrepOutput
66+
output = output_cls.from_fd(args.input)
67+
results = output.results
7168

72-
semgrep_results = [types.SemgrepResult(r) for r in data["results"]]
73-
counts = collections.Counter([r.check_id for r in semgrep_results])
74-
count_output = " ".join(f"{k}={v}" for k, v in counts.items())
75-
logger.info("Finding rule counts: %s", count_output)
69+
for result_id, count in collections.Counter([r.id for r in results]).items():
70+
logger.info("Found %d results for id %s", count, result_id)
7671

7772
results_by_type = {
7873
key: list(group)
79-
for key, group in util.sorted_groupby(semgrep_results, key=lambda r: r.rd_type)
74+
for key, group in util.sorted_groupby(results, key=lambda r: r.rd_type)
8075
}
8176

8277
global_results = results_by_type.get(types.ResultType.GLOBAL.value, {})
@@ -86,7 +81,7 @@ def main(args):
8681
d3_results = []
8782
for result in results_by_type.get(types.ResultType.ROUTE.value, []):
8883
path = pathlib.PurePath(result.path)
89-
logger.debug("Processing %s:%s:%s", result.check_id, path, result.start_line)
84+
logger.debug("Processing %s:%s:%s", result.id, path, result.start_line)
9085
root, *_ = path.parts
9186
root_paths.add(root)
9287
output = []

routes/main.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,16 @@ def parse_args(args=None):
6666
help="Open HTML output file in browser",
6767
)
6868
viz_parser.add_argument(
69-
"--global",
70-
dest="_global",
69+
"-c",
70+
"--codeql",
7171
action="store_true",
72-
help="Expiremental: enable global security configuration detection",
72+
help="Parse input file as CodeQL SARIF format",
7373
)
7474
viz_parser.add_argument(
75-
"--interprocedural",
75+
"--global",
76+
dest="_global",
7677
action="store_true",
77-
help="Expiremental: enable interprocedural security configuration detection",
78+
help="Expiremental: enable global security configuration detection",
7879
)
7980

8081
return p.parse_args(args=args)

routes/types.py

Lines changed: 124 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import abc
12
import enum
3+
import json
24

35
from routes import const
46

@@ -8,12 +10,61 @@ class ResultType(enum.Enum):
810
GLOBAL = "global"
911

1012

11-
class SemgrepResult:
13+
class BaseOutput(abc.ABC):
    """Abstract wrapper around a scanner's parsed JSON output document.

    Subclasses must implement :attr:`results`. Deriving from ``abc.ABC`` is
    what makes ``@abc.abstractmethod`` effective: without it a subclass that
    forgets ``results`` would instantiate fine and silently return ``None``.
    """

    def __init__(self, output):
        """Store the already-parsed JSON document as ``self.output``."""
        self.output = output

    @classmethod
    def from_fd(cls, fd):
        """Build an instance by JSON-decoding the readable file object *fd*."""
        return cls(json.load(fd))

    @property
    @abc.abstractmethod
    def results(self):
        """Return the result objects extracted from ``self.output``."""
25+
26+
27+
class BaseResult(abc.ABC):
    """Interface every scanner result must expose.

    The five abstract properties have to be supplied by subclasses;
    ``rd_type`` and ``rd_fill`` come with defaults that subclasses may
    override.
    """

    @property
    @abc.abstractmethod
    def id(self):
        """Identifier of the rule/check that produced this result."""

    @property
    @abc.abstractmethod
    def path(self):
        """Path of the file the result was reported in."""

    @property
    @abc.abstractmethod
    def start_line(self):
        """Line number at which the result starts."""

    @property
    @abc.abstractmethod
    def lines(self):
        """Source text associated with the result."""

    @property
    @abc.abstractmethod
    def first_line(self):
        """Text of the line the result starts on."""

    @property
    def rd_type(self):
        """Result type; defaults to the ``ROUTE`` value of ``ResultType``."""
        return ResultType.ROUTE.value

    @property
    def rd_fill(self):
        """Fill colour; defaults to ``const.DEFAULT_FILL_COLOR``."""
        return const.DEFAULT_FILL_COLOR
60+
61+
62+
class SemgrepResult(BaseResult):
1263
def __init__(self, result):
1364
self.result = result
1465

1566
@property
16-
def check_id(self):
67+
def id(self):
1768
return self.result["check_id"]
1869

1970
@property
@@ -55,3 +106,74 @@ def rd_type(self):
55106
@property
56107
def rd_fill(self):
57108
return self.rd_metadata.get("fill", const.DEFAULT_FILL_COLOR)
109+
110+
111+
class SemgrepOutput(BaseOutput):
    """Output document whose findings live under the top-level ``"results"`` key."""

    @property
    def results(self):
        """Return a new list with each raw entry wrapped in ``SemgrepResult``."""
        raw_results = self.output["results"]
        return list(map(SemgrepResult, raw_results))
115+
116+
117+
class CodeQLResult(BaseResult):
    """One entry of a SARIF run's ``results`` array, as emitted by CodeQL.

    ``result`` is the raw result dict; ``output`` is the owning output
    object, used to look up the artifact (file) the result points at via
    ``output.first_run``.
    """

    # Number of source lines included above and below the flagged region.
    _CONTEXT_LINES = 1

    def __init__(self, result, output):
        self.result = result
        self.output = output

        # Assume last location provides the relevant code snippet
        self.location = self.result["locations"][-1]["physicalLocation"]

    @property
    def id(self):
        """Rule identifier, read from ``result["rule"]["id"]``."""
        return self.result["rule"]["id"]

    @property
    def path(self):
        """Artifact URI prefixed with a synthetic ``repo/`` root directory."""
        # TODO find more robust way to ensure all result paths have a single root directory
        return "repo" + "/" + self.location["artifactLocation"]["uri"]

    @property
    def artifact_index(self):
        """Index of this result's file within the run's ``artifacts`` list."""
        return self.location["artifactLocation"]["index"]

    @property
    def start_line(self):
        """1-based line on which the flagged region starts."""
        return self.location["region"]["startLine"]

    @property
    def end_line(self):
        """Last line of the flagged region; falls back to ``start_line``."""
        return self.location["region"].get("endLine", self.start_line)

    def _source_lines(self):
        """Return the artifact's text split into lines, or ``[]`` if absent."""
        artifact = self.output.first_run["artifacts"][self.artifact_index]

        # --sarif-add-file-contents provides this data
        contents = artifact.get("contents", {}).get("text", "")
        return contents.split("\n") if contents else []

    @property
    def lines(self):
        """Flagged region plus surrounding context, joined with newlines.

        Always a string: an empty one when the SARIF file carries no file
        contents (previously an empty list was returned in that case, which
        made the return type inconsistent and broke ``first_line``).
        """
        source = self._source_lines()

        # -1 for 0-based indexing; clamp so a result on line 1 does not wrap
        start = max(self.start_line - self._CONTEXT_LINES - 1, 0)
        end = self.end_line + self._CONTEXT_LINES

        return "\n".join(source[start:end])

    @property
    def first_line(self):
        """Text of the line at ``start_line``, or ``""`` if unavailable.

        Indexes the file contents directly instead of assuming the flagged
        line is the second element of ``lines``; that assumption is wrong
        when the result is on line 1 (no context line above it) and raised
        ``IndexError`` for single-line files.
        """
        source = self._source_lines()
        index = self.start_line - 1
        if 0 <= index < len(source):
            return source[index]
        return ""
169+
170+
171+
class CodeQLOutput(BaseOutput):
    """SARIF output document; only its first run is consulted."""

    @property
    def first_run(self):
        """Return the first element of the document's ``"runs"`` list."""
        runs = self.output["runs"]
        # Assume we only have a single run
        return runs[0]

    @property
    def results(self):
        """Return a ``CodeQLResult`` for every entry in the first run's results."""
        run = self.first_run
        return [CodeQLResult(raw, self) for raw in run["results"]]

tests/test_commands/test_viz.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ def test_viz_basic(data, expected):
218218
output=output,
219219
template=template,
220220
browser=False,
221-
interprocedural=True,
221+
codeql=False,
222222
_global=True,
223223
)
224224

@@ -249,7 +249,7 @@ def test_viz_multiple_root():
249249
output=output,
250250
template=template,
251251
browser=False,
252-
interprocedural=True,
252+
codeql=False,
253253
_global=True,
254254
)
255255

0 commit comments

Comments
 (0)