Skip to content

Commit e0115e8

Browse files
committed
Prevent duplicate findings from multiple sarif files
1 parent cbc6a29 commit e0115e8

File tree

8 files changed: 359 additions (+359) and 309 deletions (-309)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ readme = "README.md"
1111
license = {file = "LICENSE"}
1212
description = "A pluggable framework for building codemods in Python"
1313
dependencies = [
14+
"boltons~=21.0.0",
1415
"GitPython<4",
1516
"isort>=5.12,<5.14",
1617
"libcst>=1.1,<1.6",

src/codemodder/codetf.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,11 +115,17 @@ class Rule(BaseModel):
115115
name: str
116116
url: Optional[str] = None
117117

118+
class Config:
119+
frozen = True
120+
118121

119122
class Finding(BaseModel):
120123
id: str
121124
rule: Rule
122125

126+
class Config:
127+
frozen = True
128+
123129
def to_unfixed_finding(
124130
self,
125131
*,

src/codemodder/result.py

Lines changed: 46 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,10 @@
44
from abc import abstractmethod
55
from dataclasses import dataclass, field
66
from pathlib import Path
7-
from typing import TYPE_CHECKING, Any, ClassVar, Type
7+
from typing import TYPE_CHECKING, Any, ClassVar, Sequence, Type
88

99
import libcst as cst
10+
from boltons.setutils import IndexedSet
1011
from libcst._position import CodeRange
1112
from typing_extensions import Self
1213

@@ -18,39 +19,40 @@
1819
from codemodder.context import CodemodExecutionContext
1920

2021

21-
@dataclass
22+
@dataclass(frozen=True)
2223
class LineInfo:
2324
line: int
2425
column: int = -1
2526
snippet: str | None = None
2627

2728

28-
@dataclass
29+
@dataclass(frozen=True)
2930
class Location(ABCDataclass):
3031
file: Path
3132
start: LineInfo
3233
end: LineInfo
3334

3435

36+
@dataclass(frozen=True)
3537
class SarifLocation(Location):
3638
@classmethod
3739
@abstractmethod
3840
def from_sarif(cls, sarif_location) -> Self:
3941
pass
4042

4143

42-
@dataclass
44+
@dataclass(frozen=True)
4345
class LocationWithMessage:
4446
location: Location
4547
message: str
4648

4749

48-
@dataclass(kw_only=True)
50+
@dataclass(frozen=True, kw_only=True)
4951
class Result(ABCDataclass):
5052
rule_id: str
51-
locations: list[Location]
52-
codeflows: list[list[Location]] = field(default_factory=list)
53-
related_locations: list[LocationWithMessage] = field(default_factory=list)
53+
locations: Sequence[Location]
54+
codeflows: Sequence[Sequence[Location]] = field(default_factory=tuple)
55+
related_locations: Sequence[LocationWithMessage] = field(default_factory=tuple)
5456
finding: Finding | None = None
5557

5658
def match_location(self, pos: CodeRange, node: cst.CSTNode) -> bool:
@@ -67,13 +69,16 @@ def match_location(self, pos: CodeRange, node: cst.CSTNode) -> bool:
6769
for location in self.locations
6870
)
6971

72+
def __hash__(self):
73+
return hash(self.rule_id)
7074

71-
@dataclass(kw_only=True)
75+
76+
@dataclass(frozen=True, kw_only=True)
7277
class SASTResult(Result):
7378
finding_id: str
7479

7580

76-
@dataclass(kw_only=True)
81+
@dataclass(frozen=True, kw_only=True)
7782
class SarifResult(SASTResult, ABCDataclass):
7883
location_type: ClassVar[Type[SarifLocation]]
7984

@@ -84,32 +89,40 @@ def from_sarif(
8489
raise NotImplementedError
8590

8691
@classmethod
87-
def extract_locations(cls, sarif_result) -> list[Location]:
88-
return [
89-
cls.location_type.from_sarif(location)
90-
for location in sarif_result["locations"]
91-
]
92+
def extract_locations(cls, sarif_result) -> Sequence[Location]:
93+
return tuple(
94+
[
95+
cls.location_type.from_sarif(location)
96+
for location in sarif_result["locations"]
97+
]
98+
)
9299

93100
@classmethod
94-
def extract_related_locations(cls, sarif_result) -> list[LocationWithMessage]:
95-
return [
96-
LocationWithMessage(
97-
message=rel_location.get("message", {}).get("text", ""),
98-
location=cls.location_type.from_sarif(rel_location),
99-
)
100-
for rel_location in sarif_result.get("relatedLocations", [])
101-
]
101+
def extract_related_locations(cls, sarif_result) -> Sequence[LocationWithMessage]:
102+
return tuple(
103+
[
104+
LocationWithMessage(
105+
message=rel_location.get("message", {}).get("text", ""),
106+
location=cls.location_type.from_sarif(rel_location),
107+
)
108+
for rel_location in sarif_result.get("relatedLocations", [])
109+
]
110+
)
102111

103112
@classmethod
104-
def extract_code_flows(cls, sarif_result) -> list[list[Location]]:
105-
return [
113+
def extract_code_flows(cls, sarif_result) -> Sequence[Sequence[Location]]:
114+
return tuple(
106115
[
107-
cls.location_type.from_sarif(locations.get("location"))
108-
for locations in threadflow.get("locations", {})
116+
tuple(
117+
[
118+
cls.location_type.from_sarif(locations.get("location"))
119+
for locations in threadflow.get("locations", {})
120+
]
121+
)
122+
for codeflow in sarif_result.get("codeFlows", {})
123+
for threadflow in codeflow.get("threadFlows", {})
109124
]
110-
for codeflow in sarif_result.get("codeFlows", {})
111-
for threadflow in codeflow.get("threadFlows", {})
112-
]
125+
)
113126

114127
@classmethod
115128
def extract_rule_id(cls, result, sarif_run, truncate_rule_id: bool = False) -> str:
@@ -199,5 +212,7 @@ def list_dict_or(
199212
) -> dict[Any, list[Any]]:
200213
result_dict = {}
201214
for k in other.keys() | dictionary.keys():
202-
result_dict[k] = dictionary.get(k, []) + other.get(k, [])
215+
result_dict[k] = list(
216+
IndexedSet(dictionary.get(k, [])) | (IndexedSet(other.get(k, [])))
217+
)
203218
return result_dict

src/codemodder/utils/abc_dataclass.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from dataclasses import dataclass
33

44

5-
@dataclass
5+
@dataclass(frozen=True)
66
class ABCDataclass(ABC):
77
"""Inspired by https://stackoverflow.com/a/60669138"""
88

src/core_codemods/defectdojo/results.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ def from_result(cls, result: dict) -> Self:
2727
return cls(
2828
finding_id=result["id"],
2929
rule_id=result["title"],
30-
locations=[DefectDojoLocation.from_result(result)],
30+
locations=tuple([DefectDojoLocation.from_result(result)]),
3131
finding=Finding(
3232
id=str(result["id"]),
3333
rule=Rule(

src/core_codemods/sonar/results.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from dataclasses import replace
33
from functools import cache
44
from pathlib import Path
5+
from typing import Sequence
56

67
import libcst as cst
78
from typing_extensions import Self
@@ -40,18 +41,22 @@ def from_result(cls, result: dict) -> Self:
4041
if not (rule_id := result.get("rule", None) or result.get("ruleKey", None)):
4142
raise ValueError("Could not extract rule id from sarif result.")
4243

43-
locations: list[Location] = (
44+
locations: Sequence[Location] = tuple(
4445
[SonarLocation.from_json_location(result)]
4546
if result.get("textRange")
4647
else []
4748
)
48-
all_flows: list[list[Location]] = [
49+
all_flows: Sequence[Sequence[Location]] = tuple(
4950
[
50-
SonarLocation.from_json_location(json_location)
51-
for json_location in flow.get("locations", {})
51+
tuple(
52+
[
53+
SonarLocation.from_json_location(json_location)
54+
for json_location in flow.get("locations", {})
55+
]
56+
)
57+
for flow in result.get("flows", [])
5258
]
53-
for flow in result.get("flows", [])
54-
]
59+
)
5560

5661
finding_id = result.get("key", rule_id)
5762

0 commit comments

Comments
 (0)