Skip to content

Commit e268378

Browse files
committed
Added transform for CodeTFv2 models to CodeTFv3
1 parent 1779c8d commit e268378

File tree

3 files changed

+196
-9
lines changed

3 files changed

+196
-9
lines changed

src/codemodder/codetf/v3/codetf.py

Lines changed: 91 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
from pydantic import BaseModel, model_validator
77

88
from ..common import Change, CodeTFWriter, Finding, FixQuality
9+
from ..v2.codetf import AIMetadata as AIMetadatav2
10+
from ..v2.codetf import CodeTF as CodeTFv2
911
from ..v2.codetf import Finding as V2Finding
12+
from ..v2.codetf import Result
13+
from ..v2.codetf import Run as Runv2
1014

1115

1216
class Run(BaseModel):
@@ -15,17 +19,17 @@ class Run(BaseModel):
1519
vendor: str
1620
tool: str
1721
version: str
18-
# Optional free-form metadata about the project being analyzed
19-
# e.g. project name, directory, commit SHA, etc.
20-
projectMetadata: Optional[str] = None
21-
# Analysis duration in milliseconds
22-
elapsed: Optional[int] = None
23-
# Optional free-form metadata about the inputs used for the analysis
22+
# optional free-form metadata about the project being analyzed
23+
# e.g. project name, directory, commit SHA, etc.
24+
projectmetadata: dict | None = None
25+
# analysis duration in milliseconds
26+
elapsed: int | None = None
27+
# optional free-form metadata about the inputs used for the analysis
2428
# e.g. command line, environment variables, etc.
25-
inputMetadata: Optional[dict] = None
26-
# Optional free-form metadata about the analysis itself
29+
inputmetadata: dict | None = None
30+
# optional free-form metadata about the analysis itself
2731
# e.g. timeouts, memory usage, etc.
28-
analysisMetadata: Optional[dict] = None
32+
analysismetadata: dict | None = None
2933

3034

3135
class FixStatusType(str, Enum):
@@ -116,3 +120,81 @@ def validate_fixMetadata(self):
116120
class CodeTF(CodeTFWriter, BaseModel):
117121
run: Run
118122
results: list[FixResult]
123+
124+
125+
def from_v2_run(run: Runv2) -> Run:
    """Convert the run section of a CodeTF v2 document to its v3 form.

    ``vendor``, ``tool``, ``version`` and ``elapsed`` are copied as-is. The
    remaining v2 fields are folded into the v3 free-form metadata dicts:

    * ``projectmetadata`` always has ``directory`` and gains ``projectName``
      only when the v2 run has a truthy project name.
    * ``inputmetadata`` always has ``commandLine`` and gains ``sarifs`` only
      when the v2 run lists at least one SARIF file.

    ``analysismetadata`` is not populated and keeps its default.
    """
    project_metadata: dict = {"directory": run.directory}
    if run.projectName:
        project_metadata["projectName"] = run.projectName

    input_metadata: dict = {"commandLine": run.commandLine}
    if run.sarifs:
        input_metadata["sarifs"] = run.sarifs

    return Run(
        vendor=run.vendor,
        tool=run.tool,
        version=run.version,
        elapsed=run.elapsed,
        projectmetadata=project_metadata,
        inputmetadata=input_metadata,
    )
141+
142+
143+
def from_v2_aimetadata(ai_metadata: AIMetadatav2) -> AIMetadata:
    """Convert CodeTF v2 AI metadata to the v3 representation.

    v2 stores a single ``model`` while v3 takes a list of ``models``, so a
    truthy model becomes a one-element list and a missing one becomes
    ``None``. The v2 ``tokens`` count is mapped to ``total_tokens``.
    """
    # NOTE(review): only the total and completion token counts are carried
    # over -- confirm whether v2 has other counters that should be mapped too.
    models = None
    if ai_metadata.model:
        models = [ai_metadata.model]

    return AIMetadata(
        provider=ai_metadata.provider,
        models=models,
        total_tokens=ai_metadata.tokens,
        completion_tokens=ai_metadata.completion_tokens,
    )
150+
151+
152+
def _generation_from_v2_changeset(cs) -> GenerationMetadata:
    """Build the v3 generation metadata for a single v2 changeset.

    When the v2 changeset records a ``strategy`` and/or ``provisional`` flag
    those values are preserved. Otherwise the strategy is inferred from the
    presence of AI metadata and the fix is treated as non-provisional.
    """
    # Fallback heuristic: hybrid AI codemods cannot be told apart from the AI
    # metadata alone, so anything carrying AI metadata is reported as "ai".
    strategy = Strategy.ai if cs.ai else Strategy.deterministic

    # Prefer the strategy stored in the v2 document when there is one.
    # ``getattr`` keeps this safe for changesets that have no such field.
    recorded = getattr(cs, "strategy", None)
    if recorded is not None:
        try:
            strategy = Strategy(getattr(recorded, "value", recorded))
        except ValueError:
            # Value unknown to the v3 enum: keep the inferred strategy.
            pass

    return GenerationMetadata(
        strategy=strategy,
        ai=from_v2_aimetadata(cs.ai) if cs.ai else None,
        provisional=bool(getattr(cs, "provisional", False)),
    )


def from_v2_result(result: Result) -> list[FixResult]:
    """Convert one CodeTF v2 result into a list of v3 fix results.

    v2 groups findings under a codemod result; v3 has one entry per finding:

    * every finding listed in ``fixedFindings`` of a change yields a
      ``fixed`` result whose only changeset holds that single change;
    * every entry in ``unfixedFindings`` yields a ``failed`` result carrying
      the v2 ``reason``.

    Changes without fixed findings produce no v3 entry.
    """
    fix_results: list[FixResult] = []

    # generate fixed
    for cs in result.changeset:
        generation_metadata = _generation_from_v2_changeset(cs)
        for c in cs.changes:
            for f in c.fixedFindings or []:
                fix_metadata = FixMetadata(
                    id=result.codemod,
                    summary=result.summary,
                    description=result.description,
                    generation=generation_metadata,
                )
                # Retrieve diff from changeset since individual diffs per change may not exist
                # If the codetf was generated with per-finding, each ChangeSet will have a single change anyway
                changeset = ChangeSet(
                    path=cs.path, diff=cs.diff, changes=[c.to_common()]
                )
                fix_results.append(
                    FixResult(
                        finding=f,
                        fixStatus=FixStatus(status=FixStatusType.fixed),
                        changeSets=[changeset],
                        fixMetadata=fix_metadata,
                    )
                )

    # generate unfixed
    for f in result.unfixedFindings or []:
        fix_results.append(
            FixResult(
                finding=f,
                fixStatus=FixStatus(status=FixStatusType.failed, reason=f.reason),
            )
        )

    return fix_results
194+
195+
196+
def from_v2(codetf: CodeTFv2) -> CodeTF:
    """Convert a whole CodeTF v2 document into a CodeTF v3 document.

    The run section is converted first. Each v2 result is then expanded into
    its v3 fix results, which are concatenated in the original result order.
    """
    run = from_v2_run(codetf.run)

    fix_results: list[FixResult] = []
    for v2_result in codetf.results:
        fix_results.extend(from_v2_result(v2_result))

    return CodeTF(run=run, results=fix_results)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"run":{"vendor":"pixee","tool":"codemodder-python","version":"6.2.3.dev2+gba1bb73","commandLine":"codemodder --dry-run repo --path-include=code.py --codemod-include=sonar:python/secure-tempfile --output out.codetf --sonar-json temp_sonar_issues.json --verbose","elapsed":206,"directory":"/home/andrecs/pixee/codemodder-python/repo","sarifs":[]},"results":[{"codemod":"sonar:python/secure-tempfile","summary":"Upgrade and Secure Temp File Creation","description":"This codemod replaces all `tempfile.mktemp` calls with the more secure `tempfile.NamedTemporaryFile`\n\nThe Python [tempfile documentation](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp) is explicit that `tempfile.mktemp` should be deprecated to avoid an unsafe and unexpected race condition. `tempfile.mktemp` does not handle the possibility that the returned file name could already be used by another process by the time your code opens the file. A more secure approach to create temporary files is to use `tempfile.NamedTemporaryFile` which will create the file for you and handle all security conditions. \n\nThe changes from this codemod look like this:\n\n```diff\n import tempfile\n- filename = tempfile.mktemp()\n+ with tempfile.NamedTemporaryFile(delete=False) as tf:\n+ filename = tf.name\n```\n\nThe change sets `delete=False` to closely follow your code's intention when calling `tempfile.mktemp`. 
However, you should use this as a starting point to determine when your temporary file should be deleted.\n","detectionTool":{"name":"Sonar"},"references":[{"url":"https://docs.python.org/3/library/tempfile.html#tempfile.mktemp","description":"https://docs.python.org/3/library/tempfile.html#tempfile.mktemp"},{"url":"https://cwe.mitre.org/data/definitions/377","description":"https://cwe.mitre.org/data/definitions/377"},{"url":"https://cwe.mitre.org/data/definitions/379","description":"https://cwe.mitre.org/data/definitions/379"},{"url":"https://rules.sonarsource.com/python/RSPEC-5445/","description":"Insecure temporary file creation methods should not be used"}],"properties":{},"failedFiles":[],"changeset":[{"path":"code.py","diff":"--- \n+++ \n@@ -2,5 +2,7 @@\n \n tmp_file = open(tempfile.mktemp(), \"w+\")\n tmp_file.write(\"text\")\n-filename = tempfile.mktemp()\n-filename_2 = tempfile.mktemp()\n+with tempfile.NamedTemporaryFile(delete=False) as tf:\n+ filename = tf.name\n+with tempfile.NamedTemporaryFile(delete=False) as tf:\n+ filename_2 = tf.name\n","changes":[{"lineNumber":5,"description":"Replaces `tempfile.mktemp` with `tempfile.mkstemp`.","diffSide":"right","fixedFindings":[{"id":"2mzYQLBPCYSBxYekUmkYOzcfIBk=","rule":{"id":"python:S5445","name":"Insecure temporary file creation methods should not be used","url":"https://rules.sonarsource.com/python/RSPEC-5445/"}}]},{"lineNumber":6,"description":"Replaces `tempfile.mktemp` with `tempfile.mkstemp`.","diffSide":"right","fixedFindings":[{"id":"rsaOe8uxk1JZ/mBTOPQIuh4tLas=","rule":{"id":"python:S5445","name":"Insecure temporary file creation methods should not be used","url":"https://rules.sonarsource.com/python/RSPEC-5445/"}}]}],"strategy":"deterministic","provisional":false}],"unfixedFindings":[{"id":"DmwOEj9aQKWqDyQ4MpDBx/rxFQ4=","rule":{"id":"python:S5445","name":"python:S5445","url":"https://rules.sonarsource.com/python/RSPEC-5445/"},"path":"code.py","lineNumber":3,"reason":"Pixee does not yet support this 
fix."}]}]}

tests/test_codetf.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
Rule,
1818
)
1919
from codemodder.codetf.v3.codetf import Finding as FindingV3
20+
from codemodder.codetf.v3.codetf import FixStatusType, from_v2
2021

2122

2223
@pytest.fixture(autouse=True)
@@ -186,3 +187,106 @@ def test_v2_finding_id_optional():
186187
def test_v3_finding_id_not_optional():
187188
with pytest.raises(ValidationError):
188189
FindingV3(id=None, rule=Rule(id="foo", name="whatever")) # type: ignore[arg-type]
190+
191+
192+
def test_v2_to_v3_conversion():
    """Check that a sample CodeTF v2 document converts to v3 as expected.

    Covers the run metadata mapping, the fixed/failed result counts, the fix
    metadata copied from the v2 result, and the association of each fixed
    finding with the change that fixed it.
    """
    from pathlib import Path

    # Resolve the fixture relative to this file so the test does not depend
    # on the working directory pytest is launched from, and read it as UTF-8
    # rather than with the platform default encoding.
    sample = Path(__file__).parent / "samples" / "codetfv2_sample.codetf"
    codetfv2 = CodeTF.model_validate_json(sample.read_text(encoding="utf-8"))
    codetf = from_v2(codetfv2)

    # run
    assert codetf.run
    assert codetf.run.vendor == codetfv2.run.vendor
    assert codetf.run.tool == codetfv2.run.tool
    assert codetf.run.version == codetfv2.run.version
    assert codetf.run.elapsed == codetfv2.run.elapsed

    assert (
        codetf.run.projectmetadata
        and "directory" in codetf.run.projectmetadata
        and codetf.run.projectmetadata["directory"] == codetfv2.run.directory
    )
    assert (
        codetf.run.projectmetadata
        and "projectName" not in codetf.run.projectmetadata
        and not codetfv2.run.projectName
    )

    assert (
        codetf.run.inputmetadata
        and "commandLine" in codetf.run.inputmetadata
        and codetf.run.inputmetadata["commandLine"] == codetfv2.run.commandLine
    )
    assert not codetfv2.run.sarifs
    assert codetf.run.inputmetadata and "sarifs" not in codetf.run.inputmetadata

    # results
    v2_unfixed = [f for r in codetfv2.results for f in r.unfixedFindings or []]
    v2_fixed = [
        f
        for r in codetfv2.results
        for cs in r.changeset
        for c in cs.changes
        for f in c.fixedFindings or []
    ]
    unfixed = [
        fr for fr in codetf.results if fr.fixStatus.status == FixStatusType.failed
    ]
    fixed = [
        fr for fr in codetf.results if fr.fixStatus.status == FixStatusType.fixed
    ]

    # length
    assert len(codetf.results) == len(v2_unfixed) + len(v2_fixed) == 3
    assert len(unfixed) == len(v2_unfixed) == 1
    assert len(fixed) == len(v2_fixed) == 2

    assert len(codetfv2.results) == 1
    assert len(codetfv2.results[0].changeset) == 1
    v2result = codetfv2.results[0]
    v2changeset = codetfv2.results[0].changeset[0]
    # Map each fixed v2 finding to the change that fixed it
    v2_finding_to_change = {
        f: c
        for r in codetfv2.results
        for cs in r.changeset
        for c in cs.changes
        for f in c.fixedFindings or []
    }

    for f in fixed:
        # fix metadata
        assert (
            f.fixMetadata
            and f.fixMetadata.generation
            and f.fixMetadata.generation.ai == v2changeset.ai
        )
        assert (
            f.fixMetadata
            and f.fixMetadata.id
            and f.fixMetadata.id == v2result.codemod
        )
        assert (
            f.fixMetadata
            and f.fixMetadata.summary
            and f.fixMetadata.summary == v2result.summary
        )
        assert (
            f.fixMetadata
            and f.fixMetadata.description
            and f.fixMetadata.description == v2result.description
        )

        # correctly associates findings to the change
        assert f.changeSets and f.changeSets[0].path == v2changeset.path
        assert f.changeSets and f.changeSets[0].diff == v2changeset.diff
        assert isinstance(f.finding, Finding) and f.changeSets[0].changes == [
            v2_finding_to_change[f.finding].to_common()
        ]

    # unfixed metadata
    assert (
        unfixed[0].fixStatus.reason
        and unfixed[0].fixStatus.reason == v2_unfixed[0].reason
    )
    assert unfixed[0].finding == v2_unfixed[0]

0 commit comments

Comments
 (0)