
Commit 9074d9a

Added integration tests for new behavior
1 parent: 669c59d

3 files changed, +315 -17 lines changed
Lines changed: 7 additions & 13 deletions

@@ -1,25 +1,19 @@
-from codemodder.codemods.test import SonarIntegrationTest
+from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest
 from core_codemods.sonar.sonar_jwt_decode_verify import (
     JwtDecodeVerifySASTTransformer,
     SonarJwtDecodeVerify,
 )
 
 
-class TestJwtDecodeVerify(SonarIntegrationTest):
+class TestJwtDecodeVerify(SonarRemediationIntegrationTest):
     codemod = SonarJwtDecodeVerify
     code_path = "tests/samples/jwt_decode_verify.py"
-    replacement_lines = [
-        (
-            11,
-            """decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n""",
-        ),
-        (
-            12,
-            """decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n""",
-        ),
+
+    expected_diff_per_change = [
+        '--- \n+++ \n@@ -8,7 +8,7 @@\n \n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n \n var = "something"\n',
+        '--- \n+++ \n@@ -9,6 +9,6 @@\n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n \n var = "something"\n',
     ]
 
-    expected_diff = '--- \n+++ \n@@ -8,7 +8,7 @@\n \n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n \n var = "something"\n'
-    expected_line_change = "11"
+    expected_lines_changed = [11, 12]
     num_changes = 2
     change_description = JwtDecodeVerifySASTTransformer.change_description
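
Note that the expected_diff_per_change strings above use the bare "--- \n+++ \n" headers produced by Python's difflib.unified_diff when no file names are given. As a reference for writing new expectations, here is a minimal sketch of producing a string in that shape with the standard library; the sample lines are illustrative, not the contents of tests/samples/jwt_decode_verify.py, and this does not claim codemodder generates its diffs this way:

import difflib

before = [
    'encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n',
    'decoded = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n',
]
after = [
    'encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n',
    'decoded = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n',
]

# Empty fromfile/tofile yield the bare "--- \n+++ \n" headers seen above.
expected = "".join(difflib.unified_diff(before, after, fromfile="", tofile=""))
print(repr(expected))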

pyproject.toml

Lines changed: 1 addition & 0 deletions

@@ -81,6 +81,7 @@ test = [
     "flask_wtf~=1.2.0",
     "fickling~=0.1.0,>=0.1.3",
     "graphql-server~=3.0.0b7",
+    "unidiff>=0.75",
 ]
 
 complexity = [
     "radon==6.0.*",

src/codemodder/codemods/test/integration_utils.py

Lines changed: 307 additions & 4 deletions

@@ -9,6 +9,7 @@
 from types import ModuleType
 
 import jsonschema
+import unidiff
 
 from codemodder import __version__
 from core_codemods.sonar.api import process_sonar_findings
@@ -35,6 +36,252 @@ def check_dependencies_after(self):
         assert new_requirements_txt == self.expected_requirements
 
 
+class BaseRemediationIntegrationTest:
+    codemod = NotImplementedError
+    original_code = NotImplementedError
+    expected_diff_per_change = NotImplementedError
+    num_changes = 1
+    num_changed_files = 1
+    allowed_exceptions = ()
+    sonar_issues_json: str | None = None
+    sonar_hotspots_json: str | None = None
+
+    @classmethod
+    def setup_class(cls):
+        codemod_id = (
+            cls.codemod().id if isinstance(cls.codemod, type) else cls.codemod.id
+        )
+        cls.codemod_instance = validate_codemod_registration(codemod_id)
+
+        cls.output_path = tempfile.mkstemp()[1]
+        cls.code_dir = tempfile.mkdtemp()
+
+        if not hasattr(cls, "code_filename"):
+            # Only a few codemods require the analyzed file to have a specific filename
+            # All others can just be `code.py`
+            cls.code_filename = "code.py"
+
+        cls.code_path = os.path.join(cls.code_dir, cls.code_filename)
+
+        if cls.code_filename == "settings.py" and "Django" in str(cls):
+            # manage.py must be in the directory above settings.py for this codemod to run
+            parent_dir = Path(cls.code_dir).parent
+            manage_py_path = parent_dir / "manage.py"
+            manage_py_path.touch()
+
+    @classmethod
+    def teardown_class(cls):
+        """Ensure any re-written file is undone after integration test class"""
+        pass
+
+    def _assert_run_fields(self, run, output_path):
+        assert run["vendor"] == "pixee"
+        assert run["tool"] == "codemodder-python"
+        assert run["version"] == __version__
+        assert run["elapsed"] != ""
+        assert run[
+            "commandLine"
+        ] == f'codemodder {self.code_dir} --output {output_path} --codemod-include={self.codemod_instance.id} --path-include={self.code_filename} --path-exclude=""' + (
+            f" --sonar-issues-json={self.sonar_issues_json}"
+            if self.sonar_issues_json
+            else ""
+        ) + (
+            f" --sonar-hotspots-json={self.sonar_hotspots_json}"
+            if self.sonar_hotspots_json
+            else ""
+        )
+        assert run["directory"] == os.path.abspath(self.code_dir)
+        assert run["sarifs"] == []
+
+    def _assert_results_fields(self, results, output_path):
+        assert len(results) == 1
+        result = results[0]
+        assert result["codemod"] == self.codemod_instance.id
+        assert result["references"] == [
+            ref.model_dump(exclude_none=True)
+            for ref in self.codemod_instance.references
+        ]
+
+        assert ("detectionTool" in result) == bool(self.sonar_issues_json)
+        assert ("detectionTool" in result) == bool(self.sonar_hotspots_json)
+
+        # TODO: if/when we add description for each url
+        for reference in result["references"][
+            # Last references for Sonar has a different description
+            : (
+                -len(self.codemod.requested_rules)
+                if self.sonar_issues_json or self.sonar_hotspots_json
+                else None
+            )
+        ]:
+            assert reference["url"] == reference["description"]
+
+        self._assert_sonar_fields(result)
+
+        # There should be a changeset for every expected change
+        assert len(result["changeset"]) == self.num_changes
+        # gather all the change files and test against the expected number
+        assert len({c["path"] for c in result["changeset"]}) == self.num_changed_files
+
+        # A codemod may change multiple files. For now we will
+        # assert the resulting data for one file only.
+        changes = [
+            result for result in result["changeset"] if result["path"] == output_path
+        ]
+        assert {c["path"] for c in changes} == {output_path}
+
+        changes_diff = [c["diff"] for c in changes]
+        print(changes_diff)
+        assert changes_diff == self.expected_diff_per_change
+
+        assert len(changes) == self.num_changes
+        lines_changed = [c["changes"][0]["lineNumber"] for c in changes]
+        assert lines_changed == self.expected_lines_changed
+        assert {c["changes"][0]["description"] for c in changes} == {
+            self.change_description
+        }
+
+    def _assert_sonar_fields(self, result):
+        del result
+
+    def _assert_codetf_output(self, codetf_schema):
+        with open(self.output_path, "r", encoding="utf-8") as f:
+            codetf = json.load(f)
+
+        jsonschema.validate(codetf, codetf_schema)
+
+        assert sorted(codetf.keys()) == ["results", "run"]
+        run = codetf["run"]
+        self._assert_run_fields(run, self.output_path)
+        results = codetf["results"]
+        # CodeTf2 spec requires relative paths
+        self._assert_results_fields(results, self.code_filename)
+
+    def test_codetf_output(self, codetf_schema):
+        """
+        Tests correct codetf output.
+        """
+        command = [
+            "codemodder",
+            self.code_dir,
+            "--output",
+            self.output_path,
+            f"--codemod-include={self.codemod_instance.id}",
+            f"--path-include={self.code_filename}",
+            '--path-exclude=""',
+        ]
+
+        if self.sonar_issues_json:
+            command.append(f"--sonar-issues-json={self.sonar_issues_json}")
+        if self.sonar_hotspots_json:
+            command.append(f"--sonar-hotspots-json={self.sonar_hotspots_json}")
+
+        completed_process = subprocess.run(
+            command,
+            check=False,
+            shell=False,
+        )
+        assert completed_process.returncode == 0
+
+        self._assert_codetf_output(codetf_schema)
+        patched_codes = self._get_patched_code_for_each_change()
+        self._check_code_after(patched_codes)
+
+    def apply_hunk_to_lines(self, lines, hunk):
+        # The hunk target line numbers are 1-indexed.
+        start_index = hunk.target_start - 1
+        new_lines = lines[:start_index]
+        orig_index = start_index
+
+        for hunk_line in hunk:
+            if hunk_line.is_context:
+                # For a context line, check that content matches.
+                if orig_index >= len(lines):
+                    raise ValueError(
+                        "Context line beyond available lines: " + hunk_line.value
+                    )
+                if lines[orig_index].rstrip("\n") != hunk_line.value.rstrip("\n"):
+                    raise ValueError(
+                        "Context line mismatch:\nExpected: "
+                        + lines[orig_index]
+                        + "\nGot: "
+                        + hunk_line.value
+                    )
+                new_lines.append(lines[orig_index])
+                orig_index += 1
+            elif hunk_line.is_removed:
+                # Expect the original line to match, but then skip it.
+                if orig_index >= len(lines):
+                    raise ValueError(
+                        "Removal line beyond available lines: " + hunk_line.value
+                    )
+                if lines[orig_index].rstrip("\n") != hunk_line.value.rstrip("\n"):
+                    raise ValueError(
+                        "Removal line mismatch:\nExpected: "
+                        + lines[orig_index]
+                        + "\nGot: "
+                        + hunk_line.value
+                    )
+                orig_index += 1
+            elif hunk_line.is_added:
+                # For an added line, insert the new content.
+                new_lines.append(hunk_line.value)
+        # Append any remaining lines after the hunk.
+        new_lines.extend(lines[orig_index:])
+        return new_lines
+
+    def apply_diff(self, diff_str, original_str):
+        # unidiff expect the hunk header to have a filename, append it
+        diff_lines = diff_str.splitlines()
+        patched_diff = []
+        for line in diff_lines:
+            if line.startswith("+++") or line.startswith("---"):
+                line = line + " " + self.code_filename
+            patched_diff.append(line)
+        fixed_diff_str = "\n".join(patched_diff)
+
+        patch_set = unidiff.PatchSet(fixed_diff_str)
+
+        # Make a list of lines from the original string.
+        # Assumes original_str uses newline characters.
+        patched_lines = original_str.splitlines(keepends=True)
+
+        # For simplicity, assume the diff only contains modifications for one file.
+        if len(patch_set) != 1:
+            raise ValueError("Only single-file patches are supported in this example.")
+
+        file_patch = list(patch_set)[0]
+        # Process each hunk from the patch sequentially.
+        for hunk in file_patch:
+            try:
+                patched_lines = self.apply_hunk_to_lines(patched_lines, hunk)
+            except ValueError as e:
+                print("Error applying hunk:", e)
+                sys.exit(1)
+
+        return "".join(patched_lines)
+
+    def _get_patched_code_for_each_change(self) -> list[str]:
+        with open(self.output_path, "r", encoding="utf-8") as f:
+            codetf = json.load(f)
+        changes = codetf["results"][0]["changeset"]
+        patched_codes = []
+        with open(self.code_path, "r", encoding="utf-8") as f:  # type: ignore
+            original_code = f.read()
+        for c in changes:
+            patched_codes.append(self.apply_diff(c["diff"], original_code))
+        return patched_codes
+
+    def _check_code_after(self, patched_codes):
+        """
+        Check if each change will produce executable code.
+        """
+        for patched_code in patched_codes:
+            execute_code(
+                code=patched_code, allowed_exceptions=self.allowed_exceptions  # type: ignore
+            )
+
+
 class BaseIntegrationTest(DependencyTestMixin):
     codemod = NotImplementedError
     original_code = NotImplementedError
@@ -166,10 +413,6 @@ def _assert_codetf_output(self, codetf_schema):
         # CodeTf2 spec requires relative paths
         self._assert_results_fields(results, self.code_filename)
 
-    def write_original_code(self):
-        with open(self.code_path, "w", encoding="utf-8") as f:
-            f.write(self.original_code)
-
     def check_code_after(self) -> ModuleType:
         with open(self.code_path, "r", encoding="utf-8") as f:  # type: ignore
             new_code = f.read()
@@ -178,6 +421,10 @@ def check_code_after(self) -> ModuleType:
             path=self.code_path, allowed_exceptions=self.allowed_exceptions  # type: ignore
         )
 
+    def write_original_code(self):
+        with open(self.code_path, "w", encoding="utf-8") as f:
+            f.write(self.original_code)
+
     def test_file_rewritten(self, codetf_schema):
         """
         Tests that file is re-written correctly with new code and correct codetf output.
@@ -238,6 +485,62 @@ def _run_idempotency_check(self, command):
 sys.path.append(SAMPLES_DIR)
 
 
+class SonarRemediationIntegrationTest(BaseRemediationIntegrationTest):
+    """
+    Sonar integration tests must use code from a file in tests/samples
+    because those files are what appears in sonar_issues.json
+    """
+
+    code_path = NotImplementedError
+    sonar_issues_json = "tests/samples/sonar_issues.json"
+    sonar_hotspots_json = "tests/samples/sonar_hotspots.json"
+
+    @classmethod
+    def setup_class(cls):
+        codemod_id = (
+            cls.codemod().id if isinstance(cls.codemod, type) else cls.codemod.id
+        )
+        cls.codemod_instance = validate_codemod_registration(codemod_id)
+
+        cls.output_path = tempfile.mkstemp()[1]
+        cls.code_dir = SAMPLES_DIR
+        cls.code_filename = os.path.relpath(cls.code_path, SAMPLES_DIR)
+
+        # TODO: support sonar integration tests that add a dependency to
+        # `requirements_file_name`. These tests would not be able to run
+        # in parallel at this time since they would all override the same
+        # tests/samples/requirements.txt file, unless we change that to
+        # a temporary file.
+        cls.check_sonar_issues()
+
+    @classmethod
+    def check_sonar_issues(cls):
+        sonar_results = process_sonar_findings(
+            (cls.sonar_issues_json, cls.sonar_hotspots_json)
+        )
+
+        assert any(
+            x in sonar_results for x in cls.codemod.requested_rules
+        ), f"Make sure to add a sonar issue/hotspot for {cls.codemod.rule_id} in {cls.sonar_issues_json} or {cls.sonar_hotspots_json}"
+        results_for_codemod = sonar_results[cls.codemod.requested_rules[-1]]
+        file_path = pathlib.Path(cls.code_filename)
+        assert (
+            file_path in results_for_codemod
+        ), f"Make sure to add a sonar issue/hotspot for file `{cls.code_filename}` under one of the rules `{cls.codemod.requested_rules}`in {cls.sonar_issues_json} or {cls.sonar_hotspots_json}"
+
+    def _assert_sonar_fields(self, result):
+        assert self.codemod_instance._metadata.tool is not None
+        rules = self.codemod_instance._metadata.tool.rules
+        for i in range(len(rules)):
+            assert (
+                result["references"][len(result["references"]) - len(rules) + i][
+                    "description"
+                ]
+                == self.codemod_instance._metadata.tool.rules[i].name
+            )
+        assert result["detectionTool"]["name"] == "Sonar"
+
+
 class SonarIntegrationTest(BaseIntegrationTest):
     """
     Sonar integration tests must use code from a file in tests/samples
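
Since apply_hunk_to_lines does not touch self, it can be exercised on its own to see the splice-and-verify behavior it implements: copy everything before hunk.target_start, check context and removed lines against the original, and insert added lines. A small hypothetical check (the import path assumes this commit is applied; the file contents and diff are made up):

import unidiff

from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest

original = ["a = 1\n", "b = 2\n", "c = 3\n"]
diff_text = (
    "--- code.py\n"
    "+++ code.py\n"
    "@@ -1,3 +1,3 @@\n"
    " a = 1\n"
    "-b = 2\n"
    "+b = 20\n"
    " c = 3\n"
)
hunk = unidiff.PatchSet(diff_text)[0][0]

# Passing None for self is fine here because the method only reads its arguments.
patched = BaseRemediationIntegrationTest.apply_hunk_to_lines(None, original, hunk)
assert patched == ["a = 1\n", "b = 20\n", "c = 3\n"]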
