99from types import ModuleType
1010
1111import jsonschema
12+ import unidiff
1213
1314from codemodder import __version__
1415from core_codemods .sonar .api import process_sonar_findings
@@ -35,6 +36,252 @@ def check_dependencies_after(self):
3536 assert new_requirements_txt == self .expected_requirements
3637
3738
39+ class BaseRemediationIntegrationTest :
40+ codemod = NotImplementedError
41+ original_code = NotImplementedError
42+ expected_diff_per_change = NotImplementedError
43+ num_changes = 1
44+ num_changed_files = 1
45+ allowed_exceptions = ()
46+ sonar_issues_json : str | None = None
47+ sonar_hotspots_json : str | None = None
48+
49+ @classmethod
50+ def setup_class (cls ):
51+ codemod_id = (
52+ cls .codemod ().id if isinstance (cls .codemod , type ) else cls .codemod .id
53+ )
54+ cls .codemod_instance = validate_codemod_registration (codemod_id )
55+
56+ cls .output_path = tempfile .mkstemp ()[1 ]
57+ cls .code_dir = tempfile .mkdtemp ()
58+
59+ if not hasattr (cls , "code_filename" ):
60+ # Only a few codemods require the analyzed file to have a specific filename
61+ # All others can just be `code.py`
62+ cls .code_filename = "code.py"
63+
64+ cls .code_path = os .path .join (cls .code_dir , cls .code_filename )
65+
66+ if cls .code_filename == "settings.py" and "Django" in str (cls ):
67+ # manage.py must be in the directory above settings.py for this codemod to run
68+ parent_dir = Path (cls .code_dir ).parent
69+ manage_py_path = parent_dir / "manage.py"
70+ manage_py_path .touch ()
71+
72+ @classmethod
73+ def teardown_class (cls ):
74+ """Ensure any re-written file is undone after integration test class"""
75+ pass
76+
77+ def _assert_run_fields (self , run , output_path ):
78+ assert run ["vendor" ] == "pixee"
79+ assert run ["tool" ] == "codemodder-python"
80+ assert run ["version" ] == __version__
81+ assert run ["elapsed" ] != ""
82+ assert run [
83+ "commandLine"
84+ ] == f'codemodder { self .code_dir } --output { output_path } --codemod-include={ self .codemod_instance .id } --path-include={ self .code_filename } --path-exclude=""' + (
85+ f" --sonar-issues-json={ self .sonar_issues_json } "
86+ if self .sonar_issues_json
87+ else ""
88+ ) + (
89+ f" --sonar-hotspots-json={ self .sonar_hotspots_json } "
90+ if self .sonar_hotspots_json
91+ else ""
92+ )
93+ assert run ["directory" ] == os .path .abspath (self .code_dir )
94+ assert run ["sarifs" ] == []
95+
96+ def _assert_results_fields (self , results , output_path ):
97+ assert len (results ) == 1
98+ result = results [0 ]
99+ assert result ["codemod" ] == self .codemod_instance .id
100+ assert result ["references" ] == [
101+ ref .model_dump (exclude_none = True )
102+ for ref in self .codemod_instance .references
103+ ]
104+
105+ assert ("detectionTool" in result ) == bool (self .sonar_issues_json )
106+ assert ("detectionTool" in result ) == bool (self .sonar_hotspots_json )
107+
108+ # TODO: if/when we add description for each url
109+ for reference in result ["references" ][
110+ # Last references for Sonar has a different description
111+ : (
112+ - len (self .codemod .requested_rules )
113+ if self .sonar_issues_json or self .sonar_hotspots_json
114+ else None
115+ )
116+ ]:
117+ assert reference ["url" ] == reference ["description" ]
118+
119+ self ._assert_sonar_fields (result )
120+
121+ # There should be a changeset for every expected change
122+ assert len (result ["changeset" ]) == self .num_changes
123+ # gather all the change files and test against the expected number
124+ assert len ({c ["path" ] for c in result ["changeset" ]}) == self .num_changed_files
125+
126+ # A codemod may change multiple files. For now we will
127+ # assert the resulting data for one file only.
128+ changes = [
129+ result for result in result ["changeset" ] if result ["path" ] == output_path
130+ ]
131+ assert {c ["path" ] for c in changes } == {output_path }
132+
133+ changes_diff = [c ["diff" ] for c in changes ]
134+ print (changes_diff )
135+ assert changes_diff == self .expected_diff_per_change
136+
137+ assert len (changes ) == self .num_changes
138+ lines_changed = [c ["changes" ][0 ]["lineNumber" ] for c in changes ]
139+ assert lines_changed == self .expected_lines_changed
140+ assert {c ["changes" ][0 ]["description" ] for c in changes } == {
141+ self .change_description
142+ }
143+
144+ def _assert_sonar_fields (self , result ):
145+ del result
146+
147+ def _assert_codetf_output (self , codetf_schema ):
148+ with open (self .output_path , "r" , encoding = "utf-8" ) as f :
149+ codetf = json .load (f )
150+
151+ jsonschema .validate (codetf , codetf_schema )
152+
153+ assert sorted (codetf .keys ()) == ["results" , "run" ]
154+ run = codetf ["run" ]
155+ self ._assert_run_fields (run , self .output_path )
156+ results = codetf ["results" ]
157+ # CodeTf2 spec requires relative paths
158+ self ._assert_results_fields (results , self .code_filename )
159+
160+ def test_codetf_output (self , codetf_schema ):
161+ """
162+ Tests correct codetf output.
163+ """
164+ command = [
165+ "codemodder" ,
166+ self .code_dir ,
167+ "--output" ,
168+ self .output_path ,
169+ f"--codemod-include={ self .codemod_instance .id } " ,
170+ f"--path-include={ self .code_filename } " ,
171+ '--path-exclude=""' ,
172+ ]
173+
174+ if self .sonar_issues_json :
175+ command .append (f"--sonar-issues-json={ self .sonar_issues_json } " )
176+ if self .sonar_hotspots_json :
177+ command .append (f"--sonar-hotspots-json={ self .sonar_hotspots_json } " )
178+
179+ completed_process = subprocess .run (
180+ command ,
181+ check = False ,
182+ shell = False ,
183+ )
184+ assert completed_process .returncode == 0
185+
186+ self ._assert_codetf_output (codetf_schema )
187+ patched_codes = self ._get_patched_code_for_each_change ()
188+ self ._check_code_after (patched_codes )
189+
190+ def apply_hunk_to_lines (self , lines , hunk ):
191+ # The hunk target line numbers are 1-indexed.
192+ start_index = hunk .target_start - 1
193+ new_lines = lines [:start_index ]
194+ orig_index = start_index
195+
196+ for hunk_line in hunk :
197+ if hunk_line .is_context :
198+ # For a context line, check that content matches.
199+ if orig_index >= len (lines ):
200+ raise ValueError (
201+ "Context line beyond available lines: " + hunk_line .value
202+ )
203+ if lines [orig_index ].rstrip ("\n " ) != hunk_line .value .rstrip ("\n " ):
204+ raise ValueError (
205+ "Context line mismatch:\n Expected: "
206+ + lines [orig_index ]
207+ + "\n Got: "
208+ + hunk_line .value
209+ )
210+ new_lines .append (lines [orig_index ])
211+ orig_index += 1
212+ elif hunk_line .is_removed :
213+ # Expect the original line to match, but then skip it.
214+ if orig_index >= len (lines ):
215+ raise ValueError (
216+ "Removal line beyond available lines: " + hunk_line .value
217+ )
218+ if lines [orig_index ].rstrip ("\n " ) != hunk_line .value .rstrip ("\n " ):
219+ raise ValueError (
220+ "Removal line mismatch:\n Expected: "
221+ + lines [orig_index ]
222+ + "\n Got: "
223+ + hunk_line .value
224+ )
225+ orig_index += 1
226+ elif hunk_line .is_added :
227+ # For an added line, insert the new content.
228+ new_lines .append (hunk_line .value )
229+ # Append any remaining lines after the hunk.
230+ new_lines .extend (lines [orig_index :])
231+ return new_lines
232+
233+ def apply_diff (self , diff_str , original_str ):
234+ # unidiff expect the hunk header to have a filename, append it
235+ diff_lines = diff_str .splitlines ()
236+ patched_diff = []
237+ for line in diff_lines :
238+ if line .startswith ("+++" ) or line .startswith ("---" ):
239+ line = line + " " + self .code_filename
240+ patched_diff .append (line )
241+ fixed_diff_str = "\n " .join (patched_diff )
242+
243+ patch_set = unidiff .PatchSet (fixed_diff_str )
244+
245+ # Make a list of lines from the original string.
246+ # Assumes original_str uses newline characters.
247+ patched_lines = original_str .splitlines (keepends = True )
248+
249+ # For simplicity, assume the diff only contains modifications for one file.
250+ if len (patch_set ) != 1 :
251+ raise ValueError ("Only single-file patches are supported in this example." )
252+
253+ file_patch = list (patch_set )[0 ]
254+ # Process each hunk from the patch sequentially.
255+ for hunk in file_patch :
256+ try :
257+ patched_lines = self .apply_hunk_to_lines (patched_lines , hunk )
258+ except ValueError as e :
259+ print ("Error applying hunk:" , e )
260+ sys .exit (1 )
261+
262+ return "" .join (patched_lines )
263+
264+ def _get_patched_code_for_each_change (self ) -> list [str ]:
265+ with open (self .output_path , "r" , encoding = "utf-8" ) as f :
266+ codetf = json .load (f )
267+ changes = codetf ["results" ][0 ]["changeset" ]
268+ patched_codes = []
269+ with open (self .code_path , "r" , encoding = "utf-8" ) as f : # type: ignore
270+ original_code = f .read ()
271+ for c in changes :
272+ patched_codes .append (self .apply_diff (c ["diff" ], original_code ))
273+ return patched_codes
274+
275+ def _check_code_after (self , patched_codes ):
276+ """
277+ Check if each change will produce executable code.
278+ """
279+ for patched_code in patched_codes :
280+ execute_code (
281+ code = patched_code , allowed_exceptions = self .allowed_exceptions # type: ignore
282+ )
283+
284+
38285class BaseIntegrationTest (DependencyTestMixin ):
39286 codemod = NotImplementedError
40287 original_code = NotImplementedError
@@ -166,10 +413,6 @@ def _assert_codetf_output(self, codetf_schema):
166413 # CodeTf2 spec requires relative paths
167414 self ._assert_results_fields (results , self .code_filename )
168415
169- def write_original_code (self ):
170- with open (self .code_path , "w" , encoding = "utf-8" ) as f :
171- f .write (self .original_code )
172-
173416 def check_code_after (self ) -> ModuleType :
174417 with open (self .code_path , "r" , encoding = "utf-8" ) as f : # type: ignore
175418 new_code = f .read ()
@@ -178,6 +421,10 @@ def check_code_after(self) -> ModuleType:
178421 path = self .code_path , allowed_exceptions = self .allowed_exceptions # type: ignore
179422 )
180423
424+ def write_original_code (self ):
425+ with open (self .code_path , "w" , encoding = "utf-8" ) as f :
426+ f .write (self .original_code )
427+
181428 def test_file_rewritten (self , codetf_schema ):
182429 """
183430 Tests that file is re-written correctly with new code and correct codetf output.
@@ -238,6 +485,62 @@ def _run_idempotency_check(self, command):
# Allow importing modules from the samples directory.
sys.path.append(SAMPLES_DIR)
240487
class SonarRemediationIntegrationTest(BaseRemediationIntegrationTest):
    """
    Sonar integration tests must use code from a file in tests/samples
    because those files are what appears in sonar_issues.json
    """

    # Subclasses must point this at a file under tests/samples.
    code_path = NotImplementedError
    sonar_issues_json = "tests/samples/sonar_issues.json"
    sonar_hotspots_json = "tests/samples/sonar_hotspots.json"

    @classmethod
    def setup_class(cls):
        # `codemod` may be given as a class or an instance; normalize to its id.
        codemod_id = (
            cls.codemod().id if isinstance(cls.codemod, type) else cls.codemod.id
        )
        cls.codemod_instance = validate_codemod_registration(codemod_id)

        cls.output_path = tempfile.mkstemp()[1]
        cls.code_dir = SAMPLES_DIR
        cls.code_filename = os.path.relpath(cls.code_path, SAMPLES_DIR)

        # TODO: support sonar integration tests that add a dependency to
        # `requirements_file_name`. These tests would not be able to run
        # in parallel at this time since they would all override the same
        # tests/samples/requirements.txt file, unless we change that to
        # a temporary file.
        cls.check_sonar_issues()

    @classmethod
    def check_sonar_issues(cls):
        """Fail fast when the fixture JSON lacks a finding for this codemod/file."""
        sonar_results = process_sonar_findings(
            (cls.sonar_issues_json, cls.sonar_hotspots_json)
        )

        assert any(
            x in sonar_results for x in cls.codemod.requested_rules
        ), f"Make sure to add a sonar issue/hotspot for {cls.codemod.rule_id} in {cls.sonar_issues_json} or {cls.sonar_hotspots_json}"
        results_for_codemod = sonar_results[cls.codemod.requested_rules[-1]]
        file_path = pathlib.Path(cls.code_filename)
        assert (
            file_path in results_for_codemod
        ), f"Make sure to add a sonar issue/hotspot for file `{cls.code_filename}` under one of the rules `{cls.codemod.requested_rules}` in {cls.sonar_issues_json} or {cls.sonar_hotspots_json}"

    def _assert_sonar_fields(self, result):
        """The last len(rules) references must carry the Sonar rule names, in order."""
        assert self.codemod_instance._metadata.tool is not None
        rules = self.codemod_instance._metadata.tool.rules
        sonar_references = result["references"][len(result["references"]) - len(rules):]
        for reference, rule in zip(sonar_references, rules):
            assert reference["description"] == rule.name
        assert result["detectionTool"]["name"] == "Sonar"
543+
241544class SonarIntegrationTest (BaseIntegrationTest ):
242545 """
243546 Sonar integration tests must use code from a file in tests/samples
0 commit comments