33import logging
44import os
55import sys
6+ from collections import defaultdict
67from pathlib import Path
78from typing import DefaultDict , Sequence
89
1415from codemodder .context import CodemodExecutionContext
1516from codemodder .dependency import Dependency
1617from codemodder .llm import MisconfiguredAIClient
17- from codemodder .logging import configure_logger , log_list , log_section , logger
18+ from codemodder .logging import (
19+ OutputFormat ,
20+ configure_logger ,
21+ log_list ,
22+ log_section ,
23+ logger ,
24+ )
1825from codemodder .project_analysis .file_parsers .package_store import PackageStore
1926from codemodder .project_analysis .python_repo_manager import PythonRepoManager
2027from codemodder .result import ResultSet
@@ -45,7 +52,7 @@ def find_semgrep_results(
4552 return run_semgrep (context , yaml_files , files_to_analyze )
4653
4754
48- def log_report (context , argv , elapsed_ms , files_to_analyze ):
55+ def log_report (context , output , elapsed_ms , files_to_analyze ):
4956 log_section ("report" )
5057 logger .info ("scanned: %s files" , len (files_to_analyze ))
5158 all_failures = context .get_failed_files ()
@@ -60,7 +67,7 @@ def log_report(context, argv, elapsed_ms, files_to_analyze):
6067 len (all_changes ),
6168 len (set (all_changes )),
6269 )
63- logger .info ("report file: %s" , argv . output )
70+ logger .info ("report file: %s" , output )
6471 logger .info ("total elapsed: %s ms" , elapsed_ms )
6572 logger .info (" semgrep: %s ms" , context .timer .get_time_ms ("semgrep" ))
6673 logger .info (" parse: %s ms" , context .timer .get_time_ms ("parse" ))
@@ -111,79 +118,79 @@ def record_dependency_update(dependency_results: dict[Dependency, PackageStore |
111118 logger .debug ("The following dependencies could not be added: %s" , str_list )
112119
113120
114- def run (original_args ) -> int :
121+ def run (
122+ directory : Path | str ,
123+ dry_run : bool ,
124+ output : Path | str | None = None ,
125+ output_format : str = "codetf" ,
126+ verbose : bool = False ,
127+ log_format : OutputFormat = OutputFormat .JSON ,
128+ project_name : str | None = None ,
129+ tool_result_files_map : DefaultDict [str , list [str ]] = defaultdict (list ),
130+ path_include : list [str ] | None = None ,
131+ path_exclude : list [str ] | None = None ,
132+ codemod_include : list [str ] | None = None ,
133+ codemod_exclude : list [str ] | None = None ,
134+ max_workers : int = 1 ,
135+ original_cli_args : list [str ] | None = None ,
136+ codemod_registry : registry .CodemodRegistry | None = None ,
137+ sast_only : bool = False ,
138+ ) -> tuple [CodeTF | None , int ]:
115139 start = datetime .datetime .now ()
116140
117- codemod_registry = registry .load_registered_codemods ()
118- provider_registry = providers .load_providers ()
141+ codemod_registry = codemod_registry or registry .load_registered_codemods ()
119142
120- # A little awkward, but we need the codemod registry in order to validate potential arguments
121- argv = parse_args (original_args , codemod_registry )
122- if not os .path .exists (argv .directory ):
123- logger .error (
124- "given directory '%s' doesn't exist or can’t be read" ,
125- argv .directory ,
126- )
127- return 1
143+ path_include = path_include or []
144+ path_exclude = path_exclude or []
145+ codemod_include = codemod_include or []
146+ codemod_exclude = codemod_exclude or []
147+
148+ provider_registry = providers .load_providers ()
128149
129- configure_logger (argv . verbose , argv . log_format , argv . project_name )
150+ configure_logger (verbose , log_format , project_name )
130151
131152 log_section ("startup" )
132153 logger .info ("codemodder: python/%s" , __version__ )
133- logger .info ("command: %s %s" , Path (sys .argv [0 ]).name , " " .join (original_args ))
134-
135- try :
136- # TODO: this should be dict[str, list[Path]]
137- tool_result_files_map : DefaultDict [str , list [str ]] = detect_sarif_tools (
138- [Path (name ) for name in argv .sarif or []]
139- )
140- except (DuplicateToolError , FileNotFoundError ) as err :
141- logger .error (err )
142- return 1
143-
144- tool_result_files_map ["sonar" ].extend (argv .sonar_issues_json or [])
145- tool_result_files_map ["sonar" ].extend (argv .sonar_hotspots_json or [])
146- tool_result_files_map ["defectdojo" ] = argv .defectdojo_findings_json or []
147154
148155 for file_name in itertools .chain (* tool_result_files_map .values ()):
149156 if not os .path .exists (file_name ):
150157 logger .error (
151158 f"FileNotFoundError: [Errno 2] No such file or directory: '{ file_name } '"
152159 )
153- return 1
160+ return None , 1
154161
155- repo_manager = PythonRepoManager (Path (argv . directory ))
162+ repo_manager = PythonRepoManager (Path (directory ))
156163
157164 try :
158165 context = CodemodExecutionContext (
159- Path (argv . directory ),
160- argv . dry_run ,
161- argv . verbose ,
166+ Path (directory ),
167+ dry_run ,
168+ verbose ,
162169 codemod_registry ,
163170 provider_registry ,
164171 repo_manager ,
165- argv . path_include ,
166- argv . path_exclude ,
172+ path_include ,
173+ path_exclude ,
167174 tool_result_files_map ,
168- argv . max_workers ,
175+ max_workers ,
169176 )
170177 except MisconfiguredAIClient as e :
171178 logger .error (e )
172- return 3 # Codemodder instructions conflicted (according to spec)
179+ return None , 3 # Codemodder instructions conflicted (according to spec)
173180
174- repo_manager .parse_project ()
181+ context . repo_manager .parse_project ()
175182
176183 # TODO: this should be a method of CodemodExecutionContext
177184 codemods_to_run = codemod_registry .match_codemods (
178- argv . codemod_include ,
179- argv . codemod_exclude ,
180- sast_only = argv . sonar_issues_json or argv . sarif ,
185+ codemod_include ,
186+ codemod_exclude ,
187+ sast_only = sast_only ,
181188 )
182189
183190 log_section ("setup" )
184191 log_list (logging .INFO , "running" , codemods_to_run , predicate = lambda c : c .id )
185192 log_list (logging .INFO , "including paths" , context .included_paths )
186- log_list (logging .INFO , "excluding paths" , argv . path_exclude )
193+ log_list (logging .INFO , "excluding paths" , path_exclude )
187194
188195 log_list (
189196 logging .DEBUG , "matched files" , (str (path ) for path in context .files_to_analyze )
@@ -203,24 +210,71 @@ def run(original_args) -> int:
203210 elapsed = datetime .datetime .now () - start
204211 elapsed_ms = int (elapsed .total_seconds () * 1000 )
205212
206- if argv .output :
207- codetf = CodeTF .build (
208- context ,
209- elapsed_ms ,
210- original_args ,
211- context .compile_results (codemods_to_run ),
212- )
213- codetf .write_report (argv .output )
213+ logger .debug ("Output format %s" , output_format )
214+ codetf = CodeTF .build (
215+ context ,
216+ elapsed_ms ,
217+ original_cli_args or [],
218+ context .compile_results (codemods_to_run ),
219+ )
220+ if output :
221+ codetf .write_report (output )
214222
215223 log_report (
216224 context ,
217- argv ,
225+ output ,
218226 elapsed_ms ,
219227 [] if not codemods_to_run else context .files_to_analyze ,
220228 )
221- return 0
229+ return codetf , 0
230+
231+
def _run_cli(original_args) -> int:
    """Parse command-line arguments and delegate the actual work to `run`.

    Args:
        original_args: Raw command-line arguments, without the program name.

    Returns:
        The process exit status: 1 when the target directory does not exist
        or SARIF tool detection fails, otherwise the status returned by `run`.
    """
    # `parse_args` takes the registry as an argument, so it is loaded first;
    # the same instance is then handed to `run` to avoid loading it twice.
    codemod_registry = registry.load_registered_codemods()
    argv = parse_args(original_args, codemod_registry)
    if not os.path.exists(argv.directory):
        logger.error(
            "given directory '%s' doesn't exist or can’t be read",
            argv.directory,
        )
        return 1

    try:
        # TODO: this should be dict[str, list[Path]]
        tool_result_files_map: DefaultDict[str, list[str]] = detect_sarif_tools(
            [Path(name) for name in argv.sarif or []]
        )
    except (DuplicateToolError, FileNotFoundError) as err:
        logger.error(err)
        return 1

    tool_result_files_map["sonar"].extend(argv.sonar_issues_json or [])
    tool_result_files_map["sonar"].extend(argv.sonar_hotspots_json or [])
    tool_result_files_map["defectdojo"].extend(argv.defectdojo_findings_json or [])

    # NOTE(review): `configure_logger` is only invoked inside `run`, so this
    # message is emitted before that call happens -- confirm this is intended.
    logger.info("command: %s %s", Path(sys.argv[0]).name, " ".join(original_args))

    # Keyword arguments keep this call correct even if `run`'s parameter
    # order changes later.
    _, status = run(
        directory=argv.directory,
        dry_run=argv.dry_run,
        output=argv.output,
        output_format=argv.output_format,
        verbose=argv.verbose,
        log_format=argv.log_format,
        project_name=argv.project_name,
        tool_result_files_map=tool_result_files_map,
        path_include=argv.path_include,
        path_exclude=argv.path_exclude,
        codemod_include=argv.codemod_include,
        codemod_exclude=argv.codemod_exclude,
        max_workers=argv.max_workers,
        original_cli_args=original_args,
        codemod_registry=codemod_registry,
        # `run` declares `sast_only` as a bool; the CLI values are lists or
        # None, so coerce explicitly instead of leaking a list through.
        sast_only=bool(argv.sonar_issues_json or argv.sarif),
    )
    return status
222276
223277
def main():
    """Console entry point: run the CLI on the process arguments and exit with its status."""
    cli_args = sys.argv[1:]  # drop the program name
    exit_status = _run_cli(cli_args)
    sys.exit(exit_status)
0 commit comments