Skip to content

Commit 1361e68

Browse files
Enable running codemodder as a library import (#879)
* First steps towards refactoring as a library
* Make sure no threads are used for maxworkers=1 case
* Fix up some types and defaults
* organize run args
* run returns output
* make output a Path
* make dry run required
* fix sast only filtering

---------

Co-authored-by: Daniel D'Avella <[email protected]>
1 parent c2560fc commit 1361e68

File tree

8 files changed

+207
-96
lines changed

8 files changed

+207
-96
lines changed

README.md

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,9 +30,11 @@ To install the package from source, use `pip`:
3030
$ pip install /path/to/codemodder-python
3131
```
3232

33-
## Running Locally
33+
## Running `codemodder`
3434

35-
The codemodder package provides an executable called `codemodder`. This should be available on your path by default after installation.
35+
### CLI
36+
37+
Codemodder can be run as a CLI. The codemodder package provides an executable called `codemodder`. This should be available on your path by default after installation.
3638

3739
For basic usage, run the `codemodder` command with a target directory path:
3840

@@ -55,6 +57,19 @@ For a full list of options, use the `--help` flag:
5557
$ codemodder --help
5658
```
5759

60+
### Library
61+
62+
You can also use `codemodder` as a library by importing the package and calling `codemodder.run`. For basic usage, pass a target directory path and the `dry_run` argument:
63+
64+
```python
65+
import codemodder
66+
67+
output, exit_code = codemodder.run("/path/to/my-project", dry_run=True)
68+
```
69+
70+
Unlike the CLI, which defaults `dry_run` to `False`, the library API requires you to pass `dry_run` explicitly to indicate whether `codemodder` may make changes to your files.
71+
72+
5873
## Architecture
5974

6075
Codemods are composed of the following key components:

src/codemodder/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,3 +2,7 @@
22
from ._version import __version__
33
except ImportError: # pragma: no cover
44
__version__ = "unknown"
5+
6+
from codemodder.codemodder import run
7+
8+
__all__ = ["run", "__version__"]

src/codemodder/cli.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ def parse_args(argv, codemod_registry: CodemodRegistry):
121121
parser.add_argument(
122122
"--dry-run",
123123
action=argparse.BooleanOptionalAction,
124+
default=False,
124125
help="do everything except make changes to files",
125126
)
126127
parser.add_argument(

src/codemodder/codemodder.py

Lines changed: 108 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import logging
44
import os
55
import sys
6+
from collections import defaultdict
67
from pathlib import Path
78
from typing import DefaultDict, Sequence
89

@@ -14,7 +15,13 @@
1415
from codemodder.context import CodemodExecutionContext
1516
from codemodder.dependency import Dependency
1617
from codemodder.llm import MisconfiguredAIClient
17-
from codemodder.logging import configure_logger, log_list, log_section, logger
18+
from codemodder.logging import (
19+
OutputFormat,
20+
configure_logger,
21+
log_list,
22+
log_section,
23+
logger,
24+
)
1825
from codemodder.project_analysis.file_parsers.package_store import PackageStore
1926
from codemodder.project_analysis.python_repo_manager import PythonRepoManager
2027
from codemodder.result import ResultSet
@@ -45,7 +52,7 @@ def find_semgrep_results(
4552
return run_semgrep(context, yaml_files, files_to_analyze)
4653

4754

48-
def log_report(context, argv, elapsed_ms, files_to_analyze):
55+
def log_report(context, output, elapsed_ms, files_to_analyze):
4956
log_section("report")
5057
logger.info("scanned: %s files", len(files_to_analyze))
5158
all_failures = context.get_failed_files()
@@ -60,7 +67,7 @@ def log_report(context, argv, elapsed_ms, files_to_analyze):
6067
len(all_changes),
6168
len(set(all_changes)),
6269
)
63-
logger.info("report file: %s", argv.output)
70+
logger.info("report file: %s", output)
6471
logger.info("total elapsed: %s ms", elapsed_ms)
6572
logger.info(" semgrep: %s ms", context.timer.get_time_ms("semgrep"))
6673
logger.info(" parse: %s ms", context.timer.get_time_ms("parse"))
@@ -111,79 +118,79 @@ def record_dependency_update(dependency_results: dict[Dependency, PackageStore |
111118
logger.debug("The following dependencies could not be added: %s", str_list)
112119

113120

114-
def run(original_args) -> int:
121+
def run(
122+
directory: Path | str,
123+
dry_run: bool,
124+
output: Path | str | None = None,
125+
output_format: str = "codetf",
126+
verbose: bool = False,
127+
log_format: OutputFormat = OutputFormat.JSON,
128+
project_name: str | None = None,
129+
tool_result_files_map: DefaultDict[str, list[str]] = defaultdict(list),
130+
path_include: list[str] | None = None,
131+
path_exclude: list[str] | None = None,
132+
codemod_include: list[str] | None = None,
133+
codemod_exclude: list[str] | None = None,
134+
max_workers: int = 1,
135+
original_cli_args: list[str] | None = None,
136+
codemod_registry: registry.CodemodRegistry | None = None,
137+
sast_only: bool = False,
138+
) -> tuple[CodeTF | None, int]:
115139
start = datetime.datetime.now()
116140

117-
codemod_registry = registry.load_registered_codemods()
118-
provider_registry = providers.load_providers()
141+
codemod_registry = codemod_registry or registry.load_registered_codemods()
119142

120-
# A little awkward, but we need the codemod registry in order to validate potential arguments
121-
argv = parse_args(original_args, codemod_registry)
122-
if not os.path.exists(argv.directory):
123-
logger.error(
124-
"given directory '%s' doesn't exist or can’t be read",
125-
argv.directory,
126-
)
127-
return 1
143+
path_include = path_include or []
144+
path_exclude = path_exclude or []
145+
codemod_include = codemod_include or []
146+
codemod_exclude = codemod_exclude or []
147+
148+
provider_registry = providers.load_providers()
128149

129-
configure_logger(argv.verbose, argv.log_format, argv.project_name)
150+
configure_logger(verbose, log_format, project_name)
130151

131152
log_section("startup")
132153
logger.info("codemodder: python/%s", __version__)
133-
logger.info("command: %s %s", Path(sys.argv[0]).name, " ".join(original_args))
134-
135-
try:
136-
# TODO: this should be dict[str, list[Path]]
137-
tool_result_files_map: DefaultDict[str, list[str]] = detect_sarif_tools(
138-
[Path(name) for name in argv.sarif or []]
139-
)
140-
except (DuplicateToolError, FileNotFoundError) as err:
141-
logger.error(err)
142-
return 1
143-
144-
tool_result_files_map["sonar"].extend(argv.sonar_issues_json or [])
145-
tool_result_files_map["sonar"].extend(argv.sonar_hotspots_json or [])
146-
tool_result_files_map["defectdojo"] = argv.defectdojo_findings_json or []
147154

148155
for file_name in itertools.chain(*tool_result_files_map.values()):
149156
if not os.path.exists(file_name):
150157
logger.error(
151158
f"FileNotFoundError: [Errno 2] No such file or directory: '{file_name}'"
152159
)
153-
return 1
160+
return None, 1
154161

155-
repo_manager = PythonRepoManager(Path(argv.directory))
162+
repo_manager = PythonRepoManager(Path(directory))
156163

157164
try:
158165
context = CodemodExecutionContext(
159-
Path(argv.directory),
160-
argv.dry_run,
161-
argv.verbose,
166+
Path(directory),
167+
dry_run,
168+
verbose,
162169
codemod_registry,
163170
provider_registry,
164171
repo_manager,
165-
argv.path_include,
166-
argv.path_exclude,
172+
path_include,
173+
path_exclude,
167174
tool_result_files_map,
168-
argv.max_workers,
175+
max_workers,
169176
)
170177
except MisconfiguredAIClient as e:
171178
logger.error(e)
172-
return 3 # Codemodder instructions conflicted (according to spec)
179+
return None, 3 # Codemodder instructions conflicted (according to spec)
173180

174-
repo_manager.parse_project()
181+
context.repo_manager.parse_project()
175182

176183
# TODO: this should be a method of CodemodExecutionContext
177184
codemods_to_run = codemod_registry.match_codemods(
178-
argv.codemod_include,
179-
argv.codemod_exclude,
180-
sast_only=argv.sonar_issues_json or argv.sarif,
185+
codemod_include,
186+
codemod_exclude,
187+
sast_only=sast_only,
181188
)
182189

183190
log_section("setup")
184191
log_list(logging.INFO, "running", codemods_to_run, predicate=lambda c: c.id)
185192
log_list(logging.INFO, "including paths", context.included_paths)
186-
log_list(logging.INFO, "excluding paths", argv.path_exclude)
193+
log_list(logging.INFO, "excluding paths", path_exclude)
187194

188195
log_list(
189196
logging.DEBUG, "matched files", (str(path) for path in context.files_to_analyze)
@@ -203,24 +210,71 @@ def run(original_args) -> int:
203210
elapsed = datetime.datetime.now() - start
204211
elapsed_ms = int(elapsed.total_seconds() * 1000)
205212

206-
if argv.output:
207-
codetf = CodeTF.build(
208-
context,
209-
elapsed_ms,
210-
original_args,
211-
context.compile_results(codemods_to_run),
212-
)
213-
codetf.write_report(argv.output)
213+
logger.debug("Output format %s", output_format)
214+
codetf = CodeTF.build(
215+
context,
216+
elapsed_ms,
217+
original_cli_args or [],
218+
context.compile_results(codemods_to_run),
219+
)
220+
if output:
221+
codetf.write_report(output)
214222

215223
log_report(
216224
context,
217-
argv,
225+
output,
218226
elapsed_ms,
219227
[] if not codemods_to_run else context.files_to_analyze,
220228
)
221-
return 0
229+
return codetf, 0
230+
231+
232+
def _run_cli(original_args) -> int:
233+
codemod_registry = registry.load_registered_codemods()
234+
argv = parse_args(original_args, codemod_registry)
235+
if not os.path.exists(argv.directory):
236+
logger.error(
237+
"given directory '%s' doesn't exist or can’t be read",
238+
argv.directory,
239+
)
240+
return 1
241+
242+
try:
243+
# TODO: this should be dict[str, list[Path]]
244+
tool_result_files_map: DefaultDict[str, list[str]] = detect_sarif_tools(
245+
[Path(name) for name in argv.sarif or []]
246+
)
247+
except (DuplicateToolError, FileNotFoundError) as err:
248+
logger.error(err)
249+
return 1
250+
251+
tool_result_files_map["sonar"].extend(argv.sonar_issues_json or [])
252+
tool_result_files_map["sonar"].extend(argv.sonar_hotspots_json or [])
253+
tool_result_files_map["defectdojo"].extend(argv.defectdojo_findings_json or [])
254+
255+
logger.info("command: %s %s", Path(sys.argv[0]).name, " ".join(original_args))
256+
257+
_, status = run(
258+
argv.directory,
259+
argv.dry_run,
260+
argv.output,
261+
argv.output_format,
262+
argv.verbose,
263+
argv.log_format,
264+
argv.project_name,
265+
tool_result_files_map,
266+
argv.path_include,
267+
argv.path_exclude,
268+
argv.codemod_include,
269+
argv.codemod_exclude,
270+
max_workers=argv.max_workers,
271+
original_cli_args=original_args,
272+
codemod_registry=codemod_registry,
273+
sast_only=argv.sonar_issues_json or argv.sarif,
274+
)
275+
return status
222276

223277

224278
def main():
225279
sys_argv = sys.argv[1:]
226-
sys.exit(run(sys_argv))
280+
sys.exit(_run_cli(sys_argv))

src/codemodder/codemods/base_codemod.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -229,10 +229,15 @@ def _apply(
229229
self._process_file, context=context, results=results, rules=rules
230230
)
231231

232-
with ThreadPoolExecutor() as executor:
233-
logger.debug("using executor with %s workers", context.max_workers)
234-
contexts = executor.map(process_file, files_to_analyze)
235-
executor.shutdown(wait=True)
232+
contexts = []
233+
if context.max_workers == 1:
234+
logger.debug("processing files serially")
235+
contexts.extend([process_file(file) for file in files_to_analyze])
236+
else:
237+
with ThreadPoolExecutor() as executor:
238+
logger.debug("using executor with %s workers", context.max_workers)
239+
contexts.extend(executor.map(process_file, files_to_analyze))
240+
executor.shutdown(wait=True)
236241

237242
context.process_results(self.id, contexts)
238243

src/codemodder/codetf.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import os
1010
import sys
1111
from enum import Enum
12+
from pathlib import Path
1213
from typing import TYPE_CHECKING, Optional
1314

1415
from pydantic import BaseModel, model_validator
@@ -165,7 +166,7 @@ def build(
165166
cls,
166167
context: CodemodExecutionContext,
167168
elapsed_ms,
168-
original_args,
169+
original_args: list,
169170
results: list[Result],
170171
):
171172
command_name = os.path.basename(sys.argv[0])
@@ -183,10 +184,9 @@ def build(
183184
)
184185
return cls(run=run, results=results)
185186

186-
def write_report(self, outfile):
187+
def write_report(self, outfile: Path | str):
187188
try:
188-
with open(outfile, "w", encoding="utf-8") as f:
189-
f.write(self.model_dump_json(exclude_none=True))
189+
Path(outfile).write_text(self.model_dump_json(exclude_none=True))
190190
except Exception:
191191
logger.exception("failed to write report file.")
192192
# Any issues with writing the output file should exit status 2.

0 commit comments

Comments
 (0)