Skip to content

Commit e9587bc

Browse files
authored
Merge pull request #34 from IBM/incremental-analysis
Support for Incremental analysis
2 parents 4f513bd + 9c45cf3 commit e9587bc

File tree

4 files changed

+89
-46
lines changed

4 files changed

+89
-46
lines changed

cldk/analysis/java/codeanalyzer/codeanalyzer.py

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,15 @@ class JCodeanalyzer:
5151
"""
5252

5353
def __init__(
54-
self,
55-
project_dir: Union[str, Path],
56-
source_code: str | None,
57-
analysis_backend_path: Union[str, Path, None],
58-
analysis_json_path: Union[str, Path, None],
59-
analysis_level: str,
60-
use_graalvm_binary: bool,
61-
eager_analysis: bool,
54+
self,
55+
project_dir: Union[str, Path],
56+
source_code: str | None,
57+
analysis_backend_path: Union[str, Path, None],
58+
analysis_json_path: Union[str, Path, None],
59+
analysis_level: str,
60+
use_graalvm_binary: bool,
61+
eager_analysis: bool,
62+
target_files: List[str] | None
6263
) -> None:
6364
self.project_dir = project_dir
6465
self.source_code = source_code
@@ -67,6 +68,7 @@ def __init__(
6768
self.use_graalvm_binary = use_graalvm_binary
6869
self.eager_analysis = eager_analysis
6970
self.analysis_level = analysis_level
71+
self.target_files = target_files
7072
self.application = self._init_codeanalyzer(
7173
analysis_level=1 if analysis_level == AnalysisLevel.symbol_table else 2)
7274
# Attributes related to the Java code analysis...
@@ -198,11 +200,19 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
198200
"""
199201

200202
codeanalyzer_exec = self._get_codeanalyzer_exec()
201-
203+
codeanalyzer_args = ''
202204
if self.analysis_json_path is None:
203205
logger.info("Reading analysis from the pipe.")
204-
codeanalyzer_args = codeanalyzer_exec + shlex.split(
205-
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}")
206+
# If target file is provided, the input is merged into a single string and passed to codeanalyzer
207+
if self.target_files:
208+
target_file_options = ' -t '.join([s.strip() for s in self.target_files])
209+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
210+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -t {target_file_options}"
211+
)
212+
else:
213+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
214+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
215+
)
206216
try:
207217
logger.info(f"Running codeanalyzer: {' '.join(codeanalyzer_args)}")
208218
console_out: CompletedProcess[str] = subprocess.run(
@@ -216,15 +226,29 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
216226
raise CodeanalyzerExecutionException(str(e)) from e
217227

218228
else:
229+
# Check if the code analyzer needs to be run
230+
is_run_code_analyzer = False
219231
analysis_json_path_file = Path(self.analysis_json_path).joinpath("analysis.json")
220-
if not analysis_json_path_file.exists() or self.eager_analysis:
221-
# If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis
222-
# flag is set, we'll run the analysis every time the object is created. This will happen regradless
223-
# of the existence of the analysis file.
224-
# Create the executable command for codeanalyzer.
232+
# If target file is provided, the input is merged into a single string and passed to codeanalyzer
233+
if self.target_files:
234+
target_file_options = ' -t '.join([s.strip() for s in self.target_files])
225235
codeanalyzer_args = codeanalyzer_exec + shlex.split(
226-
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}")
227-
236+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
237+
f" -o {self.analysis_json_path} -t {target_file_options}"
238+
)
239+
is_run_code_analyzer = True
240+
else:
241+
if not analysis_json_path_file.exists() or self.eager_analysis:
242+
# If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis
243+
# flag is set, we'll run the analysis every time the object is created. This will happen regardless
244+
# of the existence of the analysis file.
245+
# Create the executable command for codeanalyzer.
246+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
247+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
248+
)
249+
is_run_code_analyzer = True
250+
251+
if is_run_code_analyzer:
228252
try:
229253
logger.info(f"Running codeanalyzer subprocess with args {codeanalyzer_args}")
230254
subprocess.run(
@@ -238,7 +262,6 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
238262

239263
except Exception as e:
240264
raise CodeanalyzerExecutionException(str(e)) from e
241-
242265
with open(analysis_json_path_file) as f:
243266
data = json.load(f)
244267
return JApplication(**data)
@@ -252,7 +275,6 @@ def _codeanalyzer_single_file(self):
252275
JApplication
253276
The application view of the Java code with the analysis results.
254277
"""
255-
# self.source_code: str = re.sub(r"[\r\n\t\f\v]+", lambda x: " " if x.group() in "\t\f\v" else " ", self.source_code)
256278
codeanalyzer_exec = self._get_codeanalyzer_exec()
257279
codeanalyzer_args = ["--source-analysis", self.source_code]
258280
codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args

cldk/analysis/java/java.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,16 @@
1515
class JavaAnalysis(SymbolTable, CallGraph):
1616

1717
def __init__(
18-
self,
19-
project_dir: str | Path | None,
20-
source_code: str | None,
21-
analysis_backend: str,
22-
analysis_backend_path: str | None,
23-
analysis_json_path: str | Path | None,
24-
analysis_level: str,
25-
use_graalvm_binary: bool,
26-
eager_analysis: bool,
18+
self,
19+
project_dir: str | Path | None,
20+
source_code: str | None,
21+
analysis_backend: str,
22+
analysis_backend_path: str | None,
23+
analysis_json_path: str | Path | None,
24+
analysis_level: str,
25+
target_files: List[str] | None,
26+
use_graalvm_binary: bool,
27+
eager_analysis: bool,
2728
) -> None:
2829
"""
2930
Parameters
@@ -44,7 +45,9 @@ def __init__(
4445
eager_analysis : bool, optional
4546
A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
4647
eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
47-
48+
target_files: List[str] | None, optional
49+
The target files for which the analysis will run or get modified. Currently, this feature is only supported
50+
with symbol table analysis. In the future, we will add this feature to other analysis levels.
4851
Attributes
4952
----------
5053
analysis_backend : JCodeQL | JApplication
@@ -59,7 +62,8 @@ def __init__(
5962
self.analysis_backend_path = analysis_backend_path
6063
self.eager_analysis = eager_analysis
6164
self.use_graalvm_binary = use_graalvm_binary
62-
self.analysis_backend = analysis_backend
65+
self.analysis_backend = analysis_backend
66+
self.target_files = target_files
6367
# Initialize the analysis analysis_backend
6468
if analysis_backend.lower() == "codeql":
6569
self.analysis_backend: JCodeQL = JCodeQL(self.project_dir, self.analysis_json_path)
@@ -72,6 +76,7 @@ def __init__(
7276
analysis_json_path=self.analysis_json_path,
7377
use_graalvm_binary=self.use_graalvm_binary,
7478
analysis_backend_path=self.analysis_backend_path,
79+
target_files=self.target_files
7580
)
7681
else:
7782
raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.")
@@ -439,7 +444,9 @@ def get_implemented_interfaces(self, qualified_class_name) -> List[str]:
439444
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
440445
return self.backend.get_implemented_interfaces(qualified_class_name)
441446

442-
def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> (List)[Tuple[JMethodDetail, JMethodDetail]]:
447+
def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str,
448+
method_signature: str | None = None) -> (List)[
449+
Tuple[JMethodDetail, JMethodDetail]]:
443450
"""
444451
A call graph using symbol table for a given class and a given method.
445452
Args:
@@ -479,7 +486,7 @@ def get_class_call_graph(self, qualified_class_name: str, method_signature: str
479486
"""
480487
if using_symbol_table:
481488
return self.__get_class_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
482-
method_signature=method_signature)
489+
method_signature=method_signature)
483490
if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
484491
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
485492
return self.backend.get_class_call_graph(qualified_class_name, method_signature)

cldk/core.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from pathlib import Path
22

3-
43
import logging
4+
from typing import List
55

6+
from cldk.analysis import AnalysisLevel
67
from cldk.analysis.java import JavaAnalysis
78
from cldk.analysis.java.treesitter import JavaSitter
89
from cldk.utils.exceptions import CldkInitializationException
@@ -30,15 +31,16 @@ def __init__(self, language: str):
3031
self.language: str = language
3132

3233
def analysis(
33-
self,
34-
project_path: str | Path | None = None,
35-
source_code: str | None = None,
36-
eager: bool = False,
37-
analysis_backend: str | None = "codeanalyzer",
38-
analysis_level: str = "symbol_table",
39-
analysis_backend_path: str | None = None,
40-
analysis_json_path: str | Path = None,
41-
use_graalvm_binary: bool = False,
34+
self,
35+
project_path: str | Path | None = None,
36+
source_code: str | None = None,
37+
eager: bool = False,
38+
analysis_backend: str | None = "codeanalyzer",
39+
analysis_level: str = AnalysisLevel.symbol_table,
40+
target_files: List[str] | None = None,
41+
analysis_backend_path: str | None = None,
42+
analysis_json_path: str | Path = None,
43+
use_graalvm_binary: bool = False,
4244
) -> JavaAnalysis:
4345
"""
4446
Initialize the preprocessor based on the specified language and analysis_backend.
@@ -65,7 +67,11 @@ def analysis(
6567
eager : bool, optional
6668
A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
6769
eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
68-
70+
analysis_level: str, optional
71+
Analysis levels. Refer to AnalysisLevel.
72+
target_files: List[str] | None, optional
73+
The target files (paths) for which the analysis will run or get modified. Currently, this feature is only supported
74+
with symbol table analysis. In the future, we will add this feature to other analysis levels.
6975
Returns
7076
-------
7177
JavaAnalysis
@@ -77,13 +83,19 @@ def analysis(
7783
If neither project_path nor source_code is provided.
7884
NotImplementedError
7985
If the specified language is not implemented yet.
86+
87+
Args:
88+
analysis_level:
89+
target_files:
90+
analysis_level:
8091
"""
8192

8293
if project_path is None and source_code is None:
8394
raise CldkInitializationException("Either project_path or source_code must be provided.")
8495

8596
if project_path is not None and source_code is not None:
86-
raise CldkInitializationException("Both project_path and source_code are provided. Please provide " "only one.")
97+
raise CldkInitializationException(
98+
"Both project_path and source_code are provided. Please provide " "only one.")
8799

88100
if self.language == "java":
89101
return JavaAnalysis(
@@ -94,6 +106,7 @@ def analysis(
94106
analysis_backend_path=analysis_backend_path,
95107
analysis_json_path=analysis_json_path,
96108
use_graalvm_binary=use_graalvm_binary,
109+
target_files=target_files,
97110
eager_analysis=eager,
98111
)
99112
else:
@@ -114,7 +127,7 @@ def treesitter_parser(self):
114127
else:
115128
raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")
116129

117-
def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer| NotImplementedError]:
130+
def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer | NotImplementedError]:
118131
"""
119132
Parse the project using treesitter.
120133

cldk/models/java/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ class JCompilationUnit(BaseModel):
341341
comment: str
342342
imports: List[str]
343343
type_declarations: Dict[str, JType]
344+
is_modified: bool = False
344345

345346

346347
class JMethodDetail(BaseModel):

0 commit comments

Comments
 (0)