Skip to content

Commit 9cdb49f

Browse files
authored
Merge branch 'main' into docstring
2 parents 31ebcdf + 67cc3bd commit 9cdb49f

File tree

9 files changed

+202
-228
lines changed

9 files changed

+202
-228
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,7 @@ if __name__ == "__main__":
269269
print(f"Instruction:\n{instruction}")
270270
print(f"LLM Output:\n{llm_output}")
271271
```
272+
273+
### Publications (papers and blogs related to CLDK)
274+
1. Pan, Rangeet, Myeongsoo Kim, Rahul Krishna, Raju Pavuluri, and Saurabh Sinha. "[Multi-language Unit Test Generation using LLMs.](https://arxiv.org/abs/2409.03093)" arXiv preprint arXiv:2409.03093 (2024).
275+
2. Pan, Rangeet, Rahul Krishna, Raju Pavuluri, Saurabh Sinha, and Maja Vukovic. "[Simplify your Code LLM solutions using CodeLLM Dev Kit (CLDK).](https://www.linkedin.com/pulse/simplify-your-code-llm-solutions-using-codellm-dev-kit-rangeet-pan-vnnpe/?trackingId=kZ3U6d8GSDCs8S1oApXZgg%3D%3D)" Blog.

cldk/analysis/java/codeanalyzer/codeanalyzer.py

Lines changed: 55 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ def __init__(
8484
analysis_level: str,
8585
use_graalvm_binary: bool,
8686
eager_analysis: bool,
87+
target_files: List[str] | None
8788
) -> None:
8889
self.project_dir = project_dir
8990
self.source_code = source_code
@@ -92,6 +93,7 @@ def __init__(
9293
self.use_graalvm_binary = use_graalvm_binary
9394
self.eager_analysis = eager_analysis
9495
self.analysis_level = analysis_level
96+
self.target_files = target_files
9597
self.application = self._init_codeanalyzer(
9698
analysis_level=1 if analysis_level == AnalysisLevel.symbol_table else 2)
9799
# Attributes related to the Java code analysis...
@@ -183,15 +185,11 @@ def _get_codeanalyzer_exec(self) -> List[str]:
183185
resources.files("cldk.analysis.java.codeanalyzer.bin") / "codeanalyzer") as codeanalyzer_bin_path:
184186
codeanalyzer_exec = shlex.split(codeanalyzer_bin_path.__str__())
185187
else:
186-
print(f'analysis path: {self.analysis_json_path}')
187-
analysis_json_path_file = Path(self.analysis_json_path).joinpath("analysis.json")
188+
188189
if self.analysis_backend_path:
189190
analysis_backend_path = Path(self.analysis_backend_path)
190191
logger.info(f"Using codeanalyzer.jar from {analysis_backend_path}")
191192
codeanalyzer_exec = shlex.split(f"java -jar {analysis_backend_path / 'codeanalyzer.jar'}")
192-
elif analysis_json_path_file.exists():
193-
logger.info(f"Using existing analysis from {self.analysis_json_path}")
194-
codeanalyzer_exec = shlex.split(f"java -jar codeanalyzer.jar")
195193
else:
196194
# Since the path to codeanalyzer.jar was not provided, we'll download the latest version from GitHub.
197195
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.jar")) as codeanalyzer_jar_path:
@@ -200,7 +198,16 @@ def _get_codeanalyzer_exec(self) -> List[str]:
200198
codeanalyzer_jar_file = self._download_or_update_code_analyzer(codeanalyzer_jar_path)
201199
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
202200
return codeanalyzer_exec
203-
201+
202+
def init_japplication(self, data: str) -> JApplication:
203+
"""Return JApplication giving the stringified JSON as input.
204+
Returns
205+
-------
206+
JApplication
207+
The application view of the Java code with the analysis results.
208+
"""
209+
return JApplication(**json.loads(data))
210+
204211
def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
205212
""" Initializes the Codeanalyzer.
206213
@@ -214,11 +221,19 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
214221
CodeanalyzerExecutionException: If there is an error running Codeanalyzer.
215222
"""
216223
codeanalyzer_exec = self._get_codeanalyzer_exec()
217-
224+
codeanalyzer_args = ''
218225
if self.analysis_json_path is None:
219226
logger.info("Reading analysis from the pipe.")
220-
codeanalyzer_args = codeanalyzer_exec + shlex.split(
221-
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}")
227+
# If target files are provided, they are joined into a single string of -t options and passed to codeanalyzer
228+
if self.target_files:
229+
target_file_options = ' -t '.join([s.strip() for s in self.target_files])
230+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
231+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -t {target_file_options}"
232+
)
233+
else:
234+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
235+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
236+
)
222237
try:
223238
logger.info(f"Running codeanalyzer: {' '.join(codeanalyzer_args)}")
224239
console_out: CompletedProcess[str] = subprocess.run(
@@ -232,15 +247,29 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
232247
raise CodeanalyzerExecutionException(str(e)) from e
233248

234249
else:
250+
# Check if the code analyzer needs to be run
251+
is_run_code_analyzer = False
235252
analysis_json_path_file = Path(self.analysis_json_path).joinpath("analysis.json")
236-
if not analysis_json_path_file.exists() or self.eager_analysis:
237-
# If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis
238-
# flag is set, we'll run the analysis every time the object is created. This will happen regradless
239-
# of the existence of the analysis file.
240-
# Create the executable command for codeanalyzer.
253+
# If target files are provided, they are joined into a single string of -t options and passed to codeanalyzer
254+
if self.target_files:
255+
target_file_options = ' -t '.join([s.strip() for s in self.target_files])
241256
codeanalyzer_args = codeanalyzer_exec + shlex.split(
242-
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}")
257+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
258+
f" -o {self.analysis_json_path} -t {target_file_options}"
259+
)
260+
is_run_code_analyzer = True
261+
else:
262+
if not analysis_json_path_file.exists() or self.eager_analysis:
263+
# If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis
264+
# flag is set, we'll run the analysis every time the object is created. This will happen regardless
265+
# of the existence of the analysis file.
266+
# Create the executable command for codeanalyzer.
267+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
268+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
269+
)
270+
is_run_code_analyzer = True
243271

272+
if is_run_code_analyzer:
244273
try:
245274
logger.info(f"Running codeanalyzer subprocess with args {codeanalyzer_args}")
246275
subprocess.run(
@@ -254,7 +283,6 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
254283

255284
except Exception as e:
256285
raise CodeanalyzerExecutionException(str(e)) from e
257-
258286
with open(analysis_json_path_file) as f:
259287
data = json.load(f)
260288
return JApplication(**data)
@@ -265,7 +293,6 @@ def _codeanalyzer_single_file(self):
265293
Returns:
266294
JApplication: The application view of the Java code with the analysis results.
267295
"""
268-
# self.source_code: str = re.sub(r"[\r\n\t\f\v]+", lambda x: " " if x.group() in "\t\f\v" else " ", self.source_code)
269296
codeanalyzer_exec = self._get_codeanalyzer_exec()
270297
codeanalyzer_args = ["--source-analysis", self.source_code]
271298
codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args
@@ -410,8 +437,9 @@ def get_all_callers(self, target_class_name: str, target_method_signature: str,
410437
caller_detail_dict = {}
411438
call_graph = None
412439
if using_symbol_table:
413-
call_graph = self.__raw_call_graph_using_symbol_table_target_method(target_class_name=target_class_name,
414-
target_method_signature=target_method_signature)
440+
call_graph = self.__call_graph_using_symbol_table(qualified_class_name=target_class_name,
441+
method_signature=target_method_signature,
442+
is_target_method=True)
415443
else:
416444
call_graph = self.call_graph
417445
if (target_method_signature, target_class_name) not in call_graph.nodes():
@@ -703,10 +731,11 @@ def __call_graph_using_symbol_table(self,
703731
cg = nx.DiGraph()
704732
sdg = None
705733
if is_target_method:
706-
sdg = None
734+
sdg = self.__raw_call_graph_using_symbol_table_target_method(target_class_name=qualified_class_name,
735+
target_method_signature=method_signature)
707736
else:
708737
sdg = self.__raw_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
709-
method_signature=method_signature)
738+
method_signature=method_signature)
710739
tsu = JavaSitter()
711740
edge_list = [
712741
(
@@ -733,8 +762,8 @@ def __call_graph_using_symbol_table(self,
733762
return cg
734763

735764
def __raw_call_graph_using_symbol_table_target_method(self,
736-
target_class_name: str,
737-
target_method_signature: str,
765+
target_class_name: str,
766+
target_method_signature: str,
738767
cg=None) -> list[JGraphEdgesST]:
739768
""" Generates call graph using symbol table information given the target method and target class
740769
Args:
@@ -752,7 +781,7 @@ def __raw_call_graph_using_symbol_table_target_method(self,
752781
for class_name in self.get_all_classes():
753782
for method in self.get_all_methods_in_class(qualified_class_name=class_name):
754783
method_details = self.get_method(qualified_class_name=class_name,
755-
method_signature=method)
784+
method_signature=method)
756785
for call_site in method_details.call_sites:
757786
source_method_details = None
758787
source_class = ''
@@ -776,9 +805,9 @@ def __raw_call_graph_using_symbol_table_target_method(self,
776805
if call_site.receiver_type != "":
777806
# call to any class
778807
if self.get_class(qualified_class_name=call_site.receiver_type):
779-
if callee_signature==target_method_signature and call_site.receiver_type == target_class_name:
808+
if callee_signature == target_method_signature and call_site.receiver_type == target_class_name:
780809
source_method_details = self.get_method(method_signature=method,
781-
qualified_class_name=class_name)
810+
qualified_class_name=class_name)
782811
source_class = class_name
783812
else:
784813
# check if any method exists with the signature in the class even if the receiver type is blank

cldk/analysis/java/java.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,18 @@
1515
class JavaAnalysis(SymbolTable, CallGraph):
1616

1717
def __init__(
18-
self,
19-
project_dir: str | Path | None,
20-
source_code: str | None,
21-
analysis_backend: str,
22-
analysis_backend_path: str | None,
23-
analysis_json_path: str | Path | None,
24-
analysis_level: str,
25-
use_graalvm_binary: bool,
26-
eager_analysis: bool,
18+
self,
19+
project_dir: str | Path | None,
20+
source_code: str | None,
21+
analysis_backend: str,
22+
analysis_backend_path: str | None,
23+
analysis_json_path: str | Path | None,
24+
analysis_level: str,
25+
target_files: List[str] | None,
26+
use_graalvm_binary: bool,
27+
eager_analysis: bool,
2728
) -> None:
29+
2830
""" Initialization method for Java Analysis backend.
2931
3032
Args:
@@ -39,16 +41,21 @@ def __init__(
3941
4042
Raises:
4143
NotImplementedError: Raised when analysis backend is not supported.
44+
45+
Attributes:
46+
application (JApplication): The application view of the Java code.
4247
4348
"""
49+
4450
self.project_dir = project_dir
4551
self.source_code = source_code
4652
self.analysis_level = analysis_level
4753
self.analysis_json_path = analysis_json_path
4854
self.analysis_backend_path = analysis_backend_path
4955
self.eager_analysis = eager_analysis
5056
self.use_graalvm_binary = use_graalvm_binary
51-
self.analysis_backend = analysis_backend
57+
self.analysis_backend = analysis_backend
58+
self.target_files = target_files
5259
# Initialize the analysis backend
5360
if analysis_backend.lower() == "codeql":
5461
self.analysis_backend: JCodeQL = JCodeQL(self.project_dir, self.analysis_json_path)
@@ -61,6 +68,7 @@ def __init__(
6168
analysis_json_path=self.analysis_json_path,
6269
use_graalvm_binary=self.use_graalvm_binary,
6370
analysis_backend_path=self.analysis_backend_path,
71+
target_files=self.target_files
6472
)
6573
else:
6674
raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.")
@@ -189,12 +197,14 @@ def get_callers(self, target_class_name: str, target_method_declaration: str,
189197
raise NotImplementedError("Generating all callers over a single file is not implemented yet.")
190198
return self.backend.get_all_callers(target_class_name, target_method_declaration, using_symbol_table)
191199

192-
def get_callees(self, source_class_name: str, source_method_declaration: str) ->Dict:
200+
201+
def get_callees(self, source_class_name: str, source_method_declaration: str, using_symbol_table: bool = False) ->Dict:
193202
""" Returns a dictionary of callees by the given method in the given class.
194203
195204
Args:
196205
source_class_name (str): Qualified class name where the given method is.
197206
source_method_declaration (str): Given method
207+
using_symbol_table (bool): Whether to use symbol table. Defaults to False.
198208
199209
Raises:
200210
NotImplementedError: Raised when this functionality is not supported.
@@ -204,7 +214,7 @@ def get_callees(self, source_class_name: str, source_method_declaration: str) ->
204214
"""
205215
if self.source_code:
206216
raise NotImplementedError("Generating all callees over a single file is not implemented yet.")
207-
return self.backend.get_all_callees(source_class_name, source_method_declaration)
217+
return self.backend.get_all_callees(source_class_name, source_method_declaration, using_symbol_table)
208218

209219
def get_methods(self) -> Dict[str, Dict[str, JCallable]]:
210220
""" Returns all methods in the Java code.
@@ -442,6 +452,7 @@ def get_implemented_interfaces(self, qualified_class_name: str) -> List[str]:
442452

443453
def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> (List)[Tuple[JMethodDetail, JMethodDetail]]:
444454
"""A call graph using symbol table for a given class and a given method.
455+
445456
Args:
446457
qualified_class_name (str): The qualified name of the class.
447458
method_signature (str | None, optional): The signature of the method in the class. Defaults to None.
@@ -473,7 +484,7 @@ def get_class_call_graph(self, qualified_class_name: str, method_signature: str
473484
"""
474485
if using_symbol_table:
475486
return self.__get_class_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
476-
method_signature=method_signature)
487+
method_signature=method_signature)
477488
if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
478489
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
479490
return self.backend.get_class_call_graph(qualified_class_name, method_signature)

0 commit comments

Comments
 (0)