diff --git a/codegen-on-oss/codegen_on_oss/analyzers/__init__.py b/codegen-on-oss/codegen_on_oss/analyzers/__init__.py index f1ef5c5b4..5dad7789e 100644 --- a/codegen-on-oss/codegen_on_oss/analyzers/__init__.py +++ b/codegen-on-oss/codegen_on_oss/analyzers/__init__.py @@ -6,88 +6,78 @@ as an API backend for frontend applications. """ -# Main API interface -from codegen_on_oss.analyzers.api import ( - CodegenAnalyzerAPI, - create_api, - api_analyze_codebase, - api_analyze_pr, - api_get_visualization, - api_get_static_errors -) - # Modern analyzer architecture -from codegen_on_oss.analyzers.analyzer import ( - AnalyzerManager, - AnalyzerPlugin, - AnalyzerRegistry, - CodeQualityPlugin, - DependencyPlugin -) - -# Issue tracking system -from codegen_on_oss.analyzers.issues import ( - Issue, - IssueCollection, - IssueSeverity, - AnalysisType, - IssueCategory, - CodeLocation -) - -# Analysis result models -from codegen_on_oss.analyzers.models.analysis_result import ( - AnalysisResult, - CodeQualityResult, - DependencyResult, - PrAnalysisResult -) - -# Core analysis modules -from codegen_on_oss.analyzers.code_quality import CodeQualityAnalyzer -from codegen_on_oss.analyzers.dependencies import DependencyAnalyzer - +from codegen_on_oss.analyzers.analyzer import (AnalyzerManager, AnalyzerPlugin, + AnalyzerRegistry, + CodeQualityPlugin, + DependencyPlugin) +# Main API interface +from codegen_on_oss.analyzers.api import (CodegenAnalyzerAPI, + api_analyze_codebase, api_analyze_pr, + api_get_static_errors, + api_get_visualization, create_api) # Legacy analyzer interfaces (for backward compatibility) from codegen_on_oss.analyzers.base_analyzer import BaseCodeAnalyzer +# Core analysis modules +from codegen_on_oss.analyzers.code_quality import CodeQualityAnalyzer +from codegen_on_oss.analyzers.codebase_analysis import ( + get_class_summary, get_codebase_summary, get_dependency_graph, + get_file_complexity_metrics, get_file_summary, get_function_summary, + get_symbol_references, 
get_symbol_summary) from codegen_on_oss.analyzers.codebase_analyzer import CodebaseAnalyzer -from codegen_on_oss.analyzers.error_analyzer import CodebaseAnalyzer as ErrorAnalyzer +from codegen_on_oss.analyzers.dependencies import DependencyAnalyzer +from codegen_on_oss.analyzers.error_analyzer import \ + CodebaseAnalyzer as ErrorAnalyzer +# Issue tracking system +from codegen_on_oss.analyzers.issues import (AnalysisType, CodeLocation, Issue, + IssueCategory, IssueCollection, + IssueSeverity) +# Analysis result models +from codegen_on_oss.analyzers.models.analysis_result import (AnalysisResult, + CodeQualityResult, + DependencyResult, + PrAnalysisResult) __all__ = [ # Main API - 'CodegenAnalyzerAPI', - 'create_api', - 'api_analyze_codebase', - 'api_analyze_pr', - 'api_get_visualization', - 'api_get_static_errors', - + "CodegenAnalyzerAPI", + "create_api", + "api_analyze_codebase", + "api_analyze_pr", + "api_get_visualization", + "api_get_static_errors", # Modern architecture - 'AnalyzerManager', - 'AnalyzerPlugin', - 'AnalyzerRegistry', - 'CodeQualityPlugin', - 'DependencyPlugin', - + "AnalyzerManager", + "AnalyzerPlugin", + "AnalyzerRegistry", + "CodeQualityPlugin", + "DependencyPlugin", # Issue tracking - 'Issue', - 'IssueCollection', - 'IssueSeverity', - 'AnalysisType', - 'IssueCategory', - 'CodeLocation', - + "Issue", + "IssueCollection", + "IssueSeverity", + "AnalysisType", + "IssueCategory", + "CodeLocation", # Analysis results - 'AnalysisResult', - 'CodeQualityResult', - 'DependencyResult', - 'PrAnalysisResult', - + "AnalysisResult", + "CodeQualityResult", + "DependencyResult", + "PrAnalysisResult", # Core analyzers - 'CodeQualityAnalyzer', - 'DependencyAnalyzer', - + "CodeQualityAnalyzer", + "DependencyAnalyzer", + # Codebase analysis utilities + "get_codebase_summary", + "get_file_summary", + "get_class_summary", + "get_function_summary", + "get_symbol_summary", + "get_dependency_graph", + "get_symbol_references", + "get_file_complexity_metrics", # Legacy 
def get_codebase_summary(codebase: Codebase) -> str:  # type: ignore
    """
    Generate a node and edge count summary of a codebase.

    Args:
        codebase: The Codebase object to summarize

    Returns:
        A formatted string with two sections: node counts (files, imports,
        external modules and symbols broken down by kind) followed by edge
        counts (symbol usages, import resolutions and exports)
    """
    node_summary = f"""Contains {len(codebase.ctx.get_nodes())} nodes
- {len(list(codebase.files))} files
- {len(list(codebase.imports))} imports
- {len(list(codebase.external_modules))} external_modules
- {len(list(codebase.symbols))} symbols
\t- {len(list(codebase.classes))} classes
\t- {len(list(codebase.functions))} functions
\t- {len(list(codebase.global_vars))} global_vars
\t- {len(list(codebase.interfaces))} interfaces
"""
    # Read the edge collection once instead of once per summary line.
    edges = codebase.ctx.edges

    def count_edges(edge_type) -> int:
        """Count edges whose payload (third tuple item) has the given type."""
        return sum(1 for edge in edges if edge[2].type == edge_type)

    edge_summary = f"""Contains {len(edges)} edges
- {count_edges(EdgeType.SYMBOL_USAGE)} symbol -> used symbol
- {count_edges(EdgeType.IMPORT_SYMBOL_RESOLUTION)} import -> used symbol
- {count_edges(EdgeType.EXPORT)} export -> exported symbol
    """

    return f"{node_summary}\n{edge_summary}"


def get_file_summary(file: SourceFile) -> str:  # type: ignore
    """
    Generate a summary of a source file.

    Args:
        file: The SourceFile object to summarize

    Returns:
        A formatted string listing how many imports, symbols, classes,
        functions, global variables and interfaces the file contains
    """
    # NOTE(review): the "importers" line below reuses len(file.imports), i.e.
    # the file's own import count. Confirm whether a count of files importing
    # this one was intended before relying on that line.
    import_count = len(file.imports)

    return f"""==== [ `{file.name}` (SourceFile) Dependency Summary ] ====
- {import_count} imports
- {len(file.symbols)} symbol references
\t- {len(file.classes)} classes
\t- {len(file.functions)} functions
\t- {len(file.global_vars)} global variables
\t- {len(file.interfaces)} interfaces

==== [ `{file.name}` Usage Summary ] ====
- {import_count} importers
"""


def get_class_summary(cls: Class) -> str:  # type: ignore
    """
    Generate a summary of a class.

    Args:
        cls: The Class object to summarize

    Returns:
        A formatted string with the class's parent class names and its
        method, attribute, decorator and dependency counts, followed by the
        usage summary produced by get_symbol_summary
    """
    return f"""==== [ `{cls.name}` (Class) Dependency Summary ] ====
- parent classes: {cls.parent_class_names}
- {len(cls.methods)} methods
- {len(cls.attributes)} attributes
- {len(cls.decorators)} decorators
- {len(cls.dependencies)} dependencies

{get_symbol_summary(cls)}
    """


def get_function_summary(func: Function) -> str:  # type: ignore
    """
    Generate a summary of a function.

    Args:
        func: The Function object to summarize

    Returns:
        A formatted string with the function's return statement, parameter,
        function call, call site, decorator and dependency counts, followed
        by the usage summary produced by get_symbol_summary
    """
    return f"""==== [ `{func.name}` (Function) Dependency Summary ] ====
- {len(func.return_statements)} return statements
- {len(func.parameters)} parameters
- {len(func.function_calls)} function calls
- {len(func.call_sites)} call sites
- {len(func.decorators)} decorators
- {len(func.dependencies)} dependencies

{get_symbol_summary(func)}
    """


def get_symbol_summary(symbol: Symbol) -> str:  # type: ignore
    """
    Generate a usage summary of a symbol.

    Args:
        symbol: The Symbol object to summarize

    Returns:
        A formatted string counting the symbol's usages by kind, plus a
        breakdown of what the Import usages resolve to
    """
    usages = symbol.symbol_usages
    imported_symbols = [x.imported_symbol for x in usages if isinstance(x, Import)]

    def count_kind(items, symbol_type) -> int:
        """Count the Symbol instances in items that have the given symbol_type."""
        return sum(
            1
            for item in items
            if isinstance(item, Symbol) and item.symbol_type == symbol_type
        )

    return f"""==== [ `{symbol.name}` ({type(symbol).__name__}) Usage Summary ] ====
- {len(usages)} usages
\t- {count_kind(usages, SymbolType.Function)} functions
\t- {count_kind(usages, SymbolType.Class)} classes
\t- {count_kind(usages, SymbolType.GlobalVar)} global variables
\t- {count_kind(usages, SymbolType.Interface)} interfaces
\t- {len(imported_symbols)} imports
\t\t- {count_kind(imported_symbols, SymbolType.Function)} functions
\t\t- {count_kind(imported_symbols, SymbolType.Class)} classes
\t\t- {count_kind(imported_symbols, SymbolType.GlobalVar)} global variables
\t\t- {count_kind(imported_symbols, SymbolType.Interface)} interfaces
\t\t- {len([x for x in imported_symbols if isinstance(x, ExternalModule)])} external modules
\t\t- {len([x for x in imported_symbols if isinstance(x, SourceFile)])} files
    """


def _owning_file_paths(nodes):
    """Yield node.file.file_path for every node that exposes both attributes."""
    for node in nodes:
        if hasattr(node, "file") and hasattr(node.file, "file_path"):
            yield node.file.file_path


def get_dependency_graph(codebase: Codebase, file_path: Optional[str] = None) -> Dict[str, List[str]]:  # type: ignore
    """
    Generate a dependency graph for a codebase or specific file.

    Args:
        codebase: The Codebase object to analyze
        file_path: Optional path to a specific file to analyze; when omitted
            (or empty) every file in the codebase is included

    Returns:
        A dictionary mapping each analyzed file path to the list of other
        file paths it depends on. Each list is de-duplicated, excludes the
        file itself, and is ordered by first appearance (direct imports
        first, then symbol dependencies)
    """
    dependency_graph: Dict[str, List[str]] = {}

    for file in codebase.files:
        if file_path and file.file_path != file_path:
            continue

        # Direct imports come first, then whatever the file's symbols depend on.
        imported = [imp.imported_symbol for imp in file.imports if hasattr(imp, "imported_symbol")]
        symbol_deps = [dep for symbol in file.symbols for dep in symbol.dependencies]

        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the output is stable between runs (a set would not guarantee that).
        unique_deps = dict.fromkeys(
            path
            for path in _owning_file_paths(imported + symbol_deps)
            if path != file.file_path
        )
        dependency_graph[file.file_path] = list(unique_deps)

    return dependency_graph


def _describe_reference(source_node, edge) -> Dict[str, Any]:
    """Build the reference record for one edge that originates at source_node."""
    owner = getattr(source_node, "file", None)

    return {
        # File and line are optional: not every node exposes them.
        "file_path": owner.file_path if hasattr(owner, "file_path") else None,
        "line": getattr(source_node, "line", None),
        "source_type": type(source_node).__name__,
        "source_name": getattr(source_node, "name", str(source_node)),
        "edge_type": edge[2].type.name if hasattr(edge[2], "type") else "Unknown",
    }


def get_symbol_references(codebase: Codebase, symbol_name: str) -> List[Dict[str, Any]]:  # type: ignore
    """
    Find all references to a symbol in the codebase.

    Args:
        codebase: The Codebase object to search
        symbol_name: The name of the symbol to find references for

    Returns:
        A list of dictionaries, one per edge pointing at a symbol with the
        given name, with the keys "file_path", "line", "source_type",
        "source_name" and "edge_type". Results are grouped by target symbol
        and follow edge order within each group
    """
    references: List[Dict[str, Any]] = []

    # Find all symbols with the given name
    target_symbols = [s for s in codebase.symbols if s.name == symbol_name]
    if not target_symbols:
        return references

    # Read the edge collection once instead of once per target symbol.
    edges = codebase.ctx.edges

    for symbol in target_symbols:
        # Keep only the edges whose target (second tuple item) is this symbol
        incoming = [edge for edge in edges if edge[1] == symbol.id]
        for edge in incoming:
            source_node = codebase.ctx.get_node(edge[0])
            if source_node:
                references.append(_describe_reference(source_node, edge))

    return references


def get_file_complexity_metrics(file: SourceFile) -> Dict[str, Any]:  # type: ignore
    """
    Calculate complexity metrics for a source file.

    Args:
        file: The SourceFile object to analyze

    Returns:
        A dictionary with the file's path and name, its line count, its
        import/class/function/global variable counts, the summed heuristic
        complexity of its functions and class methods, and the highest
        single-function and single-class complexity (0 when there are none)
    """
    function_complexities = [_calculate_function_complexity(func) for func in file.functions]

    # A class's complexity is the sum of its methods' complexities.
    class_complexities = [
        sum(_calculate_function_complexity(method) for method in cls.methods)
        for cls in file.classes
    ]

    source = getattr(file, "source", None)

    return {
        "file_path": file.file_path,
        "name": file.name,
        # splitlines() does not count a phantom empty line after a trailing
        # newline, which split("\n") would.
        "num_lines": len(source.splitlines()) if source else 0,
        "num_imports": len(file.imports),
        "num_classes": len(file.classes),
        "num_functions": len(file.functions),
        "num_global_vars": len(file.global_vars),
        "cyclomatic_complexity": sum(function_complexities) + sum(class_complexities),
        "max_function_complexity": max(function_complexities, default=0),
        "max_class_complexity": max(class_complexities, default=0),
    }


def _calculate_function_complexity(func: Function) -> int:  # type: ignore
    """
    Estimate the cyclomatic complexity of a function from its source text.

    This is a keyword-counting heuristic, not a parse: keywords that appear
    inside string literals or comments are counted as well.

    Args:
        func: The Function object to analyze

    Returns:
        1 plus the number of branch keywords (if, elif, for, while, except,
        catch) and boolean operators (and, or, &&, ||) found in the source;
        1 when the function has no usable string source
    """
    # Local import keeps this function self-contained; `re` is not otherwise
    # needed by the module.
    import re

    source = getattr(func, "source", None)
    if not isinstance(source, str) or not source:
        return 1  # Base complexity

    # Match whole words so that "else if" is counted once (via "if") and
    # keywords preceded by a tab or followed by "(" / ":" are not missed.
    branch_points = re.findall(
        r"\b(?:if|elif|for|while|except|catch|and|or)\b|&&|\|\|",
        source.lower(),
    )

    return 1 + len(branch_points)
class TestCodebaseAnalysis(unittest.TestCase):
    """Test cases for the codebase_analysis module."""

    def setUp(self):
        """Build MagicMock stand-ins for a codebase, file, class, function and symbol."""
        # Imported locally so the fixture does not depend on a module-level
        # import. Real EdgeType members are required because
        # get_codebase_summary compares edge types with `==`, and a MagicMock
        # never compares equal to an enum member.
        from codegen.sdk.enums import EdgeType

        # Create mock objects for testing
        self.mock_codebase = MagicMock()
        self.mock_file = MagicMock()
        self.mock_class = MagicMock()
        self.mock_function = MagicMock()
        self.mock_symbol = MagicMock()

        # Set up mock codebase: three nodes and one edge of each counted type
        self.mock_codebase.ctx.get_nodes.return_value = [1, 2, 3]
        self.mock_codebase.ctx.edges = [
            (1, 2, MagicMock(type=EdgeType.SYMBOL_USAGE)),
            (2, 3, MagicMock(type=EdgeType.IMPORT_SYMBOL_RESOLUTION)),
            (3, 1, MagicMock(type=EdgeType.EXPORT)),
        ]
        self.mock_codebase.files = [MagicMock(), MagicMock()]
        self.mock_codebase.imports = [MagicMock()]
        self.mock_codebase.external_modules = [MagicMock()]
        self.mock_codebase.symbols = [MagicMock()]
        self.mock_codebase.classes = [MagicMock()]
        self.mock_codebase.functions = [MagicMock()]
        self.mock_codebase.global_vars = [MagicMock()]
        self.mock_codebase.interfaces = [MagicMock()]

        # Set up mock file
        self.mock_file.name = "test_file.py"
        self.mock_file.file_path = "/path/to/test_file.py"
        self.mock_file.imports = [MagicMock()]
        self.mock_file.symbols = [MagicMock()]
        self.mock_file.classes = [MagicMock()]
        self.mock_file.functions = [MagicMock()]
        self.mock_file.global_vars = [MagicMock()]
        self.mock_file.interfaces = [MagicMock()]
        self.mock_file.source = "def test_function():\n    if True:\n        return 1\n    else:\n        return 0"

        # Set up mock class
        self.mock_class.name = "TestClass"
        self.mock_class.parent_class_names = ["BaseClass"]
        self.mock_class.methods = [MagicMock()]
        self.mock_class.attributes = [MagicMock()]
        self.mock_class.decorators = [MagicMock()]
        self.mock_class.dependencies = [MagicMock()]
        self.mock_class.symbol_usages = [MagicMock()]

        # Set up mock function
        self.mock_function.name = "test_function"
        self.mock_function.return_statements = [MagicMock()]
        self.mock_function.parameters = [MagicMock()]
        self.mock_function.function_calls = [MagicMock()]
        self.mock_function.call_sites = [MagicMock()]
        self.mock_function.decorators = [MagicMock()]
        self.mock_function.dependencies = [MagicMock()]
        self.mock_function.symbol_usages = [MagicMock()]
        self.mock_function.source = "def test_function():\n    if True:\n        return 1\n    else:\n        return 0"

        # Set up mock symbol
        self.mock_symbol.name = "test_symbol"
        self.mock_symbol.symbol_usages = [MagicMock()]

    def test_get_codebase_summary(self):
        """Test the get_codebase_summary function."""
        summary = get_codebase_summary(self.mock_codebase)

        # Check that the summary contains expected information
        self.assertIn("Contains 3 nodes", summary)
        self.assertIn("2 files", summary)
        self.assertIn("1 imports", summary)
        self.assertIn("1 external_modules", summary)
        self.assertIn("1 symbols", summary)
        self.assertIn("1 classes", summary)
        self.assertIn("1 functions", summary)
        self.assertIn("1 global_vars", summary)
        self.assertIn("1 interfaces", summary)
        self.assertIn("Contains 3 edges", summary)
        self.assertIn("1 symbol -> used symbol", summary)
        self.assertIn("1 import -> used symbol", summary)
        self.assertIn("1 export -> exported symbol", summary)

    def test_get_file_summary(self):
        """Test the get_file_summary function."""
        summary = get_file_summary(self.mock_file)

        # Check that the summary contains expected information
        self.assertIn("`test_file.py` (SourceFile) Dependency Summary", summary)
        self.assertIn("1 imports", summary)
        self.assertIn("1 symbol references", summary)
        self.assertIn("1 classes", summary)
        self.assertIn("1 functions", summary)
        self.assertIn("1 global variables", summary)
        self.assertIn("1 interfaces", summary)
        self.assertIn("`test_file.py` Usage Summary", summary)
        self.assertIn("1 importers", summary)

    def test_get_class_summary(self):
        """Test the get_class_summary function."""
        # The nested symbol summary is patched out so only the class section
        # is under test here.
        with patch(
            "codegen_on_oss.analyzers.codebase_analysis.get_symbol_summary",
            return_value="SYMBOL SUMMARY",
        ):
            summary = get_class_summary(self.mock_class)

        # Check that the summary contains expected information
        self.assertIn("`TestClass` (Class) Dependency Summary", summary)
        self.assertIn("parent classes: ['BaseClass']", summary)
        self.assertIn("1 methods", summary)
        self.assertIn("1 attributes", summary)
        self.assertIn("1 decorators", summary)
        self.assertIn("1 dependencies", summary)
        self.assertIn("SYMBOL SUMMARY", summary)

    def test_get_function_summary(self):
        """Test the get_function_summary function."""
        # The nested symbol summary is patched out so only the function
        # section is under test here.
        with patch(
            "codegen_on_oss.analyzers.codebase_analysis.get_symbol_summary",
            return_value="SYMBOL SUMMARY",
        ):
            summary = get_function_summary(self.mock_function)

        # Check that the summary contains expected information
        self.assertIn("`test_function` (Function) Dependency Summary", summary)
        self.assertIn("1 return statements", summary)
        self.assertIn("1 parameters", summary)
        self.assertIn("1 function calls", summary)
        self.assertIn("1 call sites", summary)
        self.assertIn("1 decorators", summary)
        self.assertIn("1 dependencies", summary)
        self.assertIn("SYMBOL SUMMARY", summary)

    def test_get_symbol_summary(self):
        """Test the get_symbol_summary function."""
        summary = get_symbol_summary(self.mock_symbol)

        # The single usage is a plain MagicMock, so it is neither a Symbol
        # nor an Import and only shows up in the total.
        self.assertIn("`test_symbol` (MagicMock) Usage Summary", summary)
        self.assertIn("1 usages", summary)
        self.assertIn("0 imports", summary)

    def test_get_dependency_graph(self):
        """Test the get_dependency_graph function."""
        imported = MagicMock()
        imported.imported_symbol.file.file_path = "/path/to/dep.py"
        self_import = MagicMock()
        self_import.imported_symbol.file.file_path = self.mock_file.file_path
        dependency = MagicMock()
        dependency.file.file_path = "/path/to/other.py"
        symbol = MagicMock()
        symbol.dependencies = [dependency]

        self.mock_file.imports = [imported, self_import]
        self.mock_file.symbols = [symbol]
        self.mock_codebase.files = [self.mock_file]

        graph = get_dependency_graph(self.mock_codebase)

        # Self-references are dropped; order is not asserted
        self.assertEqual(list(graph), ["/path/to/test_file.py"])
        self.assertCountEqual(
            graph["/path/to/test_file.py"],
            ["/path/to/dep.py", "/path/to/other.py"],
        )

        # Filtering by a path that matches no file yields an empty graph
        self.assertEqual(get_dependency_graph(self.mock_codebase, "/missing.py"), {})

    def test_get_symbol_references(self):
        """Test the get_symbol_references function."""
        target = MagicMock()
        target.name = "target"
        target.id = 2  # matched by the (1, 2, SYMBOL_USAGE) edge from setUp
        self.mock_codebase.symbols = [target]

        source_node = MagicMock()
        source_node.name = "caller"
        source_node.line = 10
        source_node.file.file_path = "/path/to/caller.py"
        self.mock_codebase.ctx.get_node.return_value = source_node

        references = get_symbol_references(self.mock_codebase, "target")

        self.mock_codebase.ctx.get_node.assert_called_once_with(1)
        self.assertEqual(
            references,
            [
                {
                    "file_path": "/path/to/caller.py",
                    "line": 10,
                    "source_type": "MagicMock",
                    "source_name": "caller",
                    "edge_type": "SYMBOL_USAGE",
                }
            ],
        )
        self.assertEqual(get_symbol_references(self.mock_codebase, "unknown"), [])

    def test_get_file_complexity_metrics(self):
        """Test the get_file_complexity_metrics function."""
        metrics = get_file_complexity_metrics(self.mock_file)

        # Check that the metrics contain expected information
        self.assertEqual(metrics["file_path"], "/path/to/test_file.py")
        self.assertEqual(metrics["name"], "test_file.py")
        self.assertEqual(metrics["num_lines"], 5)
        self.assertEqual(metrics["num_imports"], 1)
        self.assertEqual(metrics["num_classes"], 1)
        self.assertEqual(metrics["num_functions"], 1)
        self.assertEqual(metrics["num_global_vars"], 1)

        # Test with a function that has control flow
        self.mock_function.source = """def complex_function(a, b):
    if a > 0:
        if b > 0:
            return a + b
        else:
            return a - b
    elif a < 0 and b < 0:
        return -a - b
    else:
        for i in range(10):
            if i % 2 == 0:
                continue
            a += i
        return a
"""

        # Mock the functions list to include our complex function
        self.mock_file.functions = [self.mock_function]

        metrics = get_file_complexity_metrics(self.mock_file)
        self.assertGreater(metrics["cyclomatic_complexity"], 1)


if __name__ == "__main__":
    unittest.main()