44Provides the high-level API for analyzing codebases.
55"""
66
7+ import logging
78import sys
9+ import time
810from collections import defaultdict
911from datetime import datetime
1012from pathlib import Path
1618from .parsers import TREE_SITTER_AVAILABLE , TreeSitterParser , UniversalParser
1719from .similarity import RAPIDFUZZ_AVAILABLE , SimilarityDetector
1820
21+ log = logging .getLogger (__name__ )
22+
1923
2024class ProjectAnalyzer :
2125 """
@@ -103,6 +107,7 @@ def __init__(
103107 use_treesitter : bool = True ,
104108 verbose : bool = False ,
105109 include_private : bool = False ,
110+ enable_similarity : bool = True ,
106111 ):
107112 """
108113 Initialize the project analyzer.
@@ -112,10 +117,12 @@ def __init__(
112117 use_treesitter: Whether to use Tree-sitter for parsing
113118 verbose: Whether to print status messages
114119 include_private: Whether to include private functions/classes
120+ enable_similarity: Whether to enable similarity detection
115121 """
116122 self .root_path = Path (root_path ).resolve ()
117123 self .verbose = verbose
118124 self .include_private = include_private
125+ self .enable_similarity = enable_similarity
119126 self .modules : List [ModuleInfo ] = []
120127 self .languages : Dict [str , int ] = defaultdict (int )
121128
@@ -137,10 +144,10 @@ def __init__(
137144 def _print_status (self ):
138145 """Print library availability status."""
139146 parts = []
140- parts .append ("TS✓ " if TREE_SITTER_AVAILABLE else "TS✗ " )
141- parts .append ("NX✓ " if NETWORKX_AVAILABLE else "NX✗ " )
142- parts .append ("RF✓ " if RAPIDFUZZ_AVAILABLE else "RF✗ " )
143- parts .append ("NLP✓ " if (SPACY_AVAILABLE or NLTK_AVAILABLE ) else "NLP✗ " )
147+ parts .append ("TS" if TREE_SITTER_AVAILABLE else "TS" )
148+ parts .append ("NX" if NETWORKX_AVAILABLE else "NX" )
149+ parts .append ("RF" if RAPIDFUZZ_AVAILABLE else "RF" )
150+ parts .append ("NLP" if (SPACY_AVAILABLE or NLTK_AVAILABLE ) else "NLP" )
144151 print (f"Libs: { ' ' .join (parts )} " , file = sys .stderr )
145152
146153 def analyze (self ) -> ProjectInfo :
@@ -150,18 +157,49 @@ def analyze(self) -> ProjectInfo:
150157 Returns:
151158 ProjectInfo with complete analysis results
152159 """
160+ analyze_start = time .time ()
161+
153162 # Scan and parse files
163+ t0 = time .time ()
154164 self ._scan_files ()
165+ t_scan = time .time () - t0
166+ if self .verbose :
167+ log .info (
168+ "Scan complete: modules=%d languages=%s time=%.2fs" ,
169+ len (self .modules ),
170+ dict (self .languages ),
171+ t_scan ,
172+ )
155173
156174 # Build dependency graph
175+ t0 = time .time ()
157176 dep_graph = self .dep_analyzer .build_graph (self .modules )
158177 dep_metrics = self .dep_analyzer .analyze_metrics ()
178+ t_dep = time .time () - t0
179+ if self .verbose :
180+ log .info ("Dependency analysis complete: nodes=%d time=%.2fs" , len (dep_graph or {}), t_dep )
159181
160182 # Detect entry points
183+ t0 = time .time ()
161184 entrypoints = self ._detect_entrypoints ()
185+ t_ep = time .time () - t0
186+ if self .verbose :
187+ log .info ("Entrypoint detection complete: entrypoints=%d time=%.2fs" , len (entrypoints ), t_ep )
162188
163189 # Find similar functions
164- similar = self .sim_detector .find_similar_functions (self .modules )
190+ similar : Dict [str , List [str ]] = {}
191+ if self .enable_similarity :
192+ t0 = time .time ()
193+ similar = self .sim_detector .find_similar_functions (self .modules )
194+ t_sim = time .time () - t0
195+ if self .verbose :
196+ log .info ("Similarity detection complete: matches=%d time=%.2fs" , len (similar ), t_sim )
197+ else :
198+ if self .verbose :
199+ log .info ("Similarity detection skipped (--no-similarity)" )
200+
201+ if self .verbose :
202+ log .info ("Total analysis time: %.2fs" , time .time () - analyze_start )
165203
166204 return ProjectInfo (
167205 name = self .root_path .name ,
@@ -221,14 +259,14 @@ def _scan_files(self):
221259 module = self .ts_parser .parse (rel_path , content , language )
222260 except Exception as e :
223261 if self .verbose :
224- print ( f "Tree-sitter parser failed for { rel_path } : { e } " , file = sys . stderr )
262+ log . debug ( "Tree-sitter parser failed for %s: %s " , rel_path , e )
225263
226264 if module is None :
227265 try :
228266 module = self .fallback_parser .parse (rel_path , content , language )
229267 except Exception as e :
230268 if self .verbose :
231- print ( f "Fallback parser failed for { rel_path } : { e } " , file = sys . stderr )
269+ log . debug ( "Fallback parser failed for %s: %s " , rel_path , e )
232270 continue
233271
234272 if module :
0 commit comments