Skip to content

Commit 64242a2

Browse files
refactor(goal): code analysis engine
changes:
  - file: analyzer.py
    area: analyzer
    modified: [ProjectAnalyzer, __init__, _scan_files, analyze, _print_status]
  - file: cli.py
    area: cli
    modified: [main]
    removed: [_maybe_print_pretty_help]
  - file: similarity.py
    area: core
    modified: [__init__, SimilarityDetector, find_similar_functions]
dependencies:
  flow: "cli→analyzer→similarity"
  - analyzer.py -> similarity.py
  - cli.py -> analyzer.py
stats:
  lines: "+98/-72 (net +26)"
  files: 3
  complexity: "+70% complexity (monitor)"
1 parent b77e36c commit 64242a2

File tree

11 files changed

+118
-79
lines changed

11 files changed

+118
-79
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
## [1.0.45] - 2026-02-26
2+
3+
### Summary
4+
5+
refactor(goal): code analysis engine
6+
7+
### Other
8+
9+
- update code2logic/analyzer.py
10+
- update code2logic/cli.py
11+
- update code2logic/similarity.py
12+
13+
114
## [1.0.44] - 2026-02-26
215

316
### Summary

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.0.44
1+
1.0.45

code2logic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
>>> print(output)
1919
"""
2020

21-
__version__ = "1.0.44"
21+
__version__ = "1.0.45"
2222
__author__ = "Softreck"
2323
__email__ = "info@softreck.dev"
2424
__license__ = "MIT"

code2logic/analyzer.py

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
Provides the high-level API for analyzing codebases.
55
"""
66

7+
import logging
78
import sys
9+
import time
810
from collections import defaultdict
911
from datetime import datetime
1012
from pathlib import Path
@@ -16,6 +18,8 @@
1618
from .parsers import TREE_SITTER_AVAILABLE, TreeSitterParser, UniversalParser
1719
from .similarity import RAPIDFUZZ_AVAILABLE, SimilarityDetector
1820

21+
log = logging.getLogger(__name__)
22+
1923

2024
class ProjectAnalyzer:
2125
"""
@@ -103,6 +107,7 @@ def __init__(
103107
use_treesitter: bool = True,
104108
verbose: bool = False,
105109
include_private: bool = False,
110+
enable_similarity: bool = True,
106111
):
107112
"""
108113
Initialize the project analyzer.
@@ -112,10 +117,12 @@ def __init__(
112117
use_treesitter: Whether to use Tree-sitter for parsing
113118
verbose: Whether to print status messages
114119
include_private: Whether to include private functions/classes
120+
enable_similarity: Whether to enable similarity detection
115121
"""
116122
self.root_path = Path(root_path).resolve()
117123
self.verbose = verbose
118124
self.include_private = include_private
125+
self.enable_similarity = enable_similarity
119126
self.modules: List[ModuleInfo] = []
120127
self.languages: Dict[str, int] = defaultdict(int)
121128

@@ -137,10 +144,10 @@ def __init__(
137144
def _print_status(self):
138145
"""Print library availability status."""
139146
parts = []
140-
parts.append("TS" if TREE_SITTER_AVAILABLE else "TS")
141-
parts.append("NX" if NETWORKX_AVAILABLE else "NX")
142-
parts.append("RF" if RAPIDFUZZ_AVAILABLE else "RF")
143-
parts.append("NLP" if (SPACY_AVAILABLE or NLTK_AVAILABLE) else "NLP")
147+
parts.append("TS" if TREE_SITTER_AVAILABLE else "TS")
148+
parts.append("NX" if NETWORKX_AVAILABLE else "NX")
149+
parts.append("RF" if RAPIDFUZZ_AVAILABLE else "RF")
150+
parts.append("NLP" if (SPACY_AVAILABLE or NLTK_AVAILABLE) else "NLP")
144151
print(f"Libs: {' '.join(parts)}", file=sys.stderr)
145152

146153
def analyze(self) -> ProjectInfo:
@@ -150,18 +157,49 @@ def analyze(self) -> ProjectInfo:
150157
Returns:
151158
ProjectInfo with complete analysis results
152159
"""
160+
analyze_start = time.time()
161+
153162
# Scan and parse files
163+
t0 = time.time()
154164
self._scan_files()
165+
t_scan = time.time() - t0
166+
if self.verbose:
167+
log.info(
168+
"Scan complete: modules=%d languages=%s time=%.2fs",
169+
len(self.modules),
170+
dict(self.languages),
171+
t_scan,
172+
)
155173

156174
# Build dependency graph
175+
t0 = time.time()
157176
dep_graph = self.dep_analyzer.build_graph(self.modules)
158177
dep_metrics = self.dep_analyzer.analyze_metrics()
178+
t_dep = time.time() - t0
179+
if self.verbose:
180+
log.info("Dependency analysis complete: nodes=%d time=%.2fs", len(dep_graph or {}), t_dep)
159181

160182
# Detect entry points
183+
t0 = time.time()
161184
entrypoints = self._detect_entrypoints()
185+
t_ep = time.time() - t0
186+
if self.verbose:
187+
log.info("Entrypoint detection complete: entrypoints=%d time=%.2fs", len(entrypoints), t_ep)
162188

163189
# Find similar functions
164-
similar = self.sim_detector.find_similar_functions(self.modules)
190+
similar: Dict[str, List[str]] = {}
191+
if self.enable_similarity:
192+
t0 = time.time()
193+
similar = self.sim_detector.find_similar_functions(self.modules)
194+
t_sim = time.time() - t0
195+
if self.verbose:
196+
log.info("Similarity detection complete: matches=%d time=%.2fs", len(similar), t_sim)
197+
else:
198+
if self.verbose:
199+
log.info("Similarity detection skipped (--no-similarity)")
200+
201+
if self.verbose:
202+
log.info("Total analysis time: %.2fs", time.time() - analyze_start)
165203

166204
return ProjectInfo(
167205
name=self.root_path.name,
@@ -221,14 +259,14 @@ def _scan_files(self):
221259
module = self.ts_parser.parse(rel_path, content, language)
222260
except Exception as e:
223261
if self.verbose:
224-
print(f"Tree-sitter parser failed for {rel_path}: {e}", file=sys.stderr)
262+
log.debug("Tree-sitter parser failed for %s: %s", rel_path, e)
225263

226264
if module is None:
227265
try:
228266
module = self.fallback_parser.parse(rel_path, content, language)
229267
except Exception as e:
230268
if self.verbose:
231-
print(f"Fallback parser failed for {rel_path}: {e}", file=sys.stderr)
269+
log.debug("Fallback parser failed for %s: %s", rel_path, e)
232270
continue
233271

234272
if module:

code2logic/cli.py

Lines changed: 21 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import argparse
1212
import json
13+
import logging
1314
import os
1415
import signal
1516
import subprocess
@@ -508,24 +509,12 @@ def _code2logic_llm_cli(argv: list[str]) -> None:
508509
return
509510

510511

511-
def main():
512-
"""Main CLI entry point."""
513-
cli_start = time.time()
514-
515-
try:
516-
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
517-
except Exception:
518-
pass
519-
520-
if len(sys.argv) > 1 and sys.argv[1] == 'llm':
521-
_code2logic_llm_cli(sys.argv[2:])
522-
return
523-
512+
def main(argv=None):
524513
parser = argparse.ArgumentParser(
525-
prog='code2logic',
526-
description='Convert source code to logical representation for LLM analysis',
527-
formatter_class=argparse.RawDescriptionHelpFormatter,
528-
epilog='''
514+
description='Analyze source code and generate logical representations',
515+
formatter_class=argparse.RawDescriptionHelpFormatter
516+
)
517+
epilog='''
529518
Examples:
530519
code2logic /path/to/project # Standard Markdown
531520
code2logic /path/to/project -f csv # CSV (best for LLM, ~50% smaller)
@@ -551,41 +540,6 @@ def main():
551540
standard - + intent, category, domain, imports (8 columns)
552541
full - + calls, lines, complexity, hash (16 columns)
553542
'''
554-
)
555-
556-
def _maybe_print_pretty_help() -> bool:
557-
"""Print colorized help as markdown when appropriate.
558-
559-
Returns True if help was printed and the CLI should exit early.
560-
"""
561-
force_pretty = os.environ.get("CODE2LOGIC_PRETTY_HELP") == "1" or bool(os.environ.get("FORCE_COLOR"))
562-
if not force_pretty:
563-
if not hasattr(sys.stdout, "isatty") or not sys.stdout.isatty():
564-
return False
565-
try:
566-
from .terminal import render
567-
except Exception:
568-
return False
569-
570-
help_md = f"""# code2logic
571-
572-
Convert source code to logical representation for LLM analysis.
573-
574-
## Usage
575-
576-
```bash
577-
code2logic [path] [options]
578-
```
579-
580-
## Help
581-
582-
```text
583-
{parser.format_help().rstrip()}
584-
```
585-
"""
586-
render.markdown(help_md)
587-
return True
588-
589543
parser.add_argument(
590544
'path',
591545
nargs='?',
@@ -690,6 +644,11 @@ def _maybe_print_pretty_help() -> bool:
690644
action='store_true',
691645
help='Disable Tree-sitter (use fallback parser)'
692646
)
647+
parser.add_argument(
648+
'--no-similarity',
649+
action='store_true',
650+
help='Disable similarity detection (RapidFuzz) to speed up analysis on large projects'
651+
)
693652
parser.add_argument(
694653
'-v', '--verbose',
695654
action='store_true',
@@ -732,11 +691,10 @@ def _maybe_print_pretty_help() -> bool:
732691
)
733692

734693
if len(sys.argv) == 1 or any(a in ("-h", "--help") for a in sys.argv[1:]):
735-
if not _maybe_print_pretty_help():
736-
parser.print_help()
694+
parser.print_help()
737695
return
738696

739-
args = parser.parse_args()
697+
args = parser.parse_args(argv)
740698

741699
if not args.no_install and os.environ.get("CODE2LOGIC_NO_INSTALL") in ("1", "true", "True", "yes", "YES"):
742700
args.no_install = True
@@ -750,6 +708,11 @@ def _maybe_print_pretty_help() -> bool:
750708
# Initialize logger
751709
log = Logger(verbose=args.verbose, debug=args.debug)
752710

711+
logging.basicConfig(
712+
level=(logging.DEBUG if args.debug else (logging.INFO if args.verbose else logging.WARNING)),
713+
format='[%(levelname)s] %(message)s',
714+
)
715+
753716
if args.verbose and not args.quiet:
754717
log.header("CODE2LOGIC")
755718
log.detail(f"Version: {__version__}")
@@ -842,9 +805,7 @@ def _maybe_print_pretty_help() -> bool:
842805

843806
# Path is required for analysis
844807
if args.path is None:
845-
# Keep behavior consistent with --help
846-
if not _maybe_print_pretty_help():
847-
parser.print_help()
808+
parser.print_help()
848809
return
849810

850811
# Validate path
@@ -865,7 +826,8 @@ def _maybe_print_pretty_help() -> bool:
865826
analyzer = ProjectAnalyzer(
866827
args.path,
867828
use_treesitter=not args.no_treesitter,
868-
verbose=args.debug
829+
verbose=args.verbose or args.debug,
830+
enable_similarity=not args.no_similarity,
869831
)
870832
project = analyzer.analyze()
871833
analyze_time = time.time() - analyze_start

code2logic/similarity.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@
44
Detects similar functions across modules to identify
55
potential duplicates and refactoring opportunities.
66
"""
7-
7+
import logging
8+
import time
9+
from collections import defaultdict
810
from typing import Dict, List
911

1012
from .models import ModuleInfo
1113

14+
log = logging.getLogger(__name__)
15+
1216
# Optional Rapidfuzz import
1317
RAPIDFUZZ_AVAILABLE = False
1418
try:
@@ -43,6 +47,8 @@ def __init__(self, threshold: float = 80.0):
4347
threshold: Minimum similarity score (0-100) to consider as similar
4448
"""
4549
self.threshold = threshold
50+
self.max_functions = 8000
51+
self.progress_every = 250
4652

4753
def find_similar_functions(self, modules: List[ModuleInfo]) -> Dict[str, List[str]]:
4854
"""
@@ -58,6 +64,8 @@ def find_similar_functions(self, modules: List[ModuleInfo]) -> Dict[str, List[st
5864
if not RAPIDFUZZ_AVAILABLE:
5965
return {}
6066

67+
start = time.time()
68+
6169
# Collect all functions
6270
all_funcs: List[dict] = []
6371
for m in modules:
@@ -76,15 +84,35 @@ def find_similar_functions(self, modules: List[ModuleInfo]) -> Dict[str, List[st
7684
if len(all_funcs) < 2:
7785
return {}
7886

87+
if len(all_funcs) > self.max_functions:
88+
log.warning(
89+
"Skipping similarity detection: too many functions (%d > %d). Use --no-similarity to silence this.",
90+
len(all_funcs),
91+
self.max_functions,
92+
)
93+
return {}
94+
7995
# Find similar functions
8096
similar: Dict[str, List[str]] = {}
8197
names = [f['name'] for f in all_funcs]
8298

99+
name_to_fulls: Dict[str, List[str]] = defaultdict(list)
100+
for f in all_funcs:
101+
name_to_fulls[f['name']].append(f['full'])
102+
83103
for i, func in enumerate(all_funcs):
84104
# Skip common names that would produce false positives
85105
if func['name'] in ('__init__', 'constructor', 'toString', 'valueOf'):
86106
continue
87107

108+
if i > 0 and (i % self.progress_every) == 0:
109+
log.debug(
110+
"Similarity progress: %d/%d (%.2fs)",
111+
i,
112+
len(all_funcs),
113+
time.time() - start,
114+
)
115+
88116
matches = process.extract(
89117
func['name'],
90118
names[:i] + names[i+1:],
@@ -95,15 +123,13 @@ def find_similar_functions(self, modules: List[ModuleInfo]) -> Dict[str, List[st
95123
sim_list = []
96124
for match_name, score, _ in matches:
97125
if score >= self.threshold and match_name != func['name']:
98-
# Find full name
99-
for other in all_funcs:
100-
if other['name'] == match_name:
101-
sim_list.append(f"{other['full']} ({score}%)")
102-
break
126+
for full in name_to_fulls.get(match_name, [])[:3]:
127+
sim_list.append(f"{full} ({score}%)")
103128

104129
if sim_list:
105130
similar[func['full']] = sim_list
106131

132+
log.debug("Similarity finished: funcs=%d matches=%d time=%.2fs", len(all_funcs), len(similar), time.time() - start)
107133
return similar
108134

109135
def find_duplicate_signatures(self, modules: List[ModuleInfo]) -> Dict[str, List[str]]:

logic2code/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@
1414
from .generator import CodeGenerator, GeneratorConfig, GenerationResult
1515
from .renderers import PythonRenderer
1616

17-
__version__ = '1.0.44'
17+
__version__ = '1.0.45'
1818
__all__ = ['CodeGenerator', 'GeneratorConfig', 'GenerationResult', 'PythonRenderer']

logic2test/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@
1515
from .parsers import LogicParser
1616
from .templates import TestTemplate
1717

18-
__version__ = '1.0.44'
18+
__version__ = '1.0.45'
1919
__all__ = ['TestGenerator', 'GeneratorConfig', 'GenerationResult', 'LogicParser', 'TestTemplate']

0 commit comments

Comments
 (0)