Skip to content

Commit b92fe20

Browse files
refactor(goal): code analysis engine
changes: - file: analyzer.py area: analyzer added: [_get_git_nonignored_files] modified: [_scan_files, ProjectAnalyzer, __init__] - file: cli.py area: cli modified: [main] dependencies: flow: "cli→analyzer" - cli.py -> analyzer.py stats: lines: "+115/-4 (net +111)" files: 2 complexity: "Large structural change (normalized)"
1 parent 7f963d8 commit b92fe20

File tree

10 files changed

+134
-11
lines changed

10 files changed

+134
-11
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
## [1.0.47] - 2026-02-26
2+
3+
### Summary
4+
5+
refactor(goal): code analysis engine
6+
7+
### Other
8+
9+
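- update code2logic/analyzer.py: add `respect_gitignore` option (default on); when the project root has a `.git` entry, scan only the files git reports as non-ignored, otherwise fall back to walking the directory tree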
10+
- update code2logic/cli.py: add `--no-gitignore` flag to scan all files under the path instead of respecting .gitignore
11+
12+
113
## [1.0.46] - 2026-02-26
214

315
### Summary

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.0.46
1+
1.0.47

code2logic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
>>> print(output)
1919
"""
2020

21-
__version__ = "1.0.46"
21+
__version__ = "1.0.47"
2222
__author__ = "Softreck"
2323
__email__ = "info@softreck.dev"
2424
__license__ = "MIT"

code2logic/analyzer.py

Lines changed: 109 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import logging
88
import os
9+
import subprocess
910
import sys
1011
import time
1112
from collections import defaultdict
@@ -109,6 +110,7 @@ def __init__(
109110
verbose: bool = False,
110111
include_private: bool = False,
111112
enable_similarity: bool = True,
113+
respect_gitignore: bool = True,
112114
):
113115
"""
114116
Initialize the project analyzer.
@@ -124,6 +126,7 @@ def __init__(
124126
self.verbose = verbose
125127
self.include_private = include_private
126128
self.enable_similarity = enable_similarity
129+
self.respect_gitignore = respect_gitignore
127130
self.modules: List[ModuleInfo] = []
128131
self.languages: Dict[str, int] = defaultdict(int)
129132

@@ -225,11 +228,16 @@ def _scan_files(self):
225228
files_matched = 0
226229
scan_progress_every = 500
227230

228-
for root, dirnames, filenames in os.walk(self.root_path):
229-
dirnames[:] = [d for d in dirnames if d not in self.IGNORE_DIRS]
230-
for filename in filenames:
231+
git_file_list: Optional[List[Path]] = None
232+
if self.respect_gitignore:
233+
git_file_list = self._get_git_nonignored_files()
234+
if git_file_list is not None and self.verbose:
235+
log.info("Using git file list (non-ignored): files=%d", len(git_file_list))
236+
237+
if git_file_list is not None:
238+
for fp in git_file_list:
231239
files_seen += 1
232-
fp = Path(root) / filename
240+
filename = fp.name
233241

234242
if filename in self.IGNORE_FILES:
235243
continue
@@ -293,6 +301,75 @@ def _scan_files(self):
293301
module.file_bytes = len(content.encode('utf-8', errors='ignore'))
294302
self.modules.append(module)
295303

304+
else:
305+
for root, dirnames, filenames in os.walk(self.root_path):
306+
dirnames[:] = [d for d in dirnames if d not in self.IGNORE_DIRS]
307+
for filename in filenames:
308+
files_seen += 1
309+
fp = Path(root) / filename
310+
311+
if filename in self.IGNORE_FILES:
312+
continue
313+
314+
ext = fp.suffix.lower()
315+
language = self.LANGUAGE_EXTENSIONS.get(ext)
316+
if language is None and ext == '':
317+
try:
318+
with fp.open('r', encoding='utf-8', errors='ignore') as f:
319+
language = self._language_from_shebang(f.readline())
320+
except Exception:
321+
language = None
322+
323+
if language is None:
324+
continue
325+
326+
files_matched += 1
327+
self.languages[language] += 1
328+
329+
if self.verbose and files_seen > 0 and (files_seen % scan_progress_every) == 0:
330+
log.info(
331+
"Scan progress: seen=%d matched=%d parsed=%d modules=%d time=%.2fs",
332+
files_seen,
333+
files_matched,
334+
files_parsed,
335+
len(self.modules),
336+
time.time() - scan_start,
337+
)
338+
339+
try:
340+
content = fp.read_text(encoding='utf-8', errors='ignore')
341+
except Exception:
342+
continue
343+
344+
try:
345+
rel_path = str(fp.relative_to(self.root_path))
346+
except Exception:
347+
rel_path = str(fp)
348+
349+
module = None
350+
try:
351+
if self.ts_parser and self.ts_parser.is_available(language):
352+
module = self.ts_parser.parse(rel_path, content, language)
353+
except Exception as e:
354+
if self.verbose:
355+
log.debug("Tree-sitter parser failed for %s: %s", rel_path, e)
356+
357+
if module is None:
358+
try:
359+
module = self.fallback_parser.parse(rel_path, content, language)
360+
except Exception as e:
361+
if self.verbose:
362+
log.debug("Fallback parser failed for %s: %s", rel_path, e)
363+
continue
364+
365+
if module:
366+
files_parsed += 1
367+
try:
368+
module.file_bytes = fp.stat().st_size
369+
except Exception:
370+
module.file_bytes = len(content.encode('utf-8', errors='ignore'))
371+
self.modules.append(module)
372+
296373
if self.verbose:
297374
log.info(
298375
"Scan finished: seen=%d matched=%d parsed=%d modules=%d time=%.2fs",
@@ -303,6 +380,34 @@ def _scan_files(self):
303380
time.time() - scan_start,
304381
)
305382

383+
def _get_git_nonignored_files(self) -> Optional[List[Path]]:
    """Return the files git reports as non-ignored under ``self.root_path``.

    Runs ``git ls-files -co --exclude-standard -z`` in ``self.root_path``,
    i.e. tracked (cached) files plus untracked files that are not excluded
    by git's standard ignore rules.

    Returns:
        A list with ``self.root_path`` joined to every path git printed,
        or ``None`` when ``self.root_path`` has no ``.git`` entry, git
        could not be executed, or the command exited with a non-zero
        status. ``None`` tells the caller the git listing is unavailable.
    """
    if not (self.root_path / '.git').exists():
        return None

    try:
        proc = subprocess.run(
            [
                'git', '-C', str(self.root_path),
                'ls-files', '-co', '--exclude-standard',
                # NUL-terminated output: without -z git C-quotes "unusual"
                # path names (tabs, quotes, non-ASCII bytes), which would
                # yield paths that do not exist on disk.
                '-z',
            ],
            check=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # git missing / not executable / bad argument: best-effort feature,
        # so report "unavailable" instead of failing the scan.
        return None

    if proc.returncode != 0:
        return None

    # Output is captured as bytes and decoded per entry with os.fsdecode so
    # a single file name that is not valid in the locale encoding cannot
    # discard the whole listing. Names are kept verbatim (no strip), since
    # leading/trailing whitespace is legal in file names.
    return [
        self.root_path / os.fsdecode(raw)
        for raw in (proc.stdout or b'').split(b'\0')
        if raw
    ]
410+
306411
def _detect_entrypoints(self) -> List[str]:
307412
"""Detect project entry points."""
308413
eps = []

code2logic/cli.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,11 @@ def main(argv=None):
645645
action='store_true',
646646
help='Disable Tree-sitter (use fallback parser)'
647647
)
648+
parser.add_argument(
649+
'--no-gitignore',
650+
action='store_true',
651+
help='Do not respect .gitignore (scan all files under path)'
652+
)
648653
parser.add_argument(
649654
'--no-similarity',
650655
action='store_true',
@@ -829,6 +834,7 @@ def main(argv=None):
829834
use_treesitter=not args.no_treesitter,
830835
verbose=args.verbose or args.debug,
831836
enable_similarity=not args.no_similarity,
837+
respect_gitignore=not args.no_gitignore,
832838
)
833839
project = analyzer.analyze()
834840
analyze_time = time.time() - analyze_start

logic2code/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@
1414
from .generator import CodeGenerator, GeneratorConfig, GenerationResult
1515
from .renderers import PythonRenderer
1616

17-
__version__ = '1.0.46'
17+
__version__ = '1.0.47'
1818
__all__ = ['CodeGenerator', 'GeneratorConfig', 'GenerationResult', 'PythonRenderer']

logic2test/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@
1515
from .parsers import LogicParser
1616
from .templates import TestTemplate
1717

18-
__version__ = '1.0.46'
18+
__version__ = '1.0.47'
1919
__all__ = ['TestGenerator', 'GeneratorConfig', 'GenerationResult', 'LogicParser', 'TestTemplate']

lolm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
)
7777
from .clients import LLMRateLimitError
7878

79-
__version__ = '1.0.46'
79+
__version__ = '1.0.47'
8080
__all__ = [
8181
# Config
8282
'LLMConfig',

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "code2logic"
7-
version = "1.0.46"
7+
version = "1.0.47"
88
description = "Code2Logic - Source code to logical representation converter for LLM analysis, featuring Tree-sitter parsing, dependency graph analysis, and multi-language support."
99
readme = "README.md"
1010
license = "Apache-2.0"

tests/samples/sample_reexport/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@
1818
"ProcessingError",
1919
]
2020

21-
__version__ = "1.0.46"
21+
__version__ = "1.0.47"

0 commit comments

Comments
 (0)