Skip to content

Commit bd82d65

Browse files
feat(docs): CLI interface improvements
changes: - file: common.py area: core modified: [generate_spec] - file: cli.py area: cli modified: [main] - file: logicml.py area: core modified: [LogicMLGenerator, _generate_functions, _generate_method, _generate_module, generate, _generate_class] dependencies: flow: "cli→logicml" - cli.py -> logicml.py stats: lines: "+170/-28 (net +142)" files: 6 complexity: "+138% complexity (monitor)"
1 parent ca7af3e commit bd82d65

File tree

14 files changed

+196
-35
lines changed

14 files changed

+196
-35
lines changed

CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
1+
## [1.0.42] - 2026-02-25
2+
3+
### Summary
4+
5+
feat(docs): CLI interface improvements
6+
7+
### Docs
8+
9+
- docs: update 00-index.md
10+
- docs: update 20-llm-benchmarks-claude.md
11+
12+
### Other
13+
14+
- update code2logic/benchmarks/common.py
15+
- update code2logic/cli.py
16+
- update code2logic/logicml.py
17+
- update project.toon
18+
19+
120
## [1.0.41] - 2026-02-25
221

322
### Summary

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.0.41
1+
1.0.42

code2logic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
>>> print(output)
1919
"""
2020

21-
__version__ = "1.0.41"
21+
__version__ = "1.0.42"
2222
__author__ = "Softreck"
2323
__email__ = "info@softreck.dev"
2424
__license__ = "MIT"

code2logic/benchmarks/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def generate_spec(project: ProjectInfo, fmt: str) -> str:
4141
no_repeat_name=True,
4242
no_repeat_details=True,
4343
include_does=True,
44+
context="minimal",
4445
)
4546
if fmt == "csv":
4647
gen = CSVGenerator()

code2logic/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,6 +1051,7 @@ def _maybe_print_pretty_help() -> bool:
10511051
no_repeat_name=args.no_repeat_module,
10521052
no_repeat_details=args.no_repeat_details,
10531053
include_does=args.does,
1054+
context=getattr(args, 'function_logic_context', 'none') or 'none',
10541055
)
10551056
else:
10561057
logic_out = logic_gen.generate(project, detail=args.detail)

code2logic/logicml.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,22 @@ class LogicMLGenerator:
8080
def __init__(self, verbose: bool = False) -> None:
8181
self.verbose = verbose
8282

83-
def generate(self, project: ProjectInfo, detail: str = 'standard') -> LogicMLSpec:
84-
"""Generate LogicML specification for a project."""
83+
def generate(self, project: ProjectInfo, detail: str = 'standard', level: str = 'typed') -> LogicMLSpec:
84+
"""Generate LogicML specification for a project.
85+
86+
Args:
87+
detail: Content detail ('minimal', 'standard', 'full')
88+
level: Signature richness level:
89+
'compact' - short params (6 max), minimal types
90+
'typed' - full params with types (10 max), return types always shown
91+
'full' - typed + calls/raises always shown
92+
"""
8593
parts: List[str] = []
8694
total_classes = 0
8795
total_functions = 0
8896

8997
for module in project.modules:
90-
module_spec = self._generate_module(module, detail)
98+
module_spec = self._generate_module(module, detail, level)
9199
if module_spec.strip():
92100
parts.append(module_spec)
93101
total_classes += len(module.classes)
@@ -104,7 +112,7 @@ def generate(self, project: ProjectInfo, detail: str = 'standard') -> LogicMLSpe
104112
function_count=total_functions,
105113
)
106114

107-
def _generate_module(self, module: ModuleInfo, detail: str) -> str:
115+
def _generate_module(self, module: ModuleInfo, detail: str, level: str = 'typed') -> str:
108116
"""Generate LogicML for a single module."""
109117
lines: List[str] = []
110118
path = Path(module.path)
@@ -158,12 +166,12 @@ def _generate_module(self, module: ModuleInfo, detail: str) -> str:
158166

159167
# Classes
160168
for cls in module.classes:
161-
class_yaml = self._generate_class(cls, detail)
169+
class_yaml = self._generate_class(cls, detail, level)
162170
lines.append(class_yaml)
163171

164172
# Top-level functions
165173
if module.functions:
166-
funcs_yaml = self._generate_functions(module.functions, detail)
174+
funcs_yaml = self._generate_functions(module.functions, detail, level)
167175
lines.append(funcs_yaml)
168176

169177
return '\n'.join(lines)
@@ -196,7 +204,7 @@ def _generate_imports(self, imports: List[str]) -> str:
196204

197205
return '\n'.join(lines) if len(lines) > 1 else ''
198206

199-
def _generate_class(self, cls: ClassInfo, detail: str) -> str:
207+
def _generate_class(self, cls: ClassInfo, detail: str, level: str = 'typed') -> str:
200208
"""Generate LogicML for a class."""
201209
lines: List[str] = [f'\n{cls.name}:']
202210

@@ -245,22 +253,27 @@ def _generate_class(self, cls: ClassInfo, detail: str) -> str:
245253
if cls.methods:
246254
lines.append(' methods:')
247255
for method in cls.methods[:20]:
248-
method_yaml = self._generate_method(method, detail, indent=4)
256+
method_yaml = self._generate_method(method, detail, level, indent=4)
249257
lines.append(method_yaml)
250258

251259
return '\n'.join(lines)
252260

253-
def _generate_method(self, method: FunctionInfo, detail: str, indent: int = 2) -> str:
254-
"""Generate LogicML for a method."""
261+
def _generate_method(self, method: FunctionInfo, detail: str, level: str = 'typed', indent: int = 2) -> str:
262+
"""Generate LogicML for a method.
263+
264+
Args:
265+
level: 'compact' (6 params), 'typed' (10 params, full types), 'full' (typed + calls/raises)
266+
"""
255267
prefix = ' ' * indent
256268
lines: List[str] = [f'{prefix}{method.name}:']
257269

258270
# Check for property decorator
259271
is_property = 'property' in method.decorators
260272

261-
# Signature - remove self/cls for compactness
262-
clean_params = remove_self_from_params(method.params[:7])
263-
params = ', '.join(clean_params[:6])
273+
# Signature - param count depends on level
274+
max_params = 6 if level == 'compact' else 10
275+
clean_params = remove_self_from_params(method.params[:max_params + 1])
276+
params = ', '.join(clean_params[:max_params])
264277
ret = method.return_type or 'None'
265278

266279
sig = f'({params}) -> {ret}'
@@ -271,19 +284,31 @@ def _generate_method(self, method: FunctionInfo, detail: str, indent: int = 2) -
271284

272285
lines.append(f'{prefix} sig: {sig}')
273286

274-
# Intent/docstring as "does" - truncated for efficiency
287+
# Intent/docstring as "does" - longer for typed/full levels
288+
does_max = 80 if level in ('typed', 'full') else 60
275289
if method.docstring:
276-
does = truncate_docstring(method.docstring, max_length=60)
290+
does = truncate_docstring(method.docstring, max_length=does_max)
277291
if does:
278292
lines.append(f'{prefix} does: "{does}"')
279293
elif method.intent:
280-
intent = method.intent[:60].replace('\n', ' ').replace('"', "'")
294+
intent = method.intent[:does_max].replace('\n', ' ').replace('"', "'")
281295
lines.append(f'{prefix} does: "{intent}"')
282296

283297
# Edge cases (from raises)
284298
if method.raises and detail in ('standard', 'full'):
285299
for exc in method.raises[:2]:
286300
lines.append(f'{prefix} edge: "error → raise {exc}"')
301+
# In 'full' level, also emit raises as list for LLM reconstruction
302+
if level == 'full':
303+
raises_str = ", ".join(method.raises[:5])
304+
lines.append(f'{prefix} raises: [{raises_str}]')
305+
306+
# Calls (only in 'full' level or detail='full')
307+
if level == 'full' and getattr(method, 'calls', None):
308+
calls = (method.calls or [])[:10]
309+
if calls:
310+
calls_str = ", ".join(calls)
311+
lines.append(f'{prefix} calls: [{calls_str}]')
287312

288313
# Side effects
289314
side_effects = self._detect_side_effects(method)
@@ -298,12 +323,12 @@ def _generate_method(self, method: FunctionInfo, detail: str, indent: int = 2) -
298323

299324
return '\n'.join(lines)
300325

301-
def _generate_functions(self, functions: List[FunctionInfo], detail: str) -> str:
326+
def _generate_functions(self, functions: List[FunctionInfo], detail: str, level: str = 'typed') -> str:
302327
"""Generate LogicML for top-level functions."""
303328
lines: List[str] = ['\nfunctions:']
304329

305330
for func in functions[:20]:
306-
func_yaml = self._generate_method(func, detail, indent=2)
331+
func_yaml = self._generate_method(func, detail, level, indent=2)
307332
lines.append(func_yaml)
308333

309334
return '\n'.join(lines)

docs/00-index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Convert source code to logical representation for LLM analysis
2727
| 17 | [LOLM](17-lolm.md) | LLM provider management |
2828
| 18 | [Reproduction Testing](18-reproduction-testing.md) | Format validation and code regeneration |
2929
| 19 | [Monorepo Workflow](19-monorepo-workflow.md) | Managing all packages from repo root |
30+
| 20 | [LLM Benchmarks + Claude](20-llm-benchmarks-claude.md) | Run benchmarks with LLM enabled and force Claude (Anthropic) |
3031

3132
## Repository Links
3233

docs/20-llm-benchmarks-claude.md

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# LLM Benchmarks + Claude (Anthropic)
2+
3+
[← Docs Index](00-index.md) | [← Benchmarking](10-benchmark.md) | [← LLM Integration](08-llm-integration.md)
4+
5+
This document focuses on running Code2Logic benchmarks **with an LLM enabled**, including how to force **Claude (Anthropic)** via provider/model selection.
6+
7+
## What benchmarks measure (important)
8+
9+
- **Format / project benchmarks** measure **reproduction quality from a spec** (structure + syntax + similarity heuristics).
10+
- **High scores are not proof of runtime equivalence.** Runtime equivalence is validated only by tests / behavioral checks.
11+
- `--no-llm` is a **pipeline/sanity mode** (template fallback), not meaningful for comparing LLM quality.
12+
13+
## Key artifacts
14+
15+
### `project.toon`
16+
17+
Project-level TOON (structure of modules/classes/functions). Good for “big picture”.
18+
19+
### `function.toon`
20+
21+
Function-logic TOON (detailed per-function index). In this repo, `function.toon` is generated by:
22+
23+
```bash
24+
code2logic ./ -f toon --compact --no-repeat-module \
25+
--function-logic function.toon --with-schema --name project -o ./
26+
```
27+
28+
Schema (optional): `function-schema.json`.
29+
30+
## Quickstart: run the repo benchmarks
31+
32+
### Offline (no API calls)
33+
34+
```bash
35+
make benchmark BENCH_USE_LLM=0
36+
```
37+
38+
### With LLM enabled
39+
40+
```bash
41+
make benchmark BENCH_USE_LLM=1
42+
```
43+
44+
Notes:
45+
46+
- `BENCH_USE_LLM=1` requires at least one configured provider (see `08-llm-integration.md`).
47+
- Output artifacts are written to `examples/output/`.
48+
49+
## Force Claude (Anthropic)
50+
51+
You can use Claude in two common ways:
52+
53+
### Option A: Claude via OpenRouter
54+
55+
Requirements:
56+
57+
- `OPENROUTER_API_KEY=...`
58+
59+
Example (format benchmark):
60+
61+
```bash
62+
python examples/15_unified_benchmark.py \
63+
--type format \
64+
--folder tests/samples/ \
65+
--formats yaml toon logicml json markdown csv gherkin function.toon \
66+
--limit 20 --verbose \
67+
--provider openrouter \
68+
--model anthropic/claude-3.5-sonnet
69+
```
70+
71+
### Option B: Claude via Anthropic API (through LiteLLM)
72+
73+
Requirements:
74+
75+
- `ANTHROPIC_API_KEY=...`
76+
77+
Example (project benchmark):
78+
79+
```bash
80+
python examples/15_unified_benchmark.py \
81+
--type project \
82+
--folder tests/samples/ \
83+
--formats yaml toon logicml json markdown csv gherkin function.toon \
84+
--limit 20 --verbose \
85+
--provider litellm \
86+
--model anthropic/claude-3.5-sonnet
87+
```
88+
89+
## Speed & cost knobs
90+
91+
### Concurrency
92+
93+
Use fewer workers if you hit rate limits:
94+
95+
```bash
96+
python examples/15_unified_benchmark.py --type format --workers 2
97+
```
98+
99+
### Output token limit
100+
101+
```bash
102+
python examples/15_unified_benchmark.py --type format --max-tokens 2500
103+
```
104+
105+
Guidance:
106+
107+
- Lower `--max-tokens` reduces cost and latency, but may reduce reproduction quality.
108+
- Increase `--max-tokens` for larger files/specs, but expect slower runs.
109+
110+
## Troubleshooting
111+
112+
- **No provider available**: configure keys/models in `.env` or via `code2logic llm ...` (see `08-llm-integration.md`).
113+
- **Rate limited**: reduce `--workers`, consider a cheaper/faster model (e.g. Haiku), or switch provider.
114+
- **Weird output (explanations instead of code)**: use stricter prompts or a lower temperature on the provider side; the benchmark runner already tries to extract fenced code blocks.

logic2code/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@
1414
from .generator import CodeGenerator, GeneratorConfig, GenerationResult
1515
from .renderers import PythonRenderer
1616

17-
__version__ = '1.0.41'
17+
__version__ = '1.0.42'
1818
__all__ = ['CodeGenerator', 'GeneratorConfig', 'GenerationResult', 'PythonRenderer']

logic2test/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@
1515
from .parsers import LogicParser
1616
from .templates import TestTemplate
1717

18-
__version__ = '1.0.41'
18+
__version__ = '1.0.42'
1919
__all__ = ['TestGenerator', 'GeneratorConfig', 'GenerationResult', 'LogicParser', 'TestTemplate']

0 commit comments

Comments
 (0)