This document focuses on running Code2Logic benchmarks **with an LLM enabled**, including how to force **Claude (Anthropic)** via provider/model selection.
## What benchmarks measure (important)

- **Format / project benchmarks** measure **reproduction quality from a spec** (structure + syntax + similarity heuristics).
- **High scores are not proof of runtime equivalence.** Runtime equivalence is validated only by tests / behavioral checks.
- `--no-llm` is a **pipeline/sanity mode** (template fallback), not meaningful for comparing LLM quality.

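To make "similarity heuristics" concrete, here is a minimal sketch of one such check using Python's `difflib`. This is illustrative only; the benchmark's actual scoring combines structural, syntactic, and similarity signals and may differ.

```python
import difflib


def similarity_score(original: str, reproduced: str) -> float:
    """Rough text-similarity heuristic in [0.0, 1.0] between the original
    source and an LLM reproduction. Illustrative only -- not the
    benchmark's real metric."""
    return difflib.SequenceMatcher(None, original, reproduced).ratio()
```

A score of 1.0 means the texts match exactly; values near 1.0 indicate close surface similarity, which (per the caveat above) still says nothing about runtime equivalence.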
## Key artifacts
### `project.toon`
Project-level TOON (structure of modules/classes/functions). Good for “big picture”.
### `function.toon`
Function-logic TOON (detailed per-function index). In this repo, `function.toon` is generated by:

```bash
python examples/15_unified_benchmark.py --type format --workers 2
```

### Output token limit
```bash
python examples/15_unified_benchmark.py --type format --max-tokens 2500
```
Guidance:
- Lower `--max-tokens` reduces cost and latency, but may reduce reproduction quality.
- Increase `--max-tokens` for larger files/specs, but expect slower runs.
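A rough way to sanity-check a `--max-tokens` budget against a source file is a character-based estimate. The ~4-characters-per-token ratio below is a common rule of thumb for English text and code, not the provider's real tokenizer:

```python
def rough_token_estimate(text: str) -> int:
    """Very rough token estimate (~4 characters per token).
    Useful for sanity-checking a --max-tokens budget; a provider's
    actual tokenizer will give different counts."""
    return max(1, len(text) // 4)
```

If the estimate for a source file comfortably exceeds your `--max-tokens` value, expect truncated reproductions and raise the limit.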
## Troubleshooting
- **No provider available**: configure keys/models in `.env` or via `code2logic llm ...` (see `08-llm-integration.md`).
- **Rate limited**: reduce `--workers`, switch to a cheaper/faster model (e.g. Haiku), or change providers.
- **Weird output (explanations instead of code)**: use stricter prompts or lower the temperature on the provider side; the benchmark runner already tries to extract fenced code blocks.
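The fenced-block extraction mentioned above can be sketched as follows. This is a minimal illustration using a regular expression, not the runner's actual implementation:

```python
import re


def extract_fenced_code(text: str) -> str:
    """Return the body of the first fenced code block in an LLM reply,
    or the raw text unchanged if no fence is found. Sketch only --
    the benchmark runner's extraction logic may differ."""
    match = re.search(r"```[A-Za-z0-9_+-]*\n(.*?)```", text, re.DOTALL)
    return match.group(1) if match else text
```

Falling back to the raw text when no fence is present keeps the pipeline running even when a model ignores formatting instructions entirely.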