Skip to content

Commit 2556e8a

Browse files
refactoring
1 parent 378f58c commit 2556e8a

File tree

17 files changed

+72
-41
lines changed

17 files changed

+72
-41
lines changed

benchmarks/llm_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,7 @@ def generate_command_errors_report(results: BenchmarkResults) -> str:
10971097
lines.append("")
10981098
lines.append(f"Generated: `{ts}`")
10991099
lines.append("")
1100-
lines.append("Źródło: `examples/benchmark_nlp2cmd.py` + `benchmark_results.json`")
1100+
lines.append("Źródło: `benchmarks/llm_benchmark.py` + `benchmark_results.json`")
11011101
lines.append("")
11021102

11031103
failures = [

docs/development/BENCHMARKING.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ make benchmark-clean
2929

3030
```bash
3131
# Run the main benchmark script
32-
python3 examples/02_benchmarks/performance_testing/benchmark.py
32+
PYTHONPATH=src python3 benchmarks/llm_benchmark.py
3333

3434
# Run benchmark WITHOUT cache (forces fresh LLM calls for every query)
35-
python3 examples/02_benchmarks/performance_testing/benchmark.py --no-cache
35+
PYTHONPATH=src python3 benchmarks/llm_benchmark.py --no-cache
3636

3737
# Run the sequential commands example
38-
python3 examples/02_benchmarks/sequential_testing/run_sequential.py
38+
python3 examples/02_benchmarks/sequential_testing/benchmark.py
3939
```
4040

4141
### Benchmark Without Cache
@@ -47,7 +47,7 @@ For true LLM performance testing without cache influence:
4747
make benchmark
4848

4949
# Benchmark without cache (pure LLM performance)
50-
python3 examples/02_benchmarks/performance_testing/benchmark.py --no-cache
50+
PYTHONPATH=src python3 benchmarks/llm_benchmark.py --no-cache
5151

5252
# The --no-cache flag disables:
5353
# - Cache lookups (exact, fuzzy, similarity)
@@ -107,7 +107,7 @@ Here's a sample benchmark output:
107107
2. **benchmark_results.csv**: CSV file suitable for plotting in Excel or other tools
108108
3. **sequential_benchmark_results.json**: Results from the sequential commands example
109109

110-
When using the LLM benchmark (`examples/02_benchmarks/performance_testing/benchmark.py`), additional files are generated in `benchmark_output/`:
110+
When using the canonical LLM benchmark (`benchmarks/llm_benchmark.py`), additional files are generated in `benchmark_output/`:
111111

112112
1. **benchmark_results.json**: Raw per-query results + aggregated accuracy stats
113113
2. **benchmark_results.html**: Interactive charts
@@ -152,7 +152,7 @@ Based on benchmark results, you can:
152152

153153
### Adding New Commands
154154

155-
Edit `examples/02_benchmarks/performance_testing/benchmark.py` and modify the command lists:
155+
Edit `benchmarks/llm_benchmark.py` for the canonical benchmark, or `examples/02_benchmarks/performance_testing/benchmark.py` for the lightweight adapter example, and modify the command lists:
156156

157157
```python
158158
commands = {
@@ -210,7 +210,7 @@ Add benchmarking to your CI pipeline:
210210
# .github/workflows/benchmark.yml
211211
- name: Run Benchmark
212212
run: |
213-
python3 examples/02_benchmarks/performance_testing/benchmark.py
213+
PYTHONPATH=src python3 benchmarks/llm_benchmark.py
214214
215215
- name: Upload Results
216216
uses: actions/upload-artifact@v3

docs/reference/examples-guide.md

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,21 @@ python examples/_dynamic_orchestrator.py --prompt "Create a dashboard with chart
170170

171171
## 📊 Performance Benchmarks
172172

173-
**Location:** `examples/02_benchmarks/`
173+
**Lightweight examples:** `examples/02_benchmarks/`
174+
175+
**Canonical benchmark suite:** `benchmarks/`
174176

175177
```bash
176-
# Run performance tests
177-
cd examples/02_benchmarks/performance_testing
178-
python benchmark.py
178+
# Run lightweight performance examples
179+
python3 examples/02_benchmarks/performance_testing/benchmark.py
179180

180181
# Sequential testing
181-
cd examples/02_benchmarks/sequential_testing
182-
python run_sequential.py
182+
python3 examples/02_benchmarks/sequential_testing/benchmark.py
183+
184+
# Run canonical benchmark scripts
185+
PYTHONPATH=src python3 benchmarks/llm_benchmark.py
186+
PYTHONPATH=src python3 benchmarks/learning_benchmark.py
187+
PYTHONPATH=src python3 benchmarks/thermodynamic_benchmark.py
183188
```
184189

185190
## 🔧 Domain-Specific Examples

examples/02_benchmarks/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Benchmarks - Performance Testing
22

3-
This section contains tools for benchmarking and testing NLP2CMD performance.
3+
This section contains lightweight examples for benchmarking and testing NLP2CMD performance.
4+
5+
For the canonical benchmark suite, see `benchmarks/` at the repository root.
46

57
## Categories
68

examples/02_benchmarks/performance_testing/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Performance Testing Benchmark
22

3-
Comprehensive performance benchmarking for NLP2CMD.
3+
Lightweight performance benchmarking example for NLP2CMD.
4+
5+
For the canonical benchmark suite, see `benchmarks/` at the repository root.
46

57
## Overview
68

examples/02_benchmarks/performance_testing/benchmark.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
from datetime import datetime
1919
from typing import List, Dict, Any
2020

21-
# Add src to path
22-
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
23-
sys.path.append(str(Path(__file__).resolve().parents[2]))
21+
PROJECT_ROOT = Path(__file__).resolve().parents[3]
22+
sys.path.insert(0, str(PROJECT_ROOT / "src"))
23+
sys.path.insert(0, str(PROJECT_ROOT / "examples"))
2424

2525
from _example_helpers import print_separator
2626

examples/02_benchmarks/sequential_testing/benchmark.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@
77
batch processing of commands.
88
99
Usage:
10-
python examples/benchmark_sequential_commands.py
10+
python3 examples/02_benchmarks/sequential_testing/benchmark.py
1111
"""
1212

1313
import sys
1414
import time
1515
from pathlib import Path
1616
from typing import List, Tuple
1717

18-
# Add src to path
19-
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
20-
sys.path.append(str(Path(__file__).resolve().parents[2]))
18+
PROJECT_ROOT = Path(__file__).resolve().parents[3]
19+
sys.path.insert(0, str(PROJECT_ROOT / "src"))
20+
sys.path.insert(0, str(PROJECT_ROOT / "examples"))
2121

2222
from _example_helpers import print_separator
2323

examples/05_advanced_features/dynamic_schemas/demo_intelligent_nlp2cmd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def demo_intelligent_nlp2cmd():
239239

240240
# Initialize system
241241
nlp = IntelligentNLP2CMD(
242-
storage_dir="./migrated_schemas"
242+
storage_dir=str(PROJECT_ROOT / "generated" / "migrated_schemas")
243243
)
244244

245245
# Test queries

examples/05_advanced_features/dynamic_schemas/demo_persistent_storage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def demonstrate_persistent_storage():
1818
print("=" * 60)
1919

2020
# Initialize registry with persistent storage
21-
storage_dir = "./my_command_schemas"
21+
storage_dir = str(PROJECT_ROOT / "generated" / "my_command_schemas")
2222
registry = DynamicSchemaRegistry(
2323
use_per_command_storage=True,
2424
storage_dir=storage_dir,

examples/05_advanced_features/dynamic_schemas/demo_schema_flow.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,12 @@ def demonstrate_schema_flow():
3030

3131
# Step 1: Initialize registry
3232
print("\n1. Initializing Schema Registry...")
33+
command_schemas_dir = PROJECT_ROOT / "command_schemas"
3334
registry = DynamicSchemaRegistry(
3435
use_per_command_storage=True,
35-
storage_dir="./command_schemas"
36+
storage_dir=str(command_schemas_dir)
3637
)
37-
print(f" Storage location: ./command_schemas")
38+
print(f" Storage location: {command_schemas_dir}")
3839
print(f" Loaded schemas: {len(registry.schemas)}")
3940

4041
# Step 2: Extract schema from command
@@ -54,7 +55,7 @@ def demonstrate_schema_flow():
5455

5556
# Step 3: Show stored schema file
5657
print(f"\n3. Schema stored in file system...")
57-
schema_file = Path("./command_schemas/commands") / f"{command}.json"
58+
schema_file = command_schemas_dir / "commands" / f"{command}.json"
5859
if schema_file.exists():
5960
print(f" File: {schema_file}")
6061
print(f" Size: {schema_file.stat().st_size} bytes")

0 commit comments

Comments
 (0)