|
# Command-style targets: declared phony so that a file with the same name in
# the working tree can never make them look up to date.
.PHONY: help install install-dev install-full clean build test lint format \
        typecheck publish publish-test docs docker
.PHONY: benchmark benchmark-format benchmark-function benchmark-project \
        benchmark-token benchmark-compare benchmark-toon examples
2 | 3 |
|
3 | 4 | POETRY := $(shell command -v poetry 2>/dev/null) |
4 | 5 | ifeq ($(POETRY),) |
@@ -298,6 +299,149 @@ run-compare: ## Compare sizes of all formats |
# Runs the `code2logic` command with --status.
status: ## Show library status
	code2logic --status
300 | 301 |
|
| 302 | +# ============================================================================ |
| 303 | +# Benchmarks |
| 304 | +# ============================================================================ |
| 305 | + |
# Knobs shared by the benchmark-* and examples targets.
# Sample sources handed to the benchmark scripts through --folder / --file.
BENCH_SAMPLES := tests/samples
# Directory the benchmark results and generated project files are written to.
BENCH_OUTPUT  := examples/output
# Value passed as --limit by the format, token and project benchmarks.
BENCH_LIMIT   := 20
# Space-separated list passed as --formats.
BENCH_FORMATS := yaml toon logicml json
| 310 | + |
# Run every no-LLM benchmark, print the summary, then list the JSON results.
#
# benchmark-compare is invoked from the recipe rather than listed as a
# prerequisite: make gives no ordering guarantee between prerequisites under
# `make -j`, so as a prerequisite the summary could be generated before the
# result files it reads exist. The recipe only starts once all prerequisites
# have finished, so the summary always sees complete results.
#
# The final listing is informational; `|| true` keeps a missing or empty result
# set from failing the whole target after the benchmarks themselves succeeded.
benchmark: benchmark-format benchmark-function benchmark-token benchmark-project benchmark-toon ## Run all benchmarks (no LLM)
	@$(MAKE) --no-print-directory benchmark-compare
	@echo ""
	@echo "$(GREEN)All benchmarks completed!$(NC)"
	@echo "Results in $(BENCH_OUTPUT)/"
	@ls -lhS $(BENCH_OUTPUT)/*.json 2>/dev/null || true
| 316 | + |
# Runs examples/15_unified_benchmark.py with --type format and --no-llm over
# $(BENCH_SAMPLES)/ for every format in $(BENCH_FORMATS), writing
# $(BENCH_OUTPUT)/benchmark_format.json.
# The output directory is created first (as benchmark-toon and examples do) so
# the target also works on a fresh checkout where it does not exist yet.
benchmark-format: ## Benchmark format reproduction (yaml/toon/logicml/json)
	@echo "$(BLUE)━━━ Format Benchmark ━━━$(NC)"
	@mkdir -p $(BENCH_OUTPUT)
	$(PYTHON) examples/15_unified_benchmark.py \
		--no-llm --type format \
		--folder $(BENCH_SAMPLES)/ \
		--formats $(BENCH_FORMATS) \
		--limit $(BENCH_LIMIT) --verbose \
		--output $(BENCH_OUTPUT)/benchmark_format.json
| 325 | + |
# Runs examples/15_unified_benchmark.py with --type function and --no-llm on
# $(BENCH_SAMPLES)/sample_functions.py (fixed --limit 10), writing
# $(BENCH_OUTPUT)/benchmark_function.json.
# The output directory is created first (as benchmark-toon and examples do) so
# the target also works on a fresh checkout where it does not exist yet.
benchmark-function: ## Benchmark function-level reproduction
	@echo "$(BLUE)━━━ Function Benchmark ━━━$(NC)"
	@mkdir -p $(BENCH_OUTPUT)
	$(PYTHON) examples/15_unified_benchmark.py \
		--no-llm --type function \
		--file $(BENCH_SAMPLES)/sample_functions.py \
		--limit 10 --verbose \
		--output $(BENCH_OUTPUT)/benchmark_function.json
| 333 | + |
# Runs examples/11_token_benchmark.py with --no-llm over $(BENCH_SAMPLES)/ for
# every format in $(BENCH_FORMATS), writing $(BENCH_OUTPUT)/benchmark_token.json.
# The output directory is created first (as benchmark-toon and examples do) so
# the target also works on a fresh checkout where it does not exist yet.
benchmark-token: ## Benchmark token efficiency across formats
	@echo "$(BLUE)━━━ Token Efficiency Benchmark ━━━$(NC)"
	@mkdir -p $(BENCH_OUTPUT)
	$(PYTHON) examples/11_token_benchmark.py \
		--no-llm \
		--folder $(BENCH_SAMPLES)/ \
		--formats $(BENCH_FORMATS) \
		--limit $(BENCH_LIMIT) --verbose \
		--output $(BENCH_OUTPUT)/benchmark_token.json
| 342 | + |
# Runs examples/15_unified_benchmark.py with --type project and --no-llm over
# $(BENCH_SAMPLES)/ for every format in $(BENCH_FORMATS), writing
# $(BENCH_OUTPUT)/benchmark_project.json.
# The output directory is created first (as benchmark-toon and examples do) so
# the target also works on a fresh checkout where it does not exist yet.
benchmark-project: ## Benchmark project-level reproduction
	@echo "$(BLUE)━━━ Project Benchmark ━━━$(NC)"
	@mkdir -p $(BENCH_OUTPUT)
	$(PYTHON) examples/15_unified_benchmark.py \
		--no-llm --type project \
		--folder $(BENCH_SAMPLES)/ \
		--formats $(BENCH_FORMATS) \
		--limit $(BENCH_LIMIT) --verbose \
		--output $(BENCH_OUTPUT)/benchmark_project.json
| 351 | + |
# Runs `code2logic` on the repository itself in several output formats, then
# prints a size table for the files it expects to find.
#
# The two TOON invocations write to ./ ; the yaml/json/markdown/compact/csv
# invocations write to $(BENCH_OUTPUT)/.
# The "~Tokens" column is a rough estimate: byte size divided by 4.
# Files that do not exist are skipped by the size loop, and the closing `ls`
# is equally best-effort: `|| true` stops a single absent file (for example a
# schema that was not generated) from failing the target with its error
# message hidden by `2>/dev/null`.
benchmark-toon: ## Generate TOON + function-logic for self-analysis
	@echo "$(BLUE)━━━ TOON Self-Analysis ━━━$(NC)"
	@mkdir -p $(BENCH_OUTPUT)
	$(PYTHON) -m code2logic ./ -f toon --compact --name project -o ./
	$(PYTHON) -m code2logic ./ -f toon --compact --no-repeat-module --function-logic --with-schema --name project -o ./
	$(PYTHON) -m code2logic ./ -f yaml --compact --name project -o $(BENCH_OUTPUT)/
	$(PYTHON) -m code2logic ./ -f json --name project -o $(BENCH_OUTPUT)/
	$(PYTHON) -m code2logic ./ -f markdown --name project -o $(BENCH_OUTPUT)/
	$(PYTHON) -m code2logic ./ -f compact --name project -o $(BENCH_OUTPUT)/
	$(PYTHON) -m code2logic ./ -f csv -d standard --name project -o $(BENCH_OUTPUT)/
	@echo ""
	@echo "$(BLUE)Format size comparison (self-analysis):$(NC)"
	@printf " %-25s %10s %10s\n" "Format" "Size" "~Tokens"
	@printf " %-25s %10s %10s\n" "-------------------------" "----------" "----------"
	@for f in project.toon project.functions.toon \
		$(BENCH_OUTPUT)/project.yaml $(BENCH_OUTPUT)/project.json \
		$(BENCH_OUTPUT)/project.md $(BENCH_OUTPUT)/project.txt \
		$(BENCH_OUTPUT)/project.csv; do \
		if [ -f "$$f" ]; then \
			sz=$$(wc -c < "$$f"); \
			tok=$$((sz / 4)); \
			printf " %-25s %8s B %8s\n" "$$(basename "$$f")" "$$sz" "$$tok"; \
		fi; \
	done
	@echo ""
	@echo "$(GREEN)TOON files:$(NC)"
	@ls -lh project.toon project.functions.toon project.toon-schema.json project.functions-schema.json 2>/dev/null || true
| 376 | + |
# Prints a banner, then runs examples/benchmark_summary.py on $(BENCH_OUTPUT).
# It has no prerequisites, so it only reads whatever results are already there.
benchmark-compare: ## Show summary comparison of all benchmark results
	@echo ""; echo "$(BLUE)━━━ Benchmark Summary ━━━$(NC)"
	@$(PYTHON) examples/benchmark_summary.py $(BENCH_OUTPUT)
| 381 | + |
| 382 | +# ============================================================================ |
| 383 | +# Examples (step by step) |
| 384 | +# ============================================================================ |
| 385 | + |
# One example step: announce it, run the script, then print a spacer line.
#   $(1)  progress label, e.g. 1/16
#   $(2)  title shown to the user
#   $(3)  script name under examples/ followed by its arguments
define _example_step
@echo "$(YELLOW)[$(1)] $(2)$(NC)"
$(PYTHON) examples/$(3)
@echo ""
endef

# Runs the sixteen example scripts in order. Make stops at the first script
# that exits non-zero, so the closing banner is only reached when all passed.
examples: ## Run all examples step by step (no LLM required)
	@mkdir -p $(BENCH_OUTPUT)
	@echo "$(BLUE)━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━$(NC)"
	@echo "$(BLUE) Running all code2logic examples (--no-llm where needed)$(NC)"
	@echo "$(BLUE)━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━$(NC)"
	@echo ""
	$(call _example_step,1/16,Quick Start — basic analysis,01_quick_start.py)
	$(call _example_step,2/16,Refactoring — suggest improvements,02_refactoring.py)
	$(call _example_step,3/16,Reproduction — code ↔ spec round-trip,03_reproduction.py --no-llm)
	$(call _example_step,4/16,Project Analysis — multi-file,04_project.py --no-llm)
	$(call _example_step,5/16,LLM Integration — provider detection,05_llm_integration.py --no-llm)
	$(call _example_step,6/16,Metrics — reproduction quality,06_metrics.py --no-llm)
	$(call _example_step,7/16,Format Benchmark — compare formats,08_format_benchmark.py --no-llm)
	$(call _example_step,8/16,Async Benchmark — async code analysis,09_async_benchmark.py --no-llm)
	$(call _example_step,9/16,Function Reproduction — per-function,10_function_reproduction.py --no-llm)
	$(call _example_step,10/16,Token Benchmark — token efficiency,11_token_benchmark.py --no-llm)
	$(call _example_step,11/16,Comprehensive Analysis — full pipeline,12_comprehensive_analysis.py --no-llm --limit 3)
	$(call _example_step,12/16,Project Benchmark — project-level scores,13_project_benchmark.py --no-llm)
	$(call _example_step,13/16,Repeatability Test — determinism check,14_repeatability_test.py --no-llm --file tests/samples/sample_functions.py --runs 2)
	$(call _example_step,14/16,Unified Benchmark — all-in-one,15_unified_benchmark.py --no-llm --verbose)
	$(call _example_step,15/16,Terminal Demo — rich output,16_terminal_demo.py)
	$(call _example_step,16/16,Duplicate Detection,duplicate_detection.py)
	@echo "$(GREEN)━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━$(NC)"
	@echo "$(GREEN) All 16 examples completed successfully!$(NC)"
	@echo "$(GREEN)━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━$(NC)"
	@echo " Output: $(BENCH_OUTPUT)/"
| 444 | + |
301 | 445 | # ============================================================================ |
302 | 446 | # LLM Integration |
303 | 447 | # ============================================================================ |
|
0 commit comments