@@ -313,9 +313,18 @@ benchmark: benchmark-format benchmark-function benchmark-token benchmark-project
313313 @echo " $( GREEN) All benchmarks completed!$( NC) "
314314 @echo " Results in $( BENCH_OUTPUT) /"
315315 @ls -lhS $(BENCH_OUTPUT ) /* .json 2> /dev/null
316+ @echo " "
317+ @echo " $( BLUE) Generating Markdown report...$( NC) "
318+ @BENCH_OUTPUT=$(BENCH_OUTPUT ) $(PYTHON ) examples/benchmark_report.py > /dev/null
319+ @echo " Report: $( BENCH_OUTPUT) /BENCHMARK_REPORT.md"
316320
317321benchmark-format : # # Benchmark format reproduction (yaml/toon/logicml/json)
318322 @echo " $( BLUE) ━━━ Format Benchmark ━━━$( NC) "
323+ @mkdir -p $(BENCH_OUTPUT )
324+ @echo " # Auto-generated by make benchmark" > $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
325+ @echo " set -euo pipefail" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
326+ @echo " " >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
327+ @printf ' %s\n' " $( PYTHON) examples/15_unified_benchmark.py --no-llm --type format --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_format.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
319328 $(PYTHON ) examples/15_unified_benchmark.py \
320329 --no-llm --type format \
321330 --folder $(BENCH_SAMPLES ) / \
@@ -325,14 +334,20 @@ benchmark-format: ## Benchmark format reproduction (yaml/toon/logicml/json)
325334
# benchmark-function
#
# 1. Runs the function-level reproduction benchmark on
#    $(BENCH_SAMPLES)/sample_functions.py and writes
#    $(BENCH_OUTPUT)/benchmark_function.json.
# 2. Runs the behavioral (runtime-equivalence) benchmark, handing it the JSON
#    produced in step 1 and the sample source through environment variables.
#    Its stdout is discarded; stderr is still shown.
#
# Each command is appended to $(BENCH_OUTPUT)/BENCHMARK_COMMANDS.sh before it
# runs. The output directory is created here so the target also works when it
# is invoked on its own rather than through a target that has already made it.
# The behavioral command is logged together with the environment variables it
# is actually run with, so the logged line matches the executed one.
benchmark-function: ## Benchmark function-level reproduction
	@echo "$(BLUE)━━━ Function Benchmark ━━━$(NC)"
	@mkdir -p $(BENCH_OUTPUT)
	@printf '%s\n' "$(PYTHON) examples/15_unified_benchmark.py --no-llm --type function --file $(BENCH_SAMPLES)/sample_functions.py --limit 10 --verbose --output $(BENCH_OUTPUT)/benchmark_function.json" >> $(BENCH_OUTPUT)/BENCHMARK_COMMANDS.sh
	$(PYTHON) examples/15_unified_benchmark.py \
		--no-llm --type function \
		--file $(BENCH_SAMPLES)/sample_functions.py \
		--limit 10 --verbose \
		--output $(BENCH_OUTPUT)/benchmark_function.json
	@echo "$(BLUE)━━━ Behavioral Benchmark (runtime equivalence) ━━━$(NC)"
	@printf '%s\n' "BENCH_OUTPUT=$(BENCH_OUTPUT) BENCH_FUNCTION_JSON=$(BENCH_OUTPUT)/benchmark_function.json BENCH_FUNCTION_SOURCE=$(BENCH_SAMPLES)/sample_functions.py $(PYTHON) examples/behavioral_benchmark.py" >> $(BENCH_OUTPUT)/BENCHMARK_COMMANDS.sh
	@BENCH_OUTPUT=$(BENCH_OUTPUT) BENCH_FUNCTION_JSON=$(BENCH_OUTPUT)/benchmark_function.json BENCH_FUNCTION_SOURCE=$(BENCH_SAMPLES)/sample_functions.py \
		$(PYTHON) examples/behavioral_benchmark.py > /dev/null
333347
334348benchmark-token : # # Benchmark token efficiency across formats
335349 @echo " $( BLUE) ━━━ Token Efficiency Benchmark ━━━$( NC) "
350+ @printf ' %s\n' " $( PYTHON) examples/11_token_benchmark.py --no-llm --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_token.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
336351 $(PYTHON ) examples/11_token_benchmark.py \
337352 --no-llm \
338353 --folder $(BENCH_SAMPLES ) / \
@@ -342,6 +357,7 @@ benchmark-token: ## Benchmark token efficiency across formats
342357
343358benchmark-project : # # Benchmark project-level reproduction
344359 @echo " $( BLUE) ━━━ Project Benchmark ━━━$( NC) "
360+ @printf ' %s\n' " $( PYTHON) examples/15_unified_benchmark.py --no-llm --type project --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_project.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
345361 $(PYTHON ) examples/15_unified_benchmark.py \
346362 --no-llm --type project \
347363 --folder $(BENCH_SAMPLES ) / \
@@ -352,18 +368,29 @@ benchmark-project: ## Benchmark project-level reproduction
352368benchmark-toon : # # Generate TOON + function-logic for self-analysis
353369 @echo " $( BLUE) ━━━ TOON Self-Analysis ━━━$( NC) "
354370 @mkdir -p $(BENCH_OUTPUT )
371+ @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f toon --compact --name project -o ./" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
355372 $(PYTHON ) -m code2logic ./ -f toon --compact --name project -o ./
356- $(PYTHON ) -m code2logic ./ -f toon --compact --no-repeat-module --function-logic --with-schema --name project -o ./
373+ @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f toon --compact --no-repeat-module --function-logic function.toon --with-schema --name project -o ./" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
374+ $(PYTHON ) -m code2logic ./ -f toon --compact --no-repeat-module --function-logic function.toon --with-schema --name project -o ./
375+ @cp -f project.toon $(BENCH_OUTPUT ) /project.toon 2> /dev/null || true
376+ @cp -f project.toon-schema.json $(BENCH_OUTPUT ) /project.toon-schema.json 2> /dev/null || true
377+ @cp -f function.toon $(BENCH_OUTPUT ) /function.toon 2> /dev/null || true
378+ @cp -f function-schema.json $(BENCH_OUTPUT ) /function-schema.json 2> /dev/null || true
379+ @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f yaml --compact --name project -o $( BENCH_OUTPUT) /" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
357380 $(PYTHON ) -m code2logic ./ -f yaml --compact --name project -o $(BENCH_OUTPUT ) /
381+ @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f json --name project -o $( BENCH_OUTPUT) /" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
358382 $(PYTHON ) -m code2logic ./ -f json --name project -o $(BENCH_OUTPUT ) /
383+ @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f markdown --name project -o $( BENCH_OUTPUT) /" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
359384 $(PYTHON ) -m code2logic ./ -f markdown --name project -o $(BENCH_OUTPUT ) /
385+ @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f compact --name project -o $( BENCH_OUTPUT) /" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
360386 $(PYTHON ) -m code2logic ./ -f compact --name project -o $(BENCH_OUTPUT ) /
387+ @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f csv -d standard --name project -o $( BENCH_OUTPUT) /" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
361388 $(PYTHON ) -m code2logic ./ -f csv -d standard --name project -o $(BENCH_OUTPUT ) /
362389 @echo " "
363390 @echo " $( BLUE) Format size comparison (self-analysis):$( NC) "
364391 @printf " %-25s %10s %10s\n" " Format" " Size" " ~Tokens"
365392 @printf " %-25s %10s %10s\n" " -------------------------" " ----------" " ----------"
366- @for f in project.toon project.functions .toon $(BENCH_OUTPUT ) /project.yaml $(BENCH_OUTPUT ) /project.json $(BENCH_OUTPUT ) /project.md $(BENCH_OUTPUT ) /project.txt $(BENCH_OUTPUT ) /project.csv; do \
393+ @for f in $( BENCH_OUTPUT ) / project.toon $( BENCH_OUTPUT ) /function .toon $(BENCH_OUTPUT ) /project.yaml $(BENCH_OUTPUT ) /project.json $(BENCH_OUTPUT ) /project.md $(BENCH_OUTPUT ) /project.txt $(BENCH_OUTPUT ) /project.csv; do \
367394 if [ -f " $$ f" ]; then \
368395 sz=$$(wc -c < "$$f" ) ; \
369396 tok=$$((sz / 4 ) ); \
@@ -372,7 +399,7 @@ benchmark-toon: ## Generate TOON + function-logic for self-analysis
372399 done
373400 @echo " "
374401 @echo " $( GREEN) TOON files:$( NC) "
375- @ls -lh project.toon project.functions. toon project.toon-schema.json project.functions -schema.json 2> /dev/null
402+ @ls -lh $( BENCH_OUTPUT ) / project.toon $( BENCH_OUTPUT ) /function. toon $( BENCH_OUTPUT ) / project.toon-schema.json $( BENCH_OUTPUT ) /function -schema.json 2> /dev/null
376403
377404benchmark-compare : # # Show summary comparison of all benchmark results
378405 @echo " "
0 commit comments