Skip to content

Commit 08a3412

Browse files
committed
some other tests
1 parent c6c9d95 commit 08a3412

File tree

2 files changed

+719
-97
lines changed

2 files changed

+719
-97
lines changed

tests/test_critic.py

Lines changed: 163 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@
1414
TestResults,
1515
TestType,
1616
)
17-
from codeflash.result.critic import coverage_critic, performance_gain, quantity_of_tests_critic, speedup_critic
17+
from codeflash.result.critic import (
18+
coverage_critic,
19+
performance_gain,
20+
quantity_of_tests_critic,
21+
speedup_critic,
22+
throughput_gain,
23+
)
1824

1925

2026
def test_performance_gain() -> None:
@@ -429,3 +435,159 @@ def test_coverage_critic() -> None:
429435
)
430436

431437
assert coverage_critic(unittest_coverage, "unittest") is True
438+
439+
440+
def test_throughput_gain() -> None:
    """Check throughput_gain against hand-computed expected gains.

    Covers an improvement, no change, a regression, a zero original
    throughput (expected to yield 0.0), and a large improvement.

    Results are compared with a tolerance instead of ``==`` so the test does
    not depend on the order of floating-point operations inside
    ``throughput_gain`` (e.g. ``80 / 100 - 1`` is not exactly ``-0.2``).
    """
    import math  # local import: the file's top-level import block is not fully visible here

    # (original_throughput, optimized_throughput, expected_gain)
    cases = (
        (100, 150, 0.5),  # 50% improvement
        (100, 100, 0.0),  # no improvement
        (100, 80, -0.2),  # 20% regression
        (0, 50, 0.0),  # zero original throughput (edge case)
        (50, 200, 3.0),  # 300% improvement
    )

    for original, optimized, expected in cases:
        gain = throughput_gain(original_throughput=original, optimized_throughput=optimized)
        # abs_tol is required for the expected == 0.0 cases, where rel_tol alone is meaningless.
        assert math.isclose(gain, expected, rel_tol=1e-9, abs_tol=1e-12), (
            f"throughput_gain(original_throughput={original}, optimized_throughput={optimized}) "
            f"returned {gain!r}, expected {expected!r}"
        )
456+
457+
458+
def test_speedup_critic_with_async_throughput() -> None:
    """Exercise speedup_critic when async throughput figures are supplied.

    Every scenario uses the same baseline runtime and passes
    ``disable_gh_action_noise=True``; only the candidate's runtime and
    throughput, the original throughput, and the "best so far" values vary.
    """
    baseline_runtime = 10000  # 10 microseconds
    baseline_throughput = 100

    def make_candidate(runtime, throughput):
        # Build a candidate whose total timing equals its best runtime, with
        # fresh, empty TestResults for both behavior and benchmarking.
        return OptimizedCandidateResult(
            max_loop_count=5,
            best_test_runtime=runtime,
            behavior_test_results=TestResults(),
            benchmarking_test_results=TestResults(),
            optimization_candidate_index=0,
            total_candidate_timing=runtime,
            async_throughput=throughput,
        )

    def critic_verdict(candidate, original_throughput, best_runtime=None, best_throughput=None):
        # Single call site for speedup_critic so each scenario states only what differs.
        return speedup_critic(
            candidate_result=candidate,
            original_code_runtime=baseline_runtime,
            best_runtime_until_now=best_runtime,
            original_async_throughput=original_throughput,
            best_throughput_until_now=best_throughput,
            disable_gh_action_noise=True,
        )

    # Test case 1: Both runtime and throughput improve significantly
    # (20% runtime improvement, 20% throughput improvement).
    both_improve = make_candidate(runtime=8000, throughput=120)
    assert critic_verdict(both_improve, baseline_throughput)

    # Test case 2: Runtime improves significantly, throughput doesn't meet threshold (should pass).
    # 20% runtime improvement, only 5% throughput improvement (below 10% threshold).
    runtime_only = make_candidate(runtime=8000, throughput=105)
    assert critic_verdict(runtime_only, baseline_throughput)

    # Test case 3: Throughput improves significantly, runtime doesn't meet threshold (should pass).
    # Only 2% runtime improvement (below 5% threshold), 20% throughput improvement.
    throughput_only = make_candidate(runtime=9800, throughput=120)
    assert critic_verdict(throughput_only, baseline_throughput)

    # Test case 4: No throughput data - should fall back to runtime-only evaluation.
    # Neither the candidate nor the original carries a throughput figure.
    no_throughput = make_candidate(runtime=8000, throughput=None)
    assert critic_verdict(no_throughput, None)

    # Test case 5: Test best_throughput_until_now comparison
    # (20% runtime improvement, 15% throughput improvement).
    contender = make_candidate(runtime=8000, throughput=115)

    # Should pass when no best throughput yet
    assert critic_verdict(contender, baseline_throughput)

    # Should fail when there's a better throughput already
    # (a better runtime, 7000, is also supplied as the best so far).
    assert not critic_verdict(contender, baseline_throughput, best_runtime=7000, best_throughput=120)

    # Test case 6: Zero original throughput (edge case)
    zero_baseline = make_candidate(runtime=8000, throughput=50)

    # Should pass when original throughput is 0 (throughput evaluation skipped)
    assert critic_verdict(zero_baseline, 0)

0 commit comments

Comments
 (0)