|
14 | 14 | TestResults, |
15 | 15 | TestType, |
16 | 16 | ) |
17 | | -from codeflash.result.critic import coverage_critic, performance_gain, quantity_of_tests_critic, speedup_critic |
| 17 | +from codeflash.result.critic import ( |
| 18 | + coverage_critic, |
| 19 | + performance_gain, |
| 20 | + quantity_of_tests_critic, |
| 21 | + speedup_critic, |
| 22 | + throughput_gain, |
| 23 | +) |
18 | 24 |
|
19 | 25 |
|
20 | 26 | def test_performance_gain() -> None: |
@@ -429,3 +435,159 @@ def test_coverage_critic() -> None: |
429 | 435 | ) |
430 | 436 |
|
431 | 437 | assert coverage_critic(unittest_coverage, "unittest") is True |
| 438 | + |
| 439 | + |
def test_throughput_gain() -> None:
    """Check throughput_gain on improvement, no-change, regression and zero-baseline inputs.

    The returned gains are floats, so every case is compared with
    ``math.isclose`` rather than ``==``: an exact comparison would tie the test
    to one particular arithmetic formulation of the gain (for example
    ``80 / 100 - 1`` evaluates to ``-0.19999999999999996``, not ``-0.2``).
    """
    import math

    # (original_throughput, optimized_throughput, expected_gain)
    cases = [
        (100, 150, 0.5),  # 50% improvement
        (100, 100, 0.0),  # no improvement
        (100, 80, -0.2),  # 20% regression
        (0, 50, 0.0),  # zero original throughput (edge case)
        (50, 200, 3.0),  # 300% improvement
    ]

    for original, optimized, expected in cases:
        gain = throughput_gain(original_throughput=original, optimized_throughput=optimized)
        # abs_tol is required because rel_tol alone can never match an expected value of 0.0.
        assert math.isclose(gain, expected, rel_tol=1e-9, abs_tol=1e-12), (
            f"throughput_gain({original}, {optimized}) returned {gain}, expected {expected}"
        )
| 456 | + |
| 457 | + |
def test_speedup_critic_with_async_throughput() -> None:
    """Exercise speedup_critic with different mixes of runtime and async throughput data."""
    baseline_runtime = 10000  # 10 microseconds
    baseline_throughput = 100

    def build_candidate(runtime, throughput):
        # Every scenario uses the same candidate shape; only the runtime
        # (also used as the total timing) and the async throughput vary.
        return OptimizedCandidateResult(
            max_loop_count=5,
            best_test_runtime=runtime,
            behavior_test_results=TestResults(),
            benchmarking_test_results=TestResults(),
            optimization_candidate_index=0,
            total_candidate_timing=runtime,
            async_throughput=throughput,
        )

    def critic_accepts(candidate, original_throughput, best_runtime=None, best_throughput=None):
        # All calls share the baseline runtime and disable the GH-action noise handling.
        return speedup_critic(
            candidate_result=candidate,
            original_code_runtime=baseline_runtime,
            best_runtime_until_now=best_runtime,
            original_async_throughput=original_throughput,
            best_throughput_until_now=best_throughput,
            disable_gh_action_noise=True,
        )

    # Case 1: 20% runtime improvement together with 20% throughput improvement.
    both_improve = build_candidate(runtime=8000, throughput=120)
    assert critic_accepts(both_improve, baseline_throughput)

    # Case 2: 20% runtime improvement, only 5% throughput improvement
    # (below 10% threshold) - should pass.
    runtime_only = build_candidate(runtime=8000, throughput=105)
    assert critic_accepts(runtime_only, baseline_throughput)

    # Case 3: 20% throughput improvement, only 2% runtime improvement
    # (below 5% threshold) - should pass.
    throughput_only = build_candidate(runtime=9800, throughput=120)
    assert critic_accepts(throughput_only, baseline_throughput)

    # Case 4: no throughput data on either side - falls back to runtime-only evaluation.
    no_throughput = build_candidate(runtime=8000, throughput=None)
    assert critic_accepts(no_throughput, None)

    # Case 5: comparison against the best results seen so far
    # (20% runtime improvement, 15% throughput improvement).
    contender = build_candidate(runtime=8000, throughput=115)
    # Should pass when no best throughput yet.
    assert critic_accepts(contender, baseline_throughput)
    # Should fail when a better runtime and a better throughput already exist.
    assert not critic_accepts(contender, baseline_throughput, best_runtime=7000, best_throughput=120)

    # Case 6: zero original throughput (edge case) - should pass
    # (throughput evaluation skipped).
    zero_baseline = build_candidate(runtime=8000, throughput=50)
    assert critic_accepts(zero_baseline, 0)
0 commit comments