Skip to content

Commit 2812dd5

Browse files
Merge pull request codeflash-ai#517 from codeflash-ai/early-skip-if-insufficient-number-of-tests-passed
Skip early if the quantity of tests is not sufficient for the original baseline
2 parents e56428e + 70d0c69 commit 2812dd5

File tree

6 files changed: 18 additions and 17 deletions (+18 -17 lines changed)

codeflash/github/PrComment.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@ class PrComment:
1818
relative_file_path: str
1919
speedup_x: str
2020
speedup_pct: str
21-
winning_behavioral_test_results: TestResults
21+
winning_behavior_test_results: TestResults
2222
winning_benchmarking_test_results: TestResults
2323
benchmark_details: Optional[list[BenchmarkDetail]] = None
2424

2525
def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str, Optional[list[BenchmarkDetail]]]]:
2626
report_table = {
2727
test_type.to_name(): result
28-
for test_type, result in self.winning_behavioral_test_results.get_test_pass_fail_report_by_type().items()
28+
for test_type, result in self.winning_behavior_test_results.get_test_pass_fail_report_by_type().items()
2929
if test_type.to_name()
3030
}
3131

codeflash/models/models.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ class BestOptimization(BaseModel):
7878
helper_functions: list[FunctionSource]
7979
runtime: int
8080
replay_performance_gain: Optional[dict[BenchmarkKey, float]] = None
81-
winning_behavioral_test_results: TestResults
81+
winning_behavior_test_results: TestResults
8282
winning_benchmarking_test_results: TestResults
8383
winning_replay_benchmarking_test_results: Optional[TestResults] = None
8484

@@ -278,7 +278,7 @@ class FunctionParent:
278278

279279

280280
class OriginalCodeBaseline(BaseModel):
281-
behavioral_test_results: TestResults
281+
behavior_test_results: TestResults
282282
benchmarking_test_results: TestResults
283283
replay_benchmarking_test_results: Optional[dict[BenchmarkKey, TestResults]] = None
284284
line_profile_results: dict

codeflash/optimization/function_optimizer.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -488,7 +488,7 @@ def determine_best_candidate(
488488
candidate=candidate,
489489
helper_functions=code_context.helper_functions,
490490
runtime=best_test_runtime,
491-
winning_behavioral_test_results=candidate_result.behavior_test_results,
491+
winning_behavior_test_results=candidate_result.behavior_test_results,
492492
replay_performance_gain=replay_perf_gain if self.args.benchmark else None,
493493
winning_benchmarking_test_results=candidate_result.benchmarking_test_results,
494494
winning_replay_benchmarking_test_results=candidate_result.benchmarking_test_results,
@@ -575,7 +575,7 @@ def log_successful_optimization(
575575
"original_runtime": explanation.original_runtime_ns,
576576
"winning_test_results": {
577577
tt.to_name(): v
578-
for tt, v in explanation.winning_behavioral_test_results.get_test_pass_fail_report_by_type().items()
578+
for tt, v in explanation.winning_behavior_test_results.get_test_pass_fail_report_by_type().items()
579579
},
580580
},
581581
)
@@ -898,8 +898,9 @@ def setup_and_establish_baseline(
898898
return Failure(baseline_result.failure())
899899

900900
original_code_baseline, test_functions_to_remove = baseline_result.unwrap()
901-
if isinstance(original_code_baseline, OriginalCodeBaseline) and not coverage_critic(
902-
original_code_baseline.coverage_results, self.args.test_framework
901+
if isinstance(original_code_baseline, OriginalCodeBaseline) and (
902+
not coverage_critic(original_code_baseline.coverage_results, self.args.test_framework)
903+
or not quantity_of_tests_critic(original_code_baseline)
903904
):
904905
if self.args.override_fixtures:
905906
restore_conftest(original_conftest_content)
@@ -971,7 +972,7 @@ def find_and_process_best_optimization(
971972
)
972973
explanation = Explanation(
973974
raw_explanation_message=best_optimization.candidate.explanation,
974-
winning_behavioral_test_results=best_optimization.winning_behavioral_test_results,
975+
winning_behavior_test_results=best_optimization.winning_behavior_test_results,
975976
winning_benchmarking_test_results=best_optimization.winning_benchmarking_test_results,
976977
original_runtime_ns=original_code_baseline.runtime,
977978
best_runtime_ns=best_optimization.runtime,
@@ -1203,7 +1204,7 @@ def establish_original_code_baseline(
12031204
return Success(
12041205
(
12051206
OriginalCodeBaseline(
1206-
behavioral_test_results=behavioral_results,
1207+
behavior_test_results=behavioral_results,
12071208
benchmarking_test_results=benchmarking_results,
12081209
replay_benchmarking_test_results=replay_benchmarking_test_results
12091210
if self.args.benchmark
@@ -1267,7 +1268,7 @@ def run_optimized_candidate(
12671268
)
12681269
)
12691270
console.rule()
1270-
if compare_test_results(baseline_results.behavioral_test_results, candidate_behavior_results):
1271+
if compare_test_results(baseline_results.behavior_test_results, candidate_behavior_results):
12711272
logger.info("Test results matched!")
12721273
console.rule()
12731274
else:

codeflash/result/create_pr.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ def check_create_pr(
163163
relative_file_path=relative_path,
164164
speedup_x=explanation.speedup_x,
165165
speedup_pct=explanation.speedup_pct,
166-
winning_behavioral_test_results=explanation.winning_behavioral_test_results,
166+
winning_behavior_test_results=explanation.winning_behavior_test_results,
167167
winning_benchmarking_test_results=explanation.winning_benchmarking_test_results,
168168
benchmark_details=explanation.benchmark_details,
169169
),
@@ -210,7 +210,7 @@ def check_create_pr(
210210
relative_file_path=relative_path,
211211
speedup_x=explanation.speedup_x,
212212
speedup_pct=explanation.speedup_pct,
213-
winning_behavioral_test_results=explanation.winning_behavioral_test_results,
213+
winning_behavior_test_results=explanation.winning_behavior_test_results,
214214
winning_benchmarking_test_results=explanation.winning_benchmarking_test_results,
215215
benchmark_details=explanation.benchmark_details,
216216
),

codeflash/result/critic.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
from codeflash.models.models import TestType
1313

1414
if TYPE_CHECKING:
15-
from codeflash.models.models import CoverageData, OptimizedCandidateResult
15+
from codeflash.models.models import CoverageData, OptimizedCandidateResult, OriginalCodeBaseline
1616

1717

1818
def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float:
@@ -50,7 +50,7 @@ def speedup_critic(
5050
return bool(perf_gain > noise_floor and candidate_result.best_test_runtime < best_runtime_until_now)
5151

5252

53-
def quantity_of_tests_critic(candidate_result: OptimizedCandidateResult) -> bool:
53+
def quantity_of_tests_critic(candidate_result: OptimizedCandidateResult | OriginalCodeBaseline) -> bool:
5454
test_results = candidate_result.behavior_test_results
5555
report = test_results.get_test_pass_fail_report_by_type()
5656

codeflash/result/explanation.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
@dataclass(frozen=True, config={"arbitrary_types_allowed": True})
1717
class Explanation:
1818
raw_explanation_message: str
19-
winning_behavioral_test_results: TestResults
19+
winning_behavior_test_results: TestResults
2020
winning_benchmarking_test_results: TestResults
2121
original_runtime_ns: int
2222
best_runtime_ns: int
@@ -93,7 +93,7 @@ def to_console_string(self) -> str:
9393
+ self.raw_explanation_message
9494
+ " \n\n"
9595
+ "The new optimized code was tested for correctness. The results are listed below.\n"
96-
+ f"{TestResults.report_to_string(self.winning_behavioral_test_results.get_test_pass_fail_report_by_type())}\n"
96+
+ f"{TestResults.report_to_string(self.winning_behavior_test_results.get_test_pass_fail_report_by_type())}\n"
9797
)
9898

9999
def explanation_message(self) -> str:

0 commit comments

Comments
 (0)