1+ import collections
12import dataclasses
23import datetime
34import os
# Lower bound on the retry budget duration. NOTE(review): its exact use is not
# visible in this chunk — presumably retries are only scheduled when at least
# this much budget remains; confirm against _get_budget_duration()'s callers.
_MIN_TEST_RETRY_BUDGET_DURATION = datetime.timedelta(seconds=4)
2122
2223
24+ @dataclasses .dataclass
25+ class _NewTestMetrics :
26+ "Represents metrics collected for a new test."
27+
28+ initial_duration : datetime .timedelta = dataclasses .field (
29+ default_factory = datetime .timedelta
30+ )
31+ "Represents the duration of the initial execution of the test."
32+
33+ retry_count : int = dataclasses .field (default = 0 )
34+ "Represents the number of times the test has been retried so far."
35+
36+ scheduled_retry_count : int = dataclasses .field (default = 0 )
37+ "Represents the number of retries that have been scheduled for this test depending on the budget."
38+
39+ total_duration : datetime .timedelta = dataclasses .field (
40+ default_factory = datetime .timedelta
41+ )
42+ "Represents the total duration spent executing this test, including retries."
43+
44+ def add_duration (self , duration : datetime .timedelta ) -> None :
45+ if not self .initial_duration :
46+ self .initial_duration = duration
47+
48+ self .retry_count += 1
49+ self .total_duration += duration
50+
51+
2352@dataclasses .dataclass
2453class FlakyDetector :
2554 token : str
@@ -37,11 +66,8 @@ class FlakyDetector:
3766 init = False ,
3867 default_factory = list ,
3968 )
40- _new_test_durations : typing .Dict [str , datetime .timedelta ] = dataclasses .field (
41- init = False , default_factory = dict
42- )
43- _new_test_retries : typing .DefaultDict [str , int ] = dataclasses .field (
44- init = False , default_factory = lambda : typing .DefaultDict (int )
69+ _new_test_metrics : typing .Dict [str , _NewTestMetrics ] = dataclasses .field (
70+ init = False , default_factory = lambda : collections .defaultdict (_NewTestMetrics )
4571 )
4672 _over_length_tests : typing .Set [str ] = dataclasses .field (
4773 init = False , default_factory = set
@@ -93,14 +119,12 @@ def detect_from_report(self, report: _pytest.reports.TestReport) -> bool:
93119 if test in self ._existing_tests :
94120 return False
95121
96- if test in self ._new_test_durations :
97- return True
98-
99122 if len (test ) > _MAX_TEST_NAME_LENGTH :
100123 self ._over_length_tests .add (test )
101124 return False
102125
103- self ._new_test_durations [test ] = duration
126+ self ._new_test_metrics [test ].add_duration (duration )
127+
104128 return True
105129
106130 def _get_budget_deadline (self ) -> datetime .datetime :
@@ -122,15 +146,18 @@ def _get_remaining_items(
122146 # its duration yet, so allocate max retries directly and rely on the
123147 # budget deadline instead of going through the budget allocation.
124148 if (
125- len (self ._new_test_durations ) == 0
149+ len (self ._new_test_metrics ) == 0
126150 and self .last_collected_test
127151 and self .last_collected_test not in self ._existing_tests
128152 ):
129153 allocation = {self .last_collected_test : _MAX_TEST_RETRY_COUNT }
130154 else :
131155 allocation = _allocate_test_retries (
132156 self ._get_budget_duration (),
133- self ._new_test_durations ,
157+ {
158+ test : metrics .initial_duration
159+ for test , metrics in self ._new_test_metrics .items ()
160+ },
134161 )
135162
136163 items_to_retry = [item for item in session .items if item .nodeid in allocation ]
@@ -139,7 +166,7 @@ def _get_remaining_items(
139166 for item in items_to_retry :
140167 expected_retries = int (allocation [item .nodeid ])
141168 existing_retries = int (
142- self ._new_test_retries . get ( item .nodeid , 0 ) ,
169+ self ._new_test_metrics [ item .nodeid ]. scheduled_retry_count ,
143170 )
144171
145172 remaining_retries = max (0 , expected_retries - existing_retries )
@@ -149,7 +176,7 @@ def _get_remaining_items(
149176 if not item .parent :
150177 continue
151178
152- self ._new_test_retries [item .nodeid ] += 1
179+ self ._new_test_metrics [item .nodeid ]. scheduled_retry_count += 1
153180
154181 clone = item .__class__ .from_parent (
155182 name = item .name ,
@@ -207,48 +234,57 @@ def handle_item(
207234 def make_report (self ) -> str :
208235 result = "🐛 Flaky detection"
209236 if self ._over_length_tests :
210- result += f"{ os .linesep } - Skipped { len (self ._over_length_tests )} test(s):"
237+ result += (
238+ f"{ os .linesep } - Skipped { len (self ._over_length_tests )} "
239+ f"test{ 's' if len (self ._over_length_tests ) > 1 else '' } :"
240+ )
211241 for test in self ._over_length_tests :
212242 result += (
213- f"{ os .linesep } • '{ test } ' has not been tested multiple "
214- f"times because the name of the test exceeds our limit of "
215- f"{ _MAX_TEST_NAME_LENGTH } characters"
243+ f"{ os .linesep } • '{ test } ' has not been tested multiple times because the name of the test "
244+ f"exceeds our limit of { _MAX_TEST_NAME_LENGTH } characters"
216245 )
217246
218- if not self ._new_test_durations :
247+ if not self ._new_test_metrics :
219248 result += f"{ os .linesep } - No new tests detected, but we are watching 👀"
220249
221250 return result
222251
223252 total_retry_duration_seconds = sum (
224- self ._new_test_durations [test_name ].total_seconds () * retry_count
225- for test_name , retry_count in self ._new_test_retries .items ()
226- if retry_count > 0
253+ metrics .total_duration .total_seconds ()
254+ for metrics in self ._new_test_metrics .values ()
227255 )
228256 budget_duration_seconds = self ._get_budget_duration ().total_seconds ()
229257 result += (
230- f"{ os .linesep } - Used { total_retry_duration_seconds / budget_duration_seconds * 100 :.2f} % "
231- f"of the budget ({ total_retry_duration_seconds :.2f} s/{ budget_duration_seconds :.2f} s)"
258+ f"{ os .linesep } - Used { total_retry_duration_seconds / budget_duration_seconds * 100 :.2f} % of the budget "
259+ f"({ total_retry_duration_seconds :.2f} s/{ budget_duration_seconds :.2f} s)"
232260 )
233261
234262 result += (
235- f"{ os .linesep } - Active for { len (self ._new_test_durations )} new test(s):"
263+ f"{ os .linesep } - Active for { len (self ._new_test_metrics )} new "
264+ f"test{ 's' if len (self ._new_test_metrics ) > 1 else '' } :"
236265 )
237- for test , duration in self ._new_test_durations .items ():
238- retry_count = self ._new_test_retries .get (test , 0 )
239- if retry_count == 0 :
240- result += f"{ os .linesep } • '{ test } ' is too slow to be tested at least { _MIN_TEST_RETRY_COUNT } times within the budget"
241- continue
242- elif retry_count < _MIN_TEST_RETRY_COUNT :
243- result += f"{ os .linesep } • '{ test } ' has been tested only { retry_count } times to avoid exceeding the budget"
266+ for test , metrics in self ._new_test_metrics .items ():
267+ if metrics .scheduled_retry_count == 0 :
268+ result += (
269+ f"{ os .linesep } • '{ test } ' is too slow to be tested at least { _MIN_TEST_RETRY_COUNT } times "
270+ "within the budget"
271+ )
244272 continue
245273
246- retry_duration_seconds = duration .total_seconds () * retry_count
274+ if metrics .retry_count < metrics .scheduled_retry_count :
275+ result += (
276+ f"{ os .linesep } • '{ test } ' has been tested only { metrics .retry_count } "
277+ f"time{ 's' if metrics .retry_count > 1 else '' } instead of { metrics .scheduled_retry_count } "
278+ f"time{ 's' if metrics .scheduled_retry_count > 1 else '' } to avoid exceeding the budget"
279+ )
280+ continue
247281
282+ retry_duration_seconds = metrics .total_duration .total_seconds ()
248283 result += (
249- f"{ os .linesep } • '{ test } ' has been tested { retry_count } "
250- f"times using approx. { retry_duration_seconds / budget_duration_seconds * 100 :.2f} % "
251- f"of the budget ({ retry_duration_seconds :.2f} s/{ budget_duration_seconds :.2f} s)"
284+ f"{ os .linesep } • '{ test } ' has been tested { metrics .retry_count } "
285+ f"time{ 's' if metrics .retry_count > 1 else '' } using approx. "
286+ f"{ retry_duration_seconds / budget_duration_seconds * 100 :.2f} % of the budget "
287+ f"({ retry_duration_seconds :.2f} s/{ budget_duration_seconds :.2f} s)"
252288 )
253289
254290 return result
# NOTE(review): removed GitHub diff-page footer ("0 commit comments") — scrape residue, not source code.