@@ -69,12 +69,7 @@ def _save_current_regression_range_indices(
6969 if task_output .HasField ('last_regression_max' ):
7070 last_regression_max = task_output .last_regression_max
7171
72- last_regression_next = None
73- if task_output .HasField ('last_regression_next' ):
74- last_regression_next = task_output .last_regression_next
75-
76- if (last_regression_min is None and last_regression_max is None and
77- last_regression_next is None ):
72+ if last_regression_min is None and last_regression_max is None :
7873 return # Optimization to avoid useless load/put.
7974
8075 testcase = data_handler .get_testcase_by_id (testcase_id )
@@ -85,9 +80,6 @@ def _save_current_regression_range_indices(
8580 testcase .set_metadata (
8681 'last_regression_max' , last_regression_max , update_testcase = False )
8782
88- testcase .set_metadata (
89- 'last_regression_next' , last_regression_next , update_testcase = False )
90-
9183 testcase .put ()
9284
9385
@@ -168,7 +160,6 @@ def find_min_revision(
168160 deadline : float ,
169161 revision_list : List [int ],
170162 max_index : int ,
171- next_revision : Optional [int ],
172163 regression_task_output : uworker_msg_pb2 .RegressionTaskOutput , # pylint: disable=no-member
173164) -> Optional [uworker_msg_pb2 .Output ]: # pylint: disable=no-member
174165 """Attempts to find a min revision to start bisecting from. Such a revision
@@ -185,8 +176,6 @@ def find_min_revision(
185176 max_index: The index of the known max revision for bisection. Must be a
186177 valid index within `revision_list`. It is assumed that the testcase
187178 reproduces at the pointed-to revision.
188- next_revision: The next revision at which to continue searching backwards
189- for a min revision. Can be used to resume execution after timing out.
190179 regression_task_output: Output argument. Any bad builds encountered while
191180 searching for the earliest good build are appended to `build_data_list`.
192181 See also below for values set in different return conditions.
@@ -205,7 +194,6 @@ def find_min_revision(
205194
206195 regression_task_output.last_regression_min is set
207196 regression_task_output.last_regression_max is set
208- regression_task_output.last_regression_next is cleared
209197
210198 b. If no such revision can be found - i.e. the earliest good revision X
211199 still reproduces the testcase:
@@ -227,7 +215,6 @@ def find_min_revision(
227215 output.error_type = REGRESSION_TIMED_OUT
228216 output.regression_task_output = regression_task_output
229217 output.regression_task_output.last_regression_max is set
230- output.regression_task_output.last_regression_next is set
231218
232219 d. If another error occurred:
233220
@@ -238,34 +225,75 @@ def find_min_revision(
238225 output.error_type indicates the error that occurred
239226 output.regression_task_output = regression_task_output
240227 output.regression_task_output.last_regression_max is set
241- output.regression_task_output.last_regression_next is set
242228
243229 """
244- # Save this value so we can calculate exponential distances correctly even if
245- # we find earlier builds that reproduce.
246- original_max_index = max_index
247-
248- if next_revision is None :
249- # Start from the top.
230+ assert max_index >= 0 , max_index
231+ assert max_index < len (revision_list ), max_index
232+
233+ # Note that we search exponentially through the indices in the revision list,
234+ # not through the revisions themselves. If revisions are fairly evenly
235+ # distributed, then this distinction is irrelevant. If however there are large
236+ # irregular gaps in between revisions, this might appear a bit strange at a
237+ # glance. Consider:
238+ #
239+ # Revisions: 1, 2, 3, 4, 5, 50, 51, 127, 128
240+ # Search order: 4 3 2 1
241+ #
242+ # Appears as trying: 127, 51, 5, 1
243+ # Instead of: 127, 126, 124, 120, 112, 96, 64, 1
244+ #
245+ # Both would work, but searching through indices in the revision list is both
246+ # easier to express in code and more efficient since what we care about is
247+ # searching through revisions that we *can* test against, not through all
248+ # revisions in the source code.
249+ #
250+ # The later bisection stage (once we have found a min revision) similarly
251+ # operates on indices and not revisions.
252+
253+ # Find the index of the original crashing revision so that we can keep
254+ # doubling the step size in our exponential search backwards.
255+ crash_index = revisions .find_max_revision_index (revision_list ,
256+ testcase .crash_revision )
257+ if crash_index is None :
258+ # If the crash revision is no longer in the revision list, nor does there
259+ # exist any later revision, just use the last revision in the list instead.
260+ # This will reduce the step size for our exponential search by as little as
261+ # possible.
262+ crash_index = len (revision_list ) - 1
263+
264+ if max_index == crash_index :
265+ # Starting from scratch.
250266 next_index = max_index - 1
267+ elif crash_index > max_index :
268+ # Double the distance to the original crash index.
269+ distance = crash_index - max_index
270+ next_index = max_index - distance
251271 else :
252- # Find the index of `next_revision`, or the next earlier revision's index
253- # if `next_revision` does not exist anymore.
254- next_index = revisions .find_min_revision_index (revision_list , next_revision )
272+ # If `max_index` is higher than `crash_index`, this means that in some
273+ # previous iteration the original crash revision could not be found, so a
274+ # higher revision was used instead, *and* we timed out before we could
275+ # search below `crash_revision`. Now, for some reason, the crash revision is
276+ # found again, so just use it and restart from scratch.
277+ max_index = crash_index
278+ next_index = max_index - 1
255279
256- # If there is no good revision <= next_revision, use the earliest revision.
257- if next_index is None :
258- next_index = 0
280+ assert next_index < max_index , (next_index , max_index )
281+ assert max_index <= crash_index , (max_index , crash_index )
259282
260283 while True :
261284 # If we fall off the end of the revision list, try the earliest revision.
262285 # Note that if the earliest revision is bad, we will skip it and try the
263286 # next one. This will go on until we find the first good revision, at which
264287 # point we will stop looping.
265288 next_index = max (next_index , 0 )
266-
267289 next_revision = revision_list [next_index ]
268- regression_task_output .last_regression_next = next_revision
290+
291+ if next_index == max_index :
292+ # The first good build crashes, there is no min revision to be found.
293+ regression_task_output .regression_range_start = 0
294+ regression_task_output .regression_range_end = next_revision
295+ return None , None , uworker_msg_pb2 .Output ( # pylint: disable=no-member
296+ regression_task_output = regression_task_output )
269297
270298 if time .time () > deadline :
271299 return None , None , uworker_msg_pb2 .Output ( # pylint: disable=no-member
@@ -275,13 +303,6 @@ def find_min_revision(
275303 f'next revision: r{ next_revision } ' ,
276304 regression_task_output = regression_task_output )
277305
278- if next_index == max_index :
279- # The first good build crashes, there is no min revision to be found.
280- regression_task_output .regression_range_start = 0
281- regression_task_output .regression_range_end = next_revision
282- return None , None , uworker_msg_pb2 .Output ( # pylint: disable=no-member
283- regression_task_output = regression_task_output )
284-
285306 is_crash , error = _testcase_reproduces_in_revision (
286307 testcase ,
287308 testcase_file_path ,
@@ -306,7 +327,6 @@ def find_min_revision(
306327
307328 if not is_crash :
308329 # We found a suitable min revision, success!
309- regression_task_output .ClearField ('last_regression_next' )
310330 regression_task_output .last_regression_min = next_revision
311331 return next_index , max_index , None
312332
@@ -316,29 +336,13 @@ def find_min_revision(
316336
317337 # Continue exponential search backwards. Double the distance (in indices)
318338 # from our start point.
319- #
320- # Note that this means we search exponentially through the indices in the
321- # revision list, not through the revisions themselves. If revisions are
322- # fairly evenly distributed, then this distinction is irrelevant. If however
323- # there are large irregular gaps in between revisions, this might appear a
324- # bit strange at a glance. Consider:
325- #
326- # Revisions: 1, 2, 3, 4, 5, 50, 51, 127, 128
327- # Search order: 4 3 2 1
328- #
329- # Appears as trying: 127, 51, 5, 1
330- # Instead of: 127, 126, 124, 120, 112, 96, 64, 1
331- #
332- # Both would work, but searching through indices in the revision list is
333- # both easier to express in code and more efficient since what we care
334- # about is searching through revisions that we *can* test against, not
335- # through all revisions in the source code.
336- #
337- # The later bisection stage (once we have found a min revision) similarly
338- # operates on indices and not revisions.
339- distance = original_max_index - next_index
339+ distance = crash_index - next_index
340340 next_index -= distance
341341
342+ # Assert forward progress.
343+ # Note that `max_index` stores the previous value of `next_index`.
344+ assert distance >= 0 , (distance , crash_index , max_index )
345+
342346
343347def validate_regression_range (
344348 testcase : data_types .Testcase ,
@@ -407,45 +411,28 @@ def find_regression_range(
407411 # Pick up where left off in a previous run if necessary.
408412 min_revision = testcase .get_metadata ('last_regression_min' )
409413 max_revision = testcase .get_metadata ('last_regression_max' )
410- next_revision = testcase .get_metadata ('last_regression_next' )
411414
412415 logs .info ('Build set up, starting search for regression range. State: ' +
413416 f'crash_revision = { testcase .crash_revision } , ' +
414417 f'max_revision = { max_revision } , ' +
415- f'min_revision = { min_revision } , ' +
416- f'next_revision = { next_revision } . ' )
418+ f'min_revision = { min_revision } .' )
417419
418420 if max_revision is None :
419421 logs .info ('Starting search for min revision from scratch.' )
420422
421- if min_revision is not None or next_revision is not None :
423+ if min_revision is not None :
422424 logs .error ('Inconsistent regression state: ' +
423- 'resetting min_revision and next_revision to None.' )
425+ 'resetting min_revision to None.' )
424426 min_revision = None
425- next_revision = None
426427
427428 elif min_revision is None :
428429 # max_revision is not None.
429430 logs .info ('Resuming search for min revision.' )
430431
431- if next_revision is None :
432- logs .error ('Inconsistent regression state: missing next_revision. ' +
433- 'Restarting search from scratch.' )
434-
435432 else :
436433 # max_revision and min_revision are not None.
437434 logs .info ('Resuming bisection.' )
438435
439- if next_revision is not None :
440- logs .error ('Inconsistent regression state. ' +
441- 'Resetting next_revision to None.' )
442- next_revision = None
443-
444- # Notice that regardless of whether `max_revision` was None or not, if
445- # `min_revision` is None then we should search exponentially backwards. Even
446- # though we are overwriting `max_revision` here, `next_revision` will tell us
447- # whether we should start from scratch or whether we should pick up where we
448- # left off.
449436 if not max_revision :
450437 max_revision = testcase .crash_revision
451438
@@ -501,7 +488,7 @@ def find_regression_range(
501488 if not min_index :
502489 min_index , max_index , output = find_min_revision (
503490 testcase , testcase_file_path , job_type , fuzz_target , deadline ,
504- revision_list , max_index , next_revision , regression_task_output )
491+ revision_list , max_index , regression_task_output )
505492 if output :
506493 # Either we encountered an error, or there is no good revision and the
507494 # regression range is `0:revision_list[0]`.
0 commit comments