Skip to content

Commit f5c9df9

Browse files
committed
Remove last_regression_next.
1 parent faa4900 commit f5c9df9

File tree

2 files changed

+209
-297
lines changed

2 files changed

+209
-297
lines changed

src/clusterfuzz/_internal/bot/tasks/utasks/regression_task.py

Lines changed: 66 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,7 @@ def _save_current_regression_range_indices(
6969
if task_output.HasField('last_regression_max'):
7070
last_regression_max = task_output.last_regression_max
7171

72-
last_regression_next = None
73-
if task_output.HasField('last_regression_next'):
74-
last_regression_next = task_output.last_regression_next
75-
76-
if (last_regression_min is None and last_regression_max is None and
77-
last_regression_next is None):
72+
if last_regression_min is None and last_regression_max is None:
7873
return # Optimization to avoid useless load/put.
7974

8075
testcase = data_handler.get_testcase_by_id(testcase_id)
@@ -85,9 +80,6 @@ def _save_current_regression_range_indices(
8580
testcase.set_metadata(
8681
'last_regression_max', last_regression_max, update_testcase=False)
8782

88-
testcase.set_metadata(
89-
'last_regression_next', last_regression_next, update_testcase=False)
90-
9183
testcase.put()
9284

9385

@@ -168,7 +160,6 @@ def find_min_revision(
168160
deadline: float,
169161
revision_list: List[int],
170162
max_index: int,
171-
next_revision: Optional[int],
172163
regression_task_output: uworker_msg_pb2.RegressionTaskOutput, # pylint: disable=no-member
173164
) -> Optional[uworker_msg_pb2.Output]: # pylint: disable=no-member
174165
"""Attempts to find a min revision to start bisecting from. Such a revision
@@ -185,8 +176,6 @@ def find_min_revision(
185176
max_index: The index of the known max revision for bisection. Must be a
186177
valid index within `revision_list`. It is assumed that the testcase
187178
reproduces at the pointed-to revision.
188-
next_revision: The next revision at which to continue searching backwards
189-
for a min revision. Can be used to resume execution after timing out.
190179
regression_task_output: Output argument. Any bad builds encountered while
191180
searching for the earliest good build are appended to `build_data_list`.
192181
See also below for values set in different return conditions.
@@ -205,7 +194,6 @@ def find_min_revision(
205194
206195
regression_task_output.last_regression_min is set
207196
regression_task_output.last_regression_max is set
208-
regression_task_output.last_regression_next is cleared
209197
210198
b. If no such revision can be found - i.e. the earliest good revision X
211199
still reproduces the testcase:
@@ -227,7 +215,6 @@ def find_min_revision(
227215
output.error_type = REGRESSION_TIMED_OUT
228216
output.regression_task_output = regression_task_output
229217
output.regression_task_output.last_regression_max is set
230-
output.regression_task_output.last_regression_next is set
231218
232219
d. If another error occurred:
233220
@@ -238,34 +225,75 @@ def find_min_revision(
238225
output.error_type indicates the error that occurred
239226
output.regression_task_output = regression_task_output
240227
output.regression_task_output.last_regression_max is set
241-
output.regression_task_output.last_regression_next is set
242228
243229
"""
244-
# Save this value so we can calculate exponential distances correctly even if
245-
# we find earlier builds that reproduce.
246-
original_max_index = max_index
247-
248-
if next_revision is None:
249-
# Start from the top.
230+
assert max_index >= 0, max_index
231+
assert max_index < len(revision_list), max_index
232+
233+
# Note that we search exponentially through the indices in the revision list,
234+
# not through the revisions themselves. If revisions are fairly evenly
235+
# distributed, then this distinction is irrelevant. If however there are large
236+
# irregular gaps in between revisions, this might appear a bit strange at a
237+
# glance. Consider:
238+
#
239+
# Revisions: 1, 2, 3, 4, 5, 50, 51, 127, 128
240+
# Search order: 4 3 2 1
241+
#
242+
# Appears as trying: 127, 51, 5, 1
243+
# Instead of: 127, 126, 124, 120, 112, 96, 64, 1
244+
#
245+
# Both would work, but searching through indices in the revision list is both
246+
# easier to express in code and more efficient since what we care about is
247+
# searching through revisions that we *can* test against, not through all
248+
# revisions in the source code.
249+
#
250+
# The later bisection stage (once we have found a min revision) similarly
251+
# operates on indices and not revisions.
252+
253+
# Find the index of the original crashing revision so that we can keep
254+
# doubling the step size in our exponential search backwards.
255+
crash_index = revisions.find_max_revision_index(revision_list,
256+
testcase.crash_revision)
257+
if crash_index is None:
258+
# If the crash revision is no longer in the revision list, nor does there
259+
# exist any later revision, just use the last revision in the list instead.
260+
# This will reduce the step size for our exponential search by as little as
261+
# possible.
262+
crash_index = len(revision_list) - 1
263+
264+
if max_index == crash_index:
265+
# Starting from scratch.
250266
next_index = max_index - 1
267+
elif crash_index > max_index:
268+
# Double the distance to the original crash index.
269+
distance = crash_index - max_index
270+
next_index = max_index - distance
251271
else:
252-
# Find the index of `next_revision`, or the next earlier revision's index
253-
# if `next_revision` does not exist anymore.
254-
next_index = revisions.find_min_revision_index(revision_list, next_revision)
272+
# If `max_index` is higher than `crash_index`, this means that in some
273+
# previous iteration the original crash revision could not be found, so a
274+
# higher revision was used instead, *and* we timed out before we could
275+
# search below `crash_revision`. Now, for some reason, the crash revision is
276+
# found again, so just use it and restart from scratch.
277+
max_index = crash_index
278+
next_index = max_index - 1
255279

256-
# If there is no good revision <= next_revision, use the earliest revision.
257-
if next_index is None:
258-
next_index = 0
280+
assert next_index < max_index, (next_index, max_index)
281+
assert max_index <= crash_index, (max_index, crash_index)
259282

260283
while True:
261284
# If we fall off the end of the revision list, try the earliest revision.
262285
# Note that if the earliest revision is bad, we will skip it and try the
263286
# next one. This will go on until we find the first good revision, at which
264287
# point we will stop looping.
265288
next_index = max(next_index, 0)
266-
267289
next_revision = revision_list[next_index]
268-
regression_task_output.last_regression_next = next_revision
290+
291+
if next_index == max_index:
292+
# The first good build crashes, there is no min revision to be found.
293+
regression_task_output.regression_range_start = 0
294+
regression_task_output.regression_range_end = next_revision
295+
return None, None, uworker_msg_pb2.Output( # pylint: disable=no-member
296+
regression_task_output=regression_task_output)
269297

270298
if time.time() > deadline:
271299
return None, None, uworker_msg_pb2.Output( # pylint: disable=no-member
@@ -275,13 +303,6 @@ def find_min_revision(
275303
f'next revision: r{next_revision}',
276304
regression_task_output=regression_task_output)
277305

278-
if next_index == max_index:
279-
# The first good build crashes, there is no min revision to be found.
280-
regression_task_output.regression_range_start = 0
281-
regression_task_output.regression_range_end = next_revision
282-
return None, None, uworker_msg_pb2.Output( # pylint: disable=no-member
283-
regression_task_output=regression_task_output)
284-
285306
is_crash, error = _testcase_reproduces_in_revision(
286307
testcase,
287308
testcase_file_path,
@@ -306,7 +327,6 @@ def find_min_revision(
306327

307328
if not is_crash:
308329
# We found a suitable min revision, success!
309-
regression_task_output.ClearField('last_regression_next')
310330
regression_task_output.last_regression_min = next_revision
311331
return next_index, max_index, None
312332

@@ -316,29 +336,13 @@ def find_min_revision(
316336

317337
# Continue exponential search backwards. Double the distance (in indices)
318338
# from our start point.
319-
#
320-
# Note that this means we search exponentially through the indices in the
321-
# revision list, not through the revisions themselves. If revisions are
322-
# fairly evenly distributed, then this distinction is irrelevant. If however
323-
# there are large irregular gaps in between revisions, this might appear a
324-
# bit strange at a glance. Consider:
325-
#
326-
# Revisions: 1, 2, 3, 4, 5, 50, 51, 127, 128
327-
# Search order: 4 3 2 1
328-
#
329-
# Appears as trying: 127, 51, 5, 1
330-
# Instead of: 127, 126, 124, 120, 112, 96, 64, 1
331-
#
332-
# Both would work, but searching through indices in the revision list is
333-
# both easier to express in code and more efficient since what we care
334-
# about is searching through revisions that we *can* test against, not
335-
# through all revisions in the source code.
336-
#
337-
# The later bisection stage (once we have found a min revision) similarly
338-
# operates on indices and not revisions.
339-
distance = original_max_index - next_index
339+
distance = crash_index - next_index
340340
next_index -= distance
341341

342+
# Assert forward progress.
343+
# Note that `max_index` stores the previous value of `next_index`.
344+
assert distance >= 0, (distance, crash_index, max_index)
345+
342346

343347
def validate_regression_range(
344348
testcase: data_types.Testcase,
@@ -407,45 +411,28 @@ def find_regression_range(
407411
# Pick up where left off in a previous run if necessary.
408412
min_revision = testcase.get_metadata('last_regression_min')
409413
max_revision = testcase.get_metadata('last_regression_max')
410-
next_revision = testcase.get_metadata('last_regression_next')
411414

412415
logs.info('Build set up, starting search for regression range. State: ' +
413416
f'crash_revision = {testcase.crash_revision}, ' +
414417
f'max_revision = {max_revision}, ' +
415-
f'min_revision = {min_revision}, ' +
416-
f'next_revision = {next_revision}. ')
418+
f'min_revision = {min_revision}.')
417419

418420
if max_revision is None:
419421
logs.info('Starting search for min revision from scratch.')
420422

421-
if min_revision is not None or next_revision is not None:
423+
if min_revision is not None:
422424
logs.error('Inconsistent regression state: ' +
423-
'resetting min_revision and next_revision to None.')
425+
'resetting min_revision to None.')
424426
min_revision = None
425-
next_revision = None
426427

427428
elif min_revision is None:
428429
# max_revision is not None.
429430
logs.info('Resuming search for min revision.')
430431

431-
if next_revision is None:
432-
logs.error('Inconsistent regression state: missing next_revision. ' +
433-
'Restarting search from scratch.')
434-
435432
else:
436433
# max_revision and min_revision are not None.
437434
logs.info('Resuming bisection.')
438435

439-
if next_revision is not None:
440-
logs.error('Inconsistent regression state. ' +
441-
'Resetting next_revision to None.')
442-
next_revision = None
443-
444-
# Notice that regardless of whether `max_revision` was None or not, if
445-
# `min_revision` is None then we should search exponentially backwards. Even
446-
# though we are overwriting `max_revision` here, `next_revision` will tell us
447-
# whether we should start from scratch or whether we should pick up where we
448-
# left off.
449436
if not max_revision:
450437
max_revision = testcase.crash_revision
451438

@@ -501,7 +488,7 @@ def find_regression_range(
501488
if not min_index:
502489
min_index, max_index, output = find_min_revision(
503490
testcase, testcase_file_path, job_type, fuzz_target, deadline,
504-
revision_list, max_index, next_revision, regression_task_output)
491+
revision_list, max_index, regression_task_output)
505492
if output:
506493
# Either we encountered an error, or there is no good revision and the
507494
# regression range is `0:revision_list[0]`.

0 commit comments

Comments
 (0)