Skip to content

Commit 0ef1017

Browse files
committed
Auto-adjust timeout based on actual durations
Automatically reduce the effective "--timeout" value when jobs actually finish faster than that value. Because timings vary hugely in practice, we multiply the measured durations by 10. The auto-adjustment can save a lot of time during the late stages of a reduction, when the interestingness test normally finishes quickly but some unsuccessful reductions may hang (something we observe happening a lot in practice with C/C++ compilers).
1 parent b9613c8 commit 0ef1017

File tree

5 files changed

+88
-17
lines changed

5 files changed

+88
-17
lines changed

cvise-cli.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,11 @@ def main():
237237
default=300,
238238
help='Interestingness test timeout in seconds',
239239
)
240+
parser.add_argument(
    '--no-auto-adjust-timeout',
    # Presence-only switch: with type=bool the option demanded a value, and any
    # non-empty string (even "False") was converted to True.
    action='store_true',
    help='Disable automatic timeout calculation based on actual execution durations.',
)
240245
parser.add_argument('--no-cache', action='store_true', help="Don't cache behavior of passes")
241246
parser.add_argument(
242247
'--skip-key-off',
@@ -471,6 +476,7 @@ def do_reduce(args):
471476
args.start_with_pass,
472477
args.skip_after_n_transforms,
473478
args.stopping_threshold,
479+
args.no_auto_adjust_timeout,
474480
) as test_manager:
475481
reducer = CVise(test_manager, args.skip_interestingness_test_check)
476482

cvise/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ set(SOURCE_FILES
141141
"utils/readkey_posix.py"
142142
"utils/readkey_windows.py"
143143
"utils/readkey.py"
144+
"utils/resource.py"
144145
"utils/sigmonitor.py"
145146
"utils/statistics.py"
146147
"utils/testing.py"

cvise/tests/test_test_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ def manager(tmp_path: Path, input_path: Path, interestingness_script: str, job_t
304304
START_WITH_PASS = None
305305
SKIP_AFTER_N_TRANSFORMS = None
306306
STOPPING_THRESHOLD = 1.0
307+
NO_AUTO_ADJUST_TIMEOUT = False
307308
pass_statistic = statistics.PassStatistic()
308309

309310
script_path = tmp_path / 'check.sh'
@@ -328,6 +329,7 @@ def manager(tmp_path: Path, input_path: Path, interestingness_script: str, job_t
328329
START_WITH_PASS,
329330
SKIP_AFTER_N_TRANSFORMS,
330331
STOPPING_THRESHOLD,
332+
NO_AUTO_ADJUST_TIMEOUT,
331333
)
332334
test_manager.__enter__()
333335
try:

cvise/utils/resource.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import time
2+
from collections import deque
3+
4+
5+
class TimeoutEstimator:
    """Suggests a job timeout derived from the most recently observed job durations.

    The suggestion is the longest remembered duration (but never less than
    ``_MEASUREMENT_LOWER_BOUND`` seconds) multiplied by
    ``_MEASUREMENTS_MULTIPLIER``, and it never exceeds the timeout the
    estimator was created with.
    """

    _HISTORY_LEN = 30  # how many of the latest durations are remembered
    _MEASUREMENT_LOWER_BOUND = 1  # seconds
    _MEASUREMENTS_MULTIPLIER = 10

    def __init__(self, initial_timeout: float):
        """Start with an empty history; ``initial_timeout`` is both the fallback and the cap."""
        # The bounded deque silently drops the oldest entry once it is full.
        self._recent_durations: deque[float] = deque(maxlen=self._HISTORY_LEN)
        self._initial_timeout = initial_timeout

    def update(self, start_time: float) -> None:
        """Record the duration of a job that began at ``start_time`` and ends now.

        ``start_time`` must come from ``time.monotonic()``.
        """
        self._recent_durations.append(time.monotonic() - start_time)

    def estimate(self) -> float:
        """Return the timeout to use for the next job."""
        history = self._recent_durations
        if history:
            # Very short measurements are raised to the lower bound before scaling.
            longest = max(max(history), self._MEASUREMENT_LOWER_BOUND)
            return min(self._MEASUREMENTS_MULTIPLIER * longest, self._initial_timeout)
        # Nothing measured yet: fall back to the initial timeout.
        return self._initial_timeout

cvise/utils/testing.py

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from cvise.utils.hint import is_special_hint_type, load_hints
4141
from cvise.utils.process import MPContextHook, MPTaskLossWorkaround, ProcessEventNotifier, ProcessMonitor
4242
from cvise.utils.readkey import KeyLogger
43+
from cvise.utils.resource import TimeoutEstimator
4344

4445
MAX_PASS_INCREASEMENT_THRESHOLD = 3
4546

@@ -338,6 +339,15 @@ def should_reinit_after_test_case_update(self, other_contexts: Sequence[PassCont
338339
return False
339340

340341

342+
@dataclass
class PassGlobalInfo:
    """Per-pass bookkeeping; currently only the timeout estimator for the pass's init jobs."""

    # Estimator that is updated with the durations of the pass's finished init jobs.
    init_job_timeout_estimator: TimeoutEstimator

    @staticmethod
    def create(starting_init_job_timeout: float) -> PassGlobalInfo:
        """Build an instance whose estimator starts from ``starting_init_job_timeout``."""
        estimator = TimeoutEstimator(starting_init_job_timeout)
        return PassGlobalInfo(init_job_timeout_estimator=estimator)
349+
350+
341351
@unique
342352
class JobType(Enum):
343353
INIT = auto()
@@ -425,7 +435,7 @@ def __init__(
425435
self,
426436
pass_statistic,
427437
test_script: Path,
428-
timeout,
438+
user_specified_timeout,
429439
save_temps,
430440
test_cases: list[Path],
431441
parallel_tests,
@@ -440,9 +450,10 @@ def __init__(
440450
start_with_pass,
441451
skip_after_n_transforms,
442452
stopping_threshold,
453+
no_auto_adjust_timeout: bool,
443454
):
444455
self.test_script: Path = test_script.absolute()
445-
self.timeout = timeout
456+
self.user_specified_timeout = user_specified_timeout
446457
self.save_temps = save_temps
447458
self.pass_statistic = pass_statistic
448459
self.test_cases: set[Path] = set()
@@ -459,6 +470,7 @@ def __init__(
459470
self.start_with_pass = start_with_pass
460471
self.skip_after_n_transforms = skip_after_n_transforms
461472
self.stopping_threshold = stopping_threshold
473+
self.no_auto_adjust_timeout = no_auto_adjust_timeout
462474
self.exit_stack = contextlib.ExitStack()
463475

464476
for test_case in test_cases:
@@ -471,6 +483,8 @@ def __init__(
471483
raise ScriptInsideTestCaseError(test_case, self.test_script)
472484
self.test_cases.add(test_case)
473485

486+
self.transform_job_timeout_estimator = TimeoutEstimator(user_specified_timeout)
487+
self.pass_global_infos: dict[str, PassGlobalInfo] = {}
474488
self.orig_total_file_size = self.total_file_size
475489
self.cache = None if self.no_cache else cache.Cache(f'{self.TEMP_PREFIX}cache-')
476490
self.pass_contexts: list[PassContext] = []
@@ -793,6 +807,10 @@ def handle_finished_init_job(self, job: Job) -> None:
793807
job.temporary_folder = None
794808

795809
self.pass_statistic.add_initialized(job.pass_, job.start_time)
810+
811+
pass_name = repr(job.pass_)
812+
self.pass_global_infos[pass_name].init_job_timeout_estimator.update(job.start_time)
813+
796814
if isinstance(ctx.pass_, HintBasedPass):
797815
ctx.hint_bundle_paths = {} if ctx.state is None else ctx.state.hint_bundle_paths()
798816

@@ -816,6 +834,7 @@ def handle_finished_transform_job(self, job: Job) -> None:
816834
return
817835
assert outcome == PassCheckingOutcome.ACCEPT
818836
self.pass_statistic.add_success(job.pass_)
837+
self.transform_job_timeout_estimator.update(job.start_time)
819838
self.maybe_update_success_candidate(job.order, job.pass_, job.pass_id, env)
820839
if self.interleaving:
821840
self.folding_manager.on_transform_job_success(env.state)
@@ -953,6 +972,11 @@ def run_passes(self, passes: Sequence[AbstractPass], interleaving: bool):
953972
else:
954973
return
955974

975+
for pass_ in passes:
976+
pass_name = repr(pass_)
977+
if pass_name not in self.pass_global_infos:
978+
self.pass_global_infos[pass_name] = PassGlobalInfo.create(self.get_starting_init_job_timeout())
979+
956980
self.order = 1
957981
self.last_restart_job_order = None
958982
self.pass_restart_queue = []
@@ -1049,6 +1073,9 @@ def run_passes(self, passes: Sequence[AbstractPass], interleaving: bool):
10491073
self.remove_roots()
10501074
sys.exit(1)
10511075

1076+
def get_starting_init_job_timeout(self) -> float:
    """Return the timeout used to seed a pass's init-job timeout estimator.

    It is the current transform-job timeout estimate scaled by
    ``INIT_TIMEOUT_FACTOR``.
    """
    transform_estimate = self.transform_job_timeout_estimator.estimate()
    return self.INIT_TIMEOUT_FACTOR * transform_estimate
1078+
10521079
def process_result(self) -> None:
10531080
assert self.success_candidate
10541081
new_test_case = self.success_candidate.test_case_path
@@ -1205,6 +1232,13 @@ def schedule_init(self, pass_id: int, ready_hint_types: set[bytes]) -> None:
12051232
ctx = self.pass_contexts[pass_id]
12061233
assert ctx.can_init_now(ready_hint_types)
12071234

1235+
pass_name = repr(ctx.pass_)
1236+
timeout = (
1237+
self.INIT_TIMEOUT_FACTOR * self.user_specified_timeout
1238+
if self.no_auto_adjust_timeout
1239+
else self.pass_global_infos[pass_name].init_job_timeout_estimator.estimate()
1240+
)
1241+
12081242
dependee_types = set(ctx.pass_.input_hint_types()) if isinstance(ctx.pass_, HintBasedPass) else set()
12091243
dependee_bundle_paths = []
12101244
for other in self.pass_contexts:
@@ -1223,7 +1257,7 @@ def schedule_init(self, pass_id: int, ready_hint_types: set[bytes]) -> None:
12231257
pass_new=ctx.pass_.new,
12241258
test_case=self.current_test_case,
12251259
tmp_dir=tmp_dir,
1226-
job_timeout=self.timeout,
1260+
job_timeout=timeout,
12271261
pid_queue=self.process_monitor.pid_queue,
12281262
dependee_bundle_paths=dependee_bundle_paths,
12291263
)
@@ -1234,14 +1268,11 @@ def schedule_init(self, pass_id: int, ready_hint_types: set[bytes]) -> None:
12341268
pass_previous_state=ctx.state,
12351269
new_tmp_dir=tmp_dir,
12361270
pass_succeeded_state=ctx.taken_succeeded_state,
1237-
job_timeout=self.timeout,
1271+
job_timeout=timeout,
12381272
pid_queue=self.process_monitor.pid_queue,
12391273
dependee_bundle_paths=dependee_bundle_paths,
12401274
)
1241-
init_timeout = self.INIT_TIMEOUT_FACTOR * self.timeout
1242-
future = self.worker_pool.schedule(
1243-
_worker_process_job_wrapper, args=[self.order, env.run], timeout=init_timeout
1244-
)
1275+
future = self.worker_pool.schedule(_worker_process_job_wrapper, args=[self.order, env.run], timeout=timeout)
12451276
self.jobs.append(
12461277
Job(
12471278
type=JobType.INIT,
@@ -1252,7 +1283,7 @@ def schedule_init(self, pass_id: int, ready_hint_types: set[bytes]) -> None:
12521283
pass_user_visible_name=ctx.pass_.user_visible_name(),
12531284
pass_job_counter=ctx.pass_job_counter,
12541285
start_time=time.monotonic(),
1255-
timeout=init_timeout,
1286+
timeout=timeout,
12561287
temporary_folder=tmp_dir,
12571288
)
12581289
)
@@ -1271,6 +1302,11 @@ def schedule_transform(self, pass_id: int) -> None:
12711302
# simply hardcode that hint-based passes are capable of this (and they actually need the original files anyway).
12721303
should_copy_test_cases = not isinstance(ctx.pass_, HintBasedPass)
12731304

1305+
timeout = (
1306+
self.user_specified_timeout
1307+
if self.no_auto_adjust_timeout
1308+
else self.transform_job_timeout_estimator.estimate()
1309+
)
12741310
folder = Path(tempfile.mkdtemp(prefix=self.TEMP_PREFIX, dir=ctx.temporary_root))
12751311
env = TestEnvironment(
12761312
ctx.state,
@@ -1283,9 +1319,7 @@ def schedule_transform(self, pass_id: int) -> None:
12831319
ctx.pass_.transform,
12841320
self.process_monitor.pid_queue,
12851321
)
1286-
future = self.worker_pool.schedule(
1287-
_worker_process_job_wrapper, args=[self.order, env.run], timeout=self.timeout
1288-
)
1322+
future = self.worker_pool.schedule(_worker_process_job_wrapper, args=[self.order, env.run], timeout=timeout)
12891323
self.jobs.append(
12901324
Job(
12911325
type=JobType.TRANSFORM,
@@ -1296,7 +1330,7 @@ def schedule_transform(self, pass_id: int) -> None:
12961330
pass_user_visible_name=ctx.pass_.user_visible_name(),
12971331
pass_job_counter=ctx.pass_job_counter,
12981332
start_time=time.monotonic(),
1299-
timeout=self.timeout,
1333+
timeout=timeout,
13001334
temporary_folder=folder,
13011335
)
13021336
)
@@ -1311,6 +1345,11 @@ def schedule_fold(self, folding_state: FoldingStateIn) -> None:
13111345
assert self.interleaving
13121346

13131347
should_copy_test_cases = False # the fold transform creates the files itself
1348+
timeout = (
1349+
self.user_specified_timeout
1350+
if self.no_auto_adjust_timeout
1351+
else self.transform_job_timeout_estimator.estimate()
1352+
)
13141353
folder = Path(tempfile.mkdtemp(prefix=self.TEMP_PREFIX + 'folding-'))
13151354
env = TestEnvironment(
13161355
folding_state,
@@ -1323,9 +1362,7 @@ def schedule_fold(self, folding_state: FoldingStateIn) -> None:
13231362
FoldingManager.transform,
13241363
self.process_monitor.pid_queue,
13251364
)
1326-
future = self.worker_pool.schedule(
1327-
_worker_process_job_wrapper, args=[self.order, env.run], timeout=self.timeout
1328-
)
1365+
future = self.worker_pool.schedule(_worker_process_job_wrapper, args=[self.order, env.run], timeout=timeout)
13291366
self.jobs.append(
13301367
Job(
13311368
type=JobType.TRANSFORM,
@@ -1336,7 +1373,7 @@ def schedule_fold(self, folding_state: FoldingStateIn) -> None:
13361373
pass_user_visible_name='Folding',
13371374
pass_job_counter=None,
13381375
start_time=time.monotonic(),
1339-
timeout=self.timeout,
1376+
timeout=timeout,
13401377
temporary_folder=folder,
13411378
)
13421379
)

0 commit comments

Comments
 (0)