
Commit 20997be

Give corpus pruning enough time to complete (#4396)
Corpus pruning can take up to 24 hours to complete, which is longer than the default limit of 6 hours.
1 parent 597d1e1 commit 20997be
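
The mechanism is a per-command lease-time lookup in clusterfuzz._internal.base.tasks. Below is a minimal sketch of how that lookup behaves; the real TASK_LEASE_SECONDS_BY_COMMAND mapping is not part of this diff, so the values here are assumptions inferred from the updated tests (6 hour default, 24 hours for corpus pruning).

TASK_LEASE_SECONDS = 6 * 60 * 60  # 21600s, the assumed default cap
TASK_LEASE_SECONDS_BY_COMMAND = {
    'corpus_pruning': 24 * 60 * 60,  # 86400s, a full day for pruning
}


def _get_task_duration(command):
  # Fall back to the default lease time when a command has no override.
  return TASK_LEASE_SECONDS_BY_COMMAND.get(command, TASK_LEASE_SECONDS)


assert f'{_get_task_duration("corpus_pruning")}s' == '86400s'
assert f'{_get_task_duration("analyze")}s' == '21600s'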

File tree: 3 files changed (+52 −19 lines)


src/clusterfuzz/_internal/base/tasks/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -281,7 +281,7 @@ def is_done_collecting_messages():
 def get_postprocess_task():
   """Gets a postprocess task if one exists."""
   # This should only be run on non-preemptible bots.
-  if not (task_utils.is_remotely_executing_utasks() or
+  if not (task_utils.is_remotely_executing_utasks() and
           task_utils.get_opted_in_tasks()):
     return None
   # Postprocess is platform-agnostic, so we run all such tasks on our
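
This one-word change tightens the guard in get_postprocess_task: postprocess work is now fetched only when remote utask execution is enabled and there are opted-in tasks, not when either condition holds. A small sketch of the behavioral difference, with hypothetical booleans standing in for the two task_utils calls:

def should_fetch_postprocess(remotely_executing, opted_in_tasks, strict=True):
  # Old guard: `not (a or b)` skips fetching only when both are falsy.
  # New guard: `not (a and b)` skips fetching unless both are truthy.
  allowed = (remotely_executing and opted_in_tasks) if strict else (
      remotely_executing or opted_in_tasks)
  return bool(allowed)


# Remote execution enabled, but no opted-in tasks:
assert should_fetch_postprocess(True, False, strict=False)      # old: fetched
assert not should_fetch_postprocess(True, False, strict=True)   # new: skipped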

src/clusterfuzz/_internal/google_cloud_utils/batch.py

Lines changed: 25 additions & 8 deletions

@@ -21,6 +21,7 @@
 from google.cloud import batch_v1 as batch

 from clusterfuzz._internal.base import retry
+from clusterfuzz._internal.base import tasks
 from clusterfuzz._internal.base import utils
 from clusterfuzz._internal.base.tasks import task_utils
 from clusterfuzz._internal.config import local_config
@@ -33,7 +34,6 @@

 _local = threading.local()

-MAX_DURATION = f'{60 * 60 * 6}s'
 RETRY_COUNT = 0

 TASK_BUNCH_SIZE = 20
@@ -46,9 +46,20 @@
 MAX_CONCURRENT_VMS_PER_JOB = 1000

 BatchWorkloadSpec = collections.namedtuple('BatchWorkloadSpec', [
-    'clusterfuzz_release', 'disk_size_gb', 'disk_type', 'docker_image',
-    'user_data', 'service_account_email', 'subnetwork', 'preemptible',
-    'project', 'gce_zone', 'machine_type', 'network', 'gce_region'
+    'clusterfuzz_release',
+    'disk_size_gb',
+    'disk_type',
+    'docker_image',
+    'user_data',
+    'service_account_email',
+    'subnetwork',
+    'preemptible',
+    'project',
+    'gce_zone',
+    'machine_type',
+    'network',
+    'gce_region',
+    'max_run_duration',
 ])


@@ -158,7 +169,7 @@ def _get_task_spec(batch_workload_spec):
   task_spec = batch.TaskSpec()
   task_spec.runnables = [runnable]
   task_spec.max_retry_count = RETRY_COUNT
-  task_spec.max_run_duration = MAX_DURATION
+  task_spec.max_run_duration = batch_workload_spec.max_run_duration
   return task_spec


@@ -219,8 +230,7 @@ def _create_job(spec, input_urls):
   create_request.job_id = job_name
   # The job's parent is the region in which the job will run
   project_id = spec.project
-  create_request.parent = (
-      f'projects/{project_id}/locations/{spec.gce_region}')
+  create_request.parent = f'projects/{project_id}/locations/{spec.gce_region}'
   job_result = _send_create_job_request(create_request)
   logs.info(f'Created batch job id={job_name}.', spec=spec)
   return job_result
@@ -274,6 +284,11 @@ def _get_config_name(command, job_name):
   return config_name


+def _get_task_duration(command):
+  return tasks.TASK_LEASE_SECONDS_BY_COMMAND.get(command,
+                                                 tasks.TASK_LEASE_SECONDS)
+
+
 def _get_spec_from_config(command, job_name):
   """Gets the configured specifications for a batch workload."""
   config_name = _get_config_name(command, job_name)
@@ -285,6 +300,7 @@ def _get_spec_from_config(command, job_name):
   docker_image = instance_spec['docker_image']
   user_data = instance_spec['user_data']
   clusterfuzz_release = instance_spec.get('clusterfuzz_release', 'prod')
+  max_run_duration = f'{_get_task_duration(command)}s'
   spec = BatchWorkloadSpec(
       clusterfuzz_release=clusterfuzz_release,
       docker_image=docker_image,
@@ -298,5 +314,6 @@ def _get_spec_from_config(command, job_name):
       network=instance_spec['network'],
       subnetwork=instance_spec['subnetwork'],
       preemptible=instance_spec['preemptible'],
-      machine_type=instance_spec['machine_type'])
+      machine_type=instance_spec['machine_type'],
+      max_run_duration=max_run_duration)
   return spec
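
Putting the batch.py pieces together, here is a hedged, standalone sketch of the new flow (the real BatchWorkloadSpec has many more fields and _get_task_spec builds a google.cloud.batch_v1 TaskSpec; a trimmed namedtuple and a dict stand in for both):

import collections

Spec = collections.namedtuple('Spec', ['max_run_duration'])

LEASE_SECONDS_BY_COMMAND = {'corpus_pruning': 24 * 60 * 60}  # assumed values
DEFAULT_LEASE_SECONDS = 6 * 60 * 60


def build_spec(command):
  # Mirrors _get_spec_from_config: pick a per-command duration, render as 'Ns'.
  seconds = LEASE_SECONDS_BY_COMMAND.get(command, DEFAULT_LEASE_SECONDS)
  return Spec(max_run_duration=f'{seconds}s')


def build_task_spec(spec):
  # Mirrors _get_task_spec: the cap now travels with the spec instead of
  # coming from a module-level MAX_DURATION constant.
  return {'max_run_duration': spec.max_run_duration}


assert build_task_spec(build_spec('corpus_pruning')) == {
    'max_run_duration': '86400s'
}
assert build_task_spec(build_spec('fuzz')) == {'max_run_duration': '21600s'}

Carrying the duration on the spec keeps everything that distinguishes one batch workload from another in a single value, rather than split between the spec and a global constant.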

src/clusterfuzz/_internal/tests/core/google_cloud_utils/batch_test.py

Lines changed: 26 additions & 10 deletions

@@ -19,20 +19,22 @@
 from clusterfuzz._internal.google_cloud_utils import batch
 from clusterfuzz._internal.tests.test_libs import test_utils

+# pylint: disable=protected-access
+

 @test_utils.with_cloud_emulators('datastore')
 class GetSpecFromConfigTest(unittest.TestCase):
   """Tests for get_spec_from_config."""

   def setUp(self):
     self.maxDiff = None
+    self.job = data_types.Job(name='libfuzzer_chrome_asan', platform='LINUX')
+    self.job.put()

-  def test_nonpreemptible_get_spec_from_config(self):
+  def test_nonpreemptible(self):
     """Tests that get_spec_from_config works for non-preemptibles as
     expected."""
-    job = data_types.Job(name='libfuzzer_chrome_asan', platform='LINUX')
-    job.put()
-    spec = batch._get_spec_from_config('corpus_pruning', job.name)  # pylint: disable=protected-access
+    spec = batch._get_spec_from_config('analyze', self.job.name)
     expected_spec = batch.BatchWorkloadSpec(
         clusterfuzz_release='prod',
         docker_image='gcr.io/clusterfuzz-images/base:a2f4dd6-202202070654',
@@ -47,15 +49,14 @@ def test_nonpreemptible_get_spec_from_config(self):
         gce_zone='gce-zone',
         project='test-clusterfuzz',
         preemptible=False,
-        machine_type='n1-standard-1')
+        machine_type='n1-standard-1',
+        max_run_duration='21600s')

     self.assertCountEqual(spec, expected_spec)

-  def test_preemptible_get_spec_from_config(self):
+  def test_preemptible(self):
     """Tests that get_spec_from_config works for preemptibles as expected."""
-    job = data_types.Job(name='libfuzzer_chrome_asan', platform='LINUX')
-    job.put()
-    spec = batch._get_spec_from_config('fuzz', job.name)  # pylint: disable=protected-access
+    spec = batch._get_spec_from_config('fuzz', self.job.name)
     expected_spec = batch.BatchWorkloadSpec(
         clusterfuzz_release='prod',
         docker_image='gcr.io/clusterfuzz-images/base:a2f4dd6-202202070654',
@@ -70,6 +71,21 @@ def test_preemptible_get_spec_from_config(self):
         gce_region='gce-region',
         project='test-clusterfuzz',
         preemptible=True,
-        machine_type='n1-standard-1')
+        machine_type='n1-standard-1',
+        max_run_duration='21600s')

     self.assertCountEqual(spec, expected_spec)
+
+  def test_corpus_pruning(self):
+    """Tests that corpus pruning gets a 24 hour spec, distinct from the
+    normal one."""
+    pruning_spec = batch._get_spec_from_config('corpus_pruning', self.job.name)
+    self.assertEqual(pruning_spec.max_run_duration, f'{24 * 60 * 60}s')
+    normal_spec = batch._get_spec_from_config('analyze', self.job.name)
+    self.assertNotEqual(pruning_spec, normal_spec)
+    job = data_types.Job(name='libfuzzer_chrome_msan', platform='LINUX')
+    job.put()
+    # This behavior is important for grouping alike tasks into a single
+    # batch job.
+    pruning_spec2 = batch._get_spec_from_config('corpus_pruning', job.name)
+    self.assertEqual(pruning_spec, pruning_spec2)
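
The last assertion in test_corpus_pruning guards the property that equal specs across different jobs can be coalesced. Because BatchWorkloadSpec is a namedtuple, equal specs hash equally and can key a grouping directly; a hedged sketch with a hypothetical helper and string placeholders for the specs:

import collections


def group_by_spec(tasks_with_specs):
  # Hypothetical helper, not ClusterFuzz code: tasks whose workload specs
  # compare equal can share a single Cloud Batch job.
  groups = collections.defaultdict(list)
  for task_name, spec in tasks_with_specs:
    groups[spec].append(task_name)
  return groups


groups = group_by_spec([
    ('corpus_pruning libfuzzer_chrome_asan', 'pruning-spec'),
    ('corpus_pruning libfuzzer_chrome_msan', 'pruning-spec'),
    ('fuzz libfuzzer_chrome_asan', 'fuzz-spec'),
])
assert len(groups['pruning-spec']) == 2  # both pruning tasks share one job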
