Skip to content

Commit 17a1bbc

Browse files
Fix deadline in manage_vms (#4169)
Fix problem in oss-fuzz where manage_vms doesn't complete, causing old VMs/containers to hang around. Do this by increasing the deadline to 24 hours for manage_vms to complete. Also, temporarily change the order of manage_vms so that the previously unresized groups will be resized first. This should hopefully get rid of weird errors we are seeing.
1 parent 96e4a48 commit 17a1bbc

File tree

4 files changed

+35
-4
lines changed

4 files changed

+35
-4
lines changed

infra/k8s/manage-vms.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ spec:
2121
concurrencyPolicy: Forbid
2222
jobTemplate:
2323
spec:
24-
activeDeadlineSeconds: 43200 # 12 hours.
24+
activeDeadlineSeconds: 86400 # 24 hours.
2525
template:
2626
spec:
2727
containers:

src/clusterfuzz/_internal/bot/tasks/utasks/fuzz_task.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,9 +1193,13 @@ def run_engine_fuzzer(engine_impl, target_name, sync_corpus_directory,
11931193
"""Run engine for fuzzing."""
11941194
if environment.is_trusted_host():
11951195
from clusterfuzz._internal.bot.untrusted_runner import tasks_host
1196-
return tasks_host.engine_fuzz(engine_impl, target_name,
1197-
sync_corpus_directory, testcase_directory)
1196+
logs.info('Running remote engine fuzz.')
1197+
result = tasks_host.engine_fuzz(engine_impl, target_name,
1198+
sync_corpus_directory, testcase_directory)
1199+
logs.info('Done remote engine fuzz.')
1200+
return result
11981201

1202+
logs.info('Worker engine fuzz.')
11991203
build_dir = environment.get_value('BUILD_DIR')
12001204
target_path = engine_common.find_fuzzer_path(build_dir, target_name)
12011205
if target_path is None:

src/clusterfuzz/_internal/cron/manage_vms.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import itertools
2020
import json
2121
import logging
22+
from typing import List
2223

2324
from google.cloud import ndb
2425

@@ -156,6 +157,21 @@ def _template_needs_update(current_template, new_template, resource_name):
156157
return False
157158

158159

160+
def reverse_pairs(l: List) -> List:
  """Temporary function to speed up resizing of group 8 which hasn't been
  updating due to timeouts.

  Returns a new list in which consecutive pairs of |l|, counted from the end,
  appear in reverse order while the order inside each pair is preserved, e.g.
  [0, 1, 2, 3, 4, 5] -> [4, 5, 2, 3, 0, 1].

  When |l| has an odd number of elements, the leading element has no partner
  and is emitted last on its own, e.g. [0, 1, 2] -> [1, 2, 0].

  Args:
    l: The items to reorder. Any iterable is accepted; it is copied and never
      modified.

  Returns:
    A new list holding the same items in pair-reversed order.
  """
  # Copy once so the caller's object is left untouched and so one-shot
  # iterables (which reversed() rejects) are supported too.
  items = list(l)
  output = []
  # Walk the exclusive end index of each pair from the tail towards the head.
  for end in range(len(items), 0, -2):
    # max() clamps the final slice when a single unpaired element remains.
    output.extend(items[max(end - 2, 0):end])
  return output
173+
174+
159175
class ClustersManager:
160176
"""Manager for clusters in a project."""
161177

@@ -186,7 +202,10 @@ def update_clusters(self):
186202
"""Update all clusters in a project."""
187203
self.start_thread_pool()
188204

189-
for cluster in self.gce_project.clusters:
205+
# TODO(metzman): Remove reverse_pairs.
206+
# Use this function so we don't introduce weird bugs by changing the order
207+
# in which hosts and workers are reassigned.
208+
for cluster in reverse_pairs(self.gce_project.clusters):
190209
self.pending_updates.append(
191210
self.thread_pool.submit(self.update_cluster, cluster, cluster.name,
192211
cluster.instance_count))

src/clusterfuzz/_internal/tests/appengine/handlers/cron/manage_vms_test.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,14 @@ def test_primes(self):
10841084
self.assertEqual(3571, sum(result))
10851085

10861086

1087+
class ReversePairsTest(unittest.TestCase):
  """Tests reverse_pairs."""

  def test_reverse_pairs(self):
    """Pairs are emitted last-to-first; order inside each pair is kept."""
    self.assertEqual([4, 5, 2, 3, 0, 1],
                     manage_vms.reverse_pairs(list(range(6))))

  def test_odd_length(self):
    """With an odd length, the unpaired leading element is emitted last."""
    self.assertEqual([1, 2, 0], manage_vms.reverse_pairs([0, 1, 2]))

  def test_empty_and_single(self):
    """Empty and single-element inputs come back unchanged."""
    self.assertEqual([], manage_vms.reverse_pairs([]))
    self.assertEqual([7], manage_vms.reverse_pairs([7]))
1093+
1094+
10871095
@test_utils.with_cloud_emulators('datastore')
10881096
class AssignHostWorkerTest(unittest.TestCase):
10891097
"""Tests host -> worker assignment."""

0 commit comments

Comments
 (0)