Skip to content

Commit 854f286

Browse files
Download databundles for non engine jobs (#5085)
This fixes b/446692241 by downloading databundles on blackbox fuzzer runs. It does so without hurting the performance of non-blackbox batch tasks, since downloading the default databundle takes ~5 minutes. It also optimizes the unzip by about 50% by using the Linux unzip binary instead of Python's slow implementation. --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: jonathanmetzman <[email protected]> Co-authored-by: Jonathan Metzman <[email protected]>
1 parent d77bab0 commit 854f286

File tree

5 files changed

+106
-77
lines changed

5 files changed

+106
-77
lines changed

src/clusterfuzz/_internal/bot/tasks/update_task.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
import datetime
1717
import os
1818
import platform
19+
import shutil
20+
import subprocess
1921
import sys
2022
import time
2123

@@ -288,14 +290,15 @@ def update_source_code():
288290
f'(release = {utils.get_clusterfuzz_release()}).')
289291

290292

291-
def update_tests_if_needed():
293+
def update_tests_if_needed(tests_url=None):
292294
"""Updates layout tests every day."""
293295
data_directory = environment.get_value('FUZZ_DATA')
294296
error_occured = False
295297
expected_task_duration = 60 * 60 # 1 hour.
296298
retry_limit = environment.get_value('FAIL_RETRIES')
297299
temp_archive = os.path.join(data_directory, 'temp.zip')
298-
tests_url = environment.get_value('WEB_TESTS_URL')
300+
if not tests_url:
301+
tests_url = environment.get_value('WEB_TESTS_URL')
299302

300303
# Check if we have a valid tests url.
301304
if not tests_url:
@@ -323,9 +326,19 @@ def update_tests_if_needed():
323326
for _ in range(retry_limit):
324327
try:
325328
shell.remove_directory(data_directory, recreate=True)
326-
storage.copy_file_from(tests_url, temp_archive)
327-
with archive.open(temp_archive) as reader:
328-
reader.extract_all(data_directory, trusted=True)
329+
if tests_url.startswith('http'):
330+
storage.download_signed_url_to_file(tests_url, temp_archive)
331+
else:
332+
storage.copy_file_from(tests_url, temp_archive)
333+
334+
if shutil.which('unzip'):
335+
subprocess.run(
336+
['unzip', '-q', '-o', temp_archive, '-d', data_directory],
337+
check=True,
338+
capture_output=True)
339+
else:
340+
with archive.open(temp_archive) as reader:
341+
reader.extract_all(data_directory, trusted=True)
329342
shell.remove_file(temp_archive)
330343
error_occured = False
331344
break

src/clusterfuzz/_internal/bot/tasks/utasks/fuzz_task.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from clusterfuzz._internal.bot.tasks import setup
3939
from clusterfuzz._internal.bot.tasks import task_creation
4040
from clusterfuzz._internal.bot.tasks import trials
41+
from clusterfuzz._internal.bot.tasks import update_task
4142
from clusterfuzz._internal.bot.tasks.utasks import fuzz_task_knobs
4243
from clusterfuzz._internal.bot.tasks.utasks import uworker_handle_errors
4344
from clusterfuzz._internal.bot.tasks.utasks import uworker_io
@@ -2097,6 +2098,10 @@ def _get_fuzz_target(uworker_input):
20972098

20982099
def utask_main(uworker_input):
20992100
"""Runs the given fuzzer for one round."""
2101+
if not engine.get(uworker_input.fuzzer_name):
2102+
update_task.update_tests_if_needed(
2103+
uworker_input.fuzz_task_input.web_tests_url)
2104+
21002105
# Sets fuzzing logs context before running the fuzzer.
21012106
fuzz_target = _get_fuzz_target(uworker_input)
21022107
with logs.fuzzer_log_context(uworker_input.fuzzer_name,
@@ -2191,6 +2196,11 @@ def _utask_preprocess(fuzzer_name, job_type, uworker_env):
21912196
use_backup=True).serialize())
21922197

21932198
fuzz_task_input.trials.extend(trials.preprocess_get_db_trials())
2199+
web_tests_url = environment.get_value('WEB_TESTS_URL')
2200+
if web_tests_url:
2201+
fuzz_task_input.web_tests_url = storage.get_signed_download_url(
2202+
web_tests_url)
2203+
21942204
for _ in range(MAX_CRASHES_UPLOADED):
21952205
url = fuzz_task_input.crash_upload_urls.add()
21962206
url.key = blobs.generate_new_blob_name()

src/clusterfuzz/_internal/protos/uworker_msg.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ message FuzzTaskInput {
5353
repeated string global_blacklisted_functions = 6;
5454
repeated BlobUploadUrl crash_upload_urls = 7;
5555
repeated google.protobuf.Any trials = 8;
56+
optional string web_tests_url = 9;
5657
}
5758

5859
message DataBundleCorpus {

src/clusterfuzz/_internal/protos/uworker_msg_pb2.py

Lines changed: 70 additions & 70 deletions
Large diffs are not rendered by default.

src/clusterfuzz/_internal/protos/uworker_msg_pb2.pyi

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ class FuzzTaskInput(google.protobuf.message.Message):
251251
GLOBAL_BLACKLISTED_FUNCTIONS_FIELD_NUMBER: builtins.int
252252
CRASH_UPLOAD_URLS_FIELD_NUMBER: builtins.int
253253
TRIALS_FIELD_NUMBER: builtins.int
254+
WEB_TESTS_URL_FIELD_NUMBER: builtins.int
254255
sample_testcase_upload_key: builtins.str
255256
sample_testcase_upload_url: builtins.str
256257
script_log_upload_url: builtins.str
@@ -265,6 +266,7 @@ class FuzzTaskInput(google.protobuf.message.Message):
265266
def crash_upload_urls(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___BlobUploadUrl]: ...
266267
@property
267268
def trials(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[google.protobuf.any_pb2.Any]: ...
269+
web_tests_url: builtins.str
268270
def __init__(
269271
self,
270272
*,
@@ -276,9 +278,10 @@ class FuzzTaskInput(google.protobuf.message.Message):
276278
global_blacklisted_functions: collections.abc.Iterable[builtins.str] | None = ...,
277279
crash_upload_urls: collections.abc.Iterable[global___BlobUploadUrl] | None = ...,
278280
trials: collections.abc.Iterable[google.protobuf.any_pb2.Any] | None = ...,
281+
web_tests_url: builtins.str | None = ...,
279282
) -> None: ...
280-
def HasField(self, field_name: typing_extensions.Literal["_corpus", b"_corpus", "_fuzz_target", b"_fuzz_target", "_sample_testcase_upload_key", b"_sample_testcase_upload_key", "_sample_testcase_upload_url", b"_sample_testcase_upload_url", "_script_log_upload_url", b"_script_log_upload_url", "corpus", b"corpus", "fuzz_target", b"fuzz_target", "sample_testcase_upload_key", b"sample_testcase_upload_key", "sample_testcase_upload_url", b"sample_testcase_upload_url", "script_log_upload_url", b"script_log_upload_url"]) -> builtins.bool: ...
281-
def ClearField(self, field_name: typing_extensions.Literal["_corpus", b"_corpus", "_fuzz_target", b"_fuzz_target", "_sample_testcase_upload_key", b"_sample_testcase_upload_key", "_sample_testcase_upload_url", b"_sample_testcase_upload_url", "_script_log_upload_url", b"_script_log_upload_url", "corpus", b"corpus", "crash_upload_urls", b"crash_upload_urls", "fuzz_target", b"fuzz_target", "global_blacklisted_functions", b"global_blacklisted_functions", "sample_testcase_upload_key", b"sample_testcase_upload_key", "sample_testcase_upload_url", b"sample_testcase_upload_url", "script_log_upload_url", b"script_log_upload_url", "trials", b"trials"]) -> None: ...
283+
def HasField(self, field_name: typing_extensions.Literal["_corpus", b"_corpus", "_fuzz_target", b"_fuzz_target", "_sample_testcase_upload_key", b"_sample_testcase_upload_key", "_sample_testcase_upload_url", b"_sample_testcase_upload_url", "_script_log_upload_url", b"_script_log_upload_url", "_web_tests_url", b"_web_tests_url", "corpus", b"corpus", "fuzz_target", b"fuzz_target", "sample_testcase_upload_key", b"sample_testcase_upload_key", "sample_testcase_upload_url", b"sample_testcase_upload_url", "script_log_upload_url", b"script_log_upload_url", "web_tests_url", b"web_tests_url"]) -> builtins.bool: ...
284+
def ClearField(self, field_name: typing_extensions.Literal["_corpus", b"_corpus", "_fuzz_target", b"_fuzz_target", "_sample_testcase_upload_key", b"_sample_testcase_upload_key", "_sample_testcase_upload_url", b"_sample_testcase_upload_url", "_script_log_upload_url", b"_script_log_upload_url", "_web_tests_url", b"_web_tests_url", "corpus", b"corpus", "crash_upload_urls", b"crash_upload_urls", "fuzz_target", b"fuzz_target", "global_blacklisted_functions", b"global_blacklisted_functions", "sample_testcase_upload_key", b"sample_testcase_upload_key", "sample_testcase_upload_url", b"sample_testcase_upload_url", "script_log_upload_url", b"script_log_upload_url", "trials", b"trials", "web_tests_url", b"web_tests_url"]) -> None: ...
282285
@typing.overload
283286
def WhichOneof(self, oneof_group: typing_extensions.Literal["_corpus", b"_corpus"]) -> typing_extensions.Literal["corpus"] | None: ...
284287
@typing.overload
@@ -289,6 +292,8 @@ class FuzzTaskInput(google.protobuf.message.Message):
289292
def WhichOneof(self, oneof_group: typing_extensions.Literal["_sample_testcase_upload_url", b"_sample_testcase_upload_url"]) -> typing_extensions.Literal["sample_testcase_upload_url"] | None: ...
290293
@typing.overload
291294
def WhichOneof(self, oneof_group: typing_extensions.Literal["_script_log_upload_url", b"_script_log_upload_url"]) -> typing_extensions.Literal["script_log_upload_url"] | None: ...
295+
@typing.overload
296+
def WhichOneof(self, oneof_group: typing_extensions.Literal["_web_tests_url", b"_web_tests_url"]) -> typing_extensions.Literal["web_tests_url"] | None: ...
292297

293298
global___FuzzTaskInput = FuzzTaskInput
294299

0 commit comments

Comments
 (0)