Commit e4b0136

Allow configuring default corpora bucket location. (#4479)
Allow instances to specify the GCS bucket location for data bundle buckets in `project.yaml` via a new key, `data_bundle_bucket_location`. This allows creating regional buckets instead of the default `US` multi-region, which results in high data transfer costs in Chrome's instance.
1 parent 65e0e7b commit e4b0136
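In practice the new key is read once when a corpus bucket is created and forwarded to the storage layer. Below is a minimal sketch of that flow; the helper name and the example region are illustrative only, while the key name and the two calls come from the diffs in this commit.

# Sketch only: helper name and example region are ours, not part of this commit.
#
#   # project.yaml
#   data_bundle_bucket_location: US-CENTRAL1
from clusterfuzz._internal.config import local_config
from clusterfuzz._internal.google_cloud_utils import storage


def create_corpus_bucket_sketch(bucket_name):
  # get() returns None when the key is absent, so existing deployments keep the
  # previous behaviour (GCS picks its default 'US' multi-region).
  location = local_config.ProjectConfig().get('data_bundle_bucket_location')
  return storage.create_bucket_if_needed(bucket_name, location=location)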

6 files changed (+55, -19 lines)

src/clusterfuzz/_internal/datastore/data_handler.py

Lines changed: 2 additions & 1 deletion
@@ -977,7 +977,8 @@ def add_build_metadata(job_type,
 def create_data_bundle_bucket_and_iams(data_bundle_name, emails):
   """Creates a data bundle bucket and adds iams for access."""
   bucket_name = get_data_bundle_bucket_name(data_bundle_name)
-  if not storage.create_bucket_if_needed(bucket_name):
+  location = local_config.ProjectConfig().get('data_bundle_bucket_location')
+  if not storage.create_bucket_if_needed(bucket_name, location=location):
     return False
 
   client = storage.create_discovery_storage_client()

src/clusterfuzz/_internal/google_cloud_utils/storage.py

Lines changed: 11 additions & 5 deletions
@@ -117,7 +117,7 @@
 class StorageProvider:
   """Core storage provider interface."""
 
-  def create_bucket(self, name, object_lifecycle, cors):
+  def create_bucket(self, name, object_lifecycle, cors, location):
     """Create a new bucket."""
     raise NotImplementedError
 
@@ -198,7 +198,7 @@ def _chunk_size(self):
 
     return None
 
-  def create_bucket(self, name, object_lifecycle, cors):
+  def create_bucket(self, name, object_lifecycle, cors, location):
     """Create a new bucket."""
     project_id = utils.get_application_id()
     request_body = {'name': name}
@@ -208,6 +208,9 @@ def create_bucket(self, name, object_lifecycle, cors):
     if cors:
       request_body['cors'] = cors
 
+    if location:
+      request_body['location'] = location
+
     client = create_discovery_storage_client()
     try:
       client.buckets().insert(project=project_id, body=request_body).execute()
@@ -543,7 +546,7 @@ def convert_path_for_write(self, remote_path, directory=OBJECTS_DIR):
 
     return fs_path
 
-  def create_bucket(self, name, object_lifecycle, cors):
+  def create_bucket(self, name, object_lifecycle, cors, location):
     """Create a new bucket."""
     bucket_path = self._fs_bucket_path(name)
     if os.path.exists(bucket_path):
@@ -905,13 +908,16 @@ def set_bucket_iam_policy(client, bucket_name, iam_policy):
   return None
 
 
-def create_bucket_if_needed(bucket_name, object_lifecycle=None, cors=None):
+def create_bucket_if_needed(bucket_name,
+                            object_lifecycle=None,
+                            cors=None,
+                            location=None):
   """Creates a GCS bucket."""
   provider = _provider()
   if provider.get_bucket(bucket_name):
     return True
 
-  if not provider.create_bucket(bucket_name, object_lifecycle, cors):
+  if not provider.create_bucket(bucket_name, object_lifecycle, cors, location):
     return False
 
   time.sleep(CREATE_BUCKET_DELAY)
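For readers unfamiliar with the GCS JSON API path used above, here is a self-contained sketch of roughly what the discovery-based provider's create_bucket now sends. It assumes default application credentials and a direct google-api-python-client call; the function name and placeholder values are ours, whereas ClusterFuzz builds its client via create_discovery_storage_client().

# Sketch under assumptions: uses googleapiclient directly with default credentials.
from googleapiclient import discovery


def create_bucket_sketch(project_id, name, location=None):
  request_body = {'name': name}
  if location:
    # e.g. 'US-CENTRAL1'; when omitted, GCS falls back to the 'US' multi-region
    # default that this change lets instances avoid for data bundle buckets.
    request_body['location'] = location
  client = discovery.build('storage', 'v1')
  client.buckets().insert(project=project_id, body=request_body).execute()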

src/clusterfuzz/_internal/tests/core/bot/tasks/utasks/corpus_pruning_task_test.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def setUp(self):
     self.local_gcs_buckets_path = tempfile.mkdtemp()
     os.environ['LOCAL_GCS_BUCKETS_PATH'] = self.local_gcs_buckets_path
     os.environ['TEST_BLOBS_BUCKET'] = 'blobs-bucket'
-    storage._provider().create_bucket('blobs-bucket', None, None)
+    storage._provider().create_bucket('blobs-bucket', None, None, None)
     helpers.patch(self, [
         'clusterfuzz._internal.bot.fuzzers.engine_common.unpack_seed_corpus_if_needed',
         'clusterfuzz._internal.bot.tasks.task_creation.create_tasks',

src/clusterfuzz/_internal/tests/core/datastore/data_handler_test.py

Lines changed: 38 additions & 9 deletions
@@ -23,7 +23,6 @@
 import parameterized
 from pyfakefs import fake_filesystem_unittest
 
-from clusterfuzz._internal.config import local_config
 from clusterfuzz._internal.datastore import data_handler
 from clusterfuzz._internal.datastore import data_types
 from clusterfuzz._internal.google_cloud_utils import blobs
@@ -73,14 +72,27 @@ class DataHandlerTest(unittest.TestCase):
 
   def setUp(self):
     helpers.patch_environ(self)
-    project_config_get = local_config.ProjectConfig.get
     helpers.patch(self, [
         'clusterfuzz._internal.base.utils.default_project_name',
         'clusterfuzz._internal.config.db_config.get',
-        ('project_config_get',
-         'clusterfuzz._internal.config.local_config.ProjectConfig.get'),
+        'clusterfuzz._internal.config.local_config.ProjectConfig',
+        ('get_storage_provider',
+         'clusterfuzz._internal.google_cloud_utils.storage._provider'),
+        'clusterfuzz._internal.google_cloud_utils.storage.create_discovery_storage_client',
+        'clusterfuzz._internal.google_cloud_utils.storage.get_bucket_iam_policy',
     ])
 
+    self.mock.default_project_name.return_value = 'project'
+
+    self.storage_provider = mock.Mock()
+    self.mock.get_storage_provider.return_value = self.storage_provider
+
+    self.project_config = {}
+    self.mock.ProjectConfig.return_value = self.project_config
+
+    # Disable artificial delay when creating buckets.
+    storage.CREATE_BUCKET_DELAY = 0
+
     self.job = data_types.Job(
         name='linux_asan_chrome',
         environment_string=('SUMMARY_PREFIX = project\n'
@@ -175,8 +187,6 @@ def setUp(self):
 
     environment.set_value('FUZZ_DATA', '/tmp/inputs/fuzzer-common-data-bundles')
     environment.set_value('FUZZERS_DIR', '/tmp/inputs/fuzzers')
-    self.mock.default_project_name.return_value = 'project'
-    self.mock.project_config_get.side_effect = project_config_get
 
   def test_find_testcase(self):
     """Ensure that find_testcase behaves as expected."""
@@ -449,15 +459,34 @@ def test_get_issue_summary_bad_cast_without_crash_function(self):
         summary, 'project: Bad-cast to blink::LayoutBlock from '
         'blink::LayoutTableSection')
 
+  def test_create_data_bundle_bucket_and_iams(self):
+    self.storage_provider.get_bucket.return_value = None
+    self.storage_provider.create_bucket.return_value = True
+
+    self.assertTrue(data_handler.create_data_bundle_bucket_and_iams('test', []))
+
+    self.storage_provider.create_bucket.assert_called_with(
+        'test-corpus.test-clusterfuzz.appspot.com', None, None, None)
+
+  def test_create_data_bundle_bucket_and_iams_with_location(self):
+    self.storage_provider.get_bucket.return_value = None
+    self.storage_provider.create_bucket.return_value = True
+
+    self.project_config['data_bundle_bucket_location'] = 'NORTH-POLE'
+
+    self.assertTrue(data_handler.create_data_bundle_bucket_and_iams('test', []))
+
+    self.storage_provider.create_bucket.assert_called_with(
+        'test-corpus.test-clusterfuzz.appspot.com', None, None, 'NORTH-POLE')
+
   def test_get_data_bundle_name_default(self):
     """Test getting the default data bundle bucket name."""
     self.assertEqual('test-corpus.test-clusterfuzz.appspot.com',
                      data_handler.get_data_bundle_bucket_name('test'))
 
   def test_get_data_bundle_name_custom_suffix(self):
     """Test getting the data bundle bucket name with custom suffix."""
-    self.mock.project_config_get.side_effect = None
-    self.mock.project_config_get.return_value = 'custom.suffix.com'
+    self.project_config['bucket_domain_suffix'] = 'custom.suffix.com'
     self.assertEqual('test-corpus.custom.suffix.com',
                      data_handler.get_data_bundle_bucket_name('test'))
 
@@ -485,7 +514,7 @@ def test_filter_stack_trace_upload(self):
     exceeds limit and an upload_url is provided."""
     blob_name = blobs.generate_new_blob_name()
     blobs_bucket = 'blobs_bucket'
-    storage._provider().create_bucket(blobs_bucket, None, None)  # pylint: disable=protected-access
+    storage._provider().create_bucket(blobs_bucket, None, None, None)  # pylint: disable=protected-access
 
     gcs_path = storage.get_cloud_storage_file_path(blobs_bucket, blob_name)
     signed_upload_url = storage.get_signed_upload_url(gcs_path)

src/clusterfuzz/_internal/tests/core/google_cloud_utils/blobs_test.py

Lines changed: 1 addition & 1 deletion
@@ -182,7 +182,7 @@ def setUp(self):
     test_utils.set_up_pyfakefs(self)
     os.environ['LOCAL_GCS_BUCKETS_PATH'] = '/local'
     os.environ['TEST_BLOBS_BUCKET'] = 'blobs-bucket'
-    self.provider.create_bucket('blobs-bucket', None, None)
+    self.provider.create_bucket('blobs-bucket', None, None, None)
 
   def test_get_blob_signed_upload_url_then_delete_blob(self):
     """Tests get_blob_signed_upload_url."""

src/clusterfuzz/_internal/tests/core/google_cloud_utils/storage_test.py

Lines changed: 2 additions & 2 deletions
@@ -82,7 +82,7 @@ def setUp(self):
 
   def test_create_bucket(self):
     """Test create_bucket."""
-    self.provider.create_bucket('test-bucket', None, None)
+    self.provider.create_bucket('test-bucket', None, None, None)
     self.assertTrue(os.path.isdir('/local/test-bucket'))
 
   def test_get_bucket(self):
@@ -281,7 +281,7 @@ def test_download_signed_url(self):
   def test_upload_signed_url(self):
     """Tests upload_signed_url."""
     contents = b'aa'
-    self.provider.create_bucket('test-bucket', None, None)
+    self.provider.create_bucket('test-bucket', None, None, None)
     self.provider.upload_signed_url(contents, 'gs://test-bucket/a')
     with open('/local/test-bucket/objects/a', 'rb') as fp:
       return self.assertEqual(fp.read(), contents)
