Skip to content

Commit 426e836

Browse files
berkeleysquare and John Kranz authored
Support prefix in helpers.get_all_files_in_bucket (#58)
* Support prefix in helpers.get_all_files_in_bucket * Fix param description --------- Co-authored-by: John Kranz <[email protected]>
1 parent 623f299 commit 426e836

File tree

2 files changed

+26
-8
lines changed

2 files changed

+26
-8
lines changed

ds3/ds3Helpers.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,12 @@ def get_blob(self, bucket: str, get_object: HelperGetObject, offset: int, job_id
420420
stream.close()
421421
return get_object.object_name, offset
422422

423-
def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per_bp_job: int = 1000,
424-
max_threads: int = 5, job_name: str = None) -> List[str]:
423+
def get_all_files_in_bucket(self, destination_dir: str,
424+
bucket: str,
425+
prefix: str = None,
426+
objects_per_bp_job: int = 1000,
427+
max_threads: int = 5,
428+
job_name: str = None) -> List[str]:
425429
"""
426430
Retrieves all objects from a Black Pearl bucket.
427431
@@ -439,6 +443,8 @@ def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per
439443
The number of concurrent objects being transferred at once (default 5).
440444
job_name : str
441445
The name to give the BP get jobs. All BP jobs that are created will have the same name.
446+
prefix : str
447+
Limits the response to objects that begin with the specified prefix.
442448
"""
443449
truncated: str = 'true'
444450
marker = ""
@@ -447,6 +453,7 @@ def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per
447453
list_bucket = self.client.get_bucket(GetBucketRequest(bucket_name=bucket,
448454
max_keys=objects_per_bp_job,
449455
versions=False,
456+
prefix=prefix,
450457
marker=marker))
451458

452459
get_objects: List[HelperGetObject] = []

tests/helpersTests.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def test_put_and_get_objects(self):
163163
destination.cleanup()
164164
client.delete_bucket_spectra_s3(ds3.DeleteBucketSpectraS3Request(bucket_name=bucket, force=True))
165165

166-
def test_put_and_get_all_objects_in_directory(self):
166+
def test_put_and_get_all_objects_in_directory(self, prefix_dir: str = ""):
167167
bucket = f'ds3-python3-sdk-test-{uuid.uuid1()}'
168168
job_name = "python test job"
169169

@@ -201,15 +201,20 @@ def test_put_and_get_all_objects_in_directory(self):
201201

202202
# retrieve the objects from the BP
203203
destination = tempfile.TemporaryDirectory(prefix="ds3-python3-sdk-dst-")
204+
205+
# retrieve the objects in prefix directory
206+
prefixed_objects = [p for p in put_objects if p.object_name.startswith(prefix_dir)]
207+
204208
job_ids = helpers.get_all_files_in_bucket(destination_dir=destination.name,
205209
bucket=bucket,
206-
objects_per_bp_job=10,
207-
job_name=job_name)
210+
objects_per_bp_job=10, job_name=job_name,
211+
prefix=prefix_dir)
208212

209-
self.assertGreaterEqual(len(job_ids), 2, "multiple job ids returned")
213+
if len(prefix_dir) == 0:
214+
self.assertGreaterEqual(len(job_ids), 2, "multiple job ids returned")
210215

211-
# verify all the files and directories were retrieved
212-
for put_object in put_objects:
216+
# verify all the files in prefix were returned
217+
for put_object in prefixed_objects:
213218
obj_destination = os.path.join(destination.name,
214219
ds3Helpers.object_name_to_file_path(put_object.object_name))
215220
if put_object.object_name.endswith('/'):
@@ -228,6 +233,12 @@ def test_put_and_get_all_objects_in_directory(self):
228233
destination.cleanup()
229234
client.delete_bucket_spectra_s3(ds3.DeleteBucketSpectraS3Request(bucket_name=bucket, force=True))
230235

236+
def test_put_and_get_all_objects_in_directory_with_prefix(self):
237+
self.test_put_and_get_all_objects_in_directory(prefix_dir="dir-0/sub-dir-1/")
238+
239+
def test_put_and_get_all_objects_in_directory_with_bad_prefix(self):
240+
self.test_put_and_get_all_objects_in_directory(prefix_dir="not/gonna/match/")
241+
231242
def test_put_all_objects_in_directory_with_md5_checksum(self):
232243
self.put_all_objects_in_directory_with_checksum(checksum_type='MD5')
233244

0 commit comments

Comments
 (0)