Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit 01df5a3

Browse files
authored
Cleanup now-unused delete_files and list_folder_contents (#489)
These methods were only used by the old repo cleanup code. That code has since been replaced by deleting files one by one, which is the only delete operation GCS actually supports, and which is also what the official GCS client library does behind the scenes.
1 parent 8db93ad commit 01df5a3

File tree

22 files changed

+0
-5087
lines changed

22 files changed

+0
-5087
lines changed

shared/api_archive/archive.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,6 @@ def delete_file(self, path: str) -> None:
158158
"""
159159
self.storage.delete_file(self.root, path)
160160

161-
@sentry_sdk.trace
162-
def delete_files(self, paths: list[str]) -> None:
163-
"""
164-
Generic method to delete files from the archive.
165-
"""
166-
self.storage.delete_files(bucket_name=self.root, paths=paths)
167-
168161
def read_chunks(self, commit_sha: str) -> str:
169162
"""
170163
Convenience method to read a chunks file from the archive.

shared/storage/aws.py

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -146,54 +146,3 @@ def delete_file(self, bucket_name, path):
146146
return True
147147
except ClientError:
148148
raise
149-
150-
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
151-
"""Batch deletes a list of files from a given bucket
152-
153-
Note:
154-
When trying to delete a file that doesnt exists, AWS SDK will
155-
return 'true' as if it was deleted.
156-
157-
Args:
158-
bucket_name (str): The name of the bucket for the file lives
159-
paths (list): A list of the paths to be deletes (default: {[]})
160-
161-
Raises:
162-
NotImplementedError: If the current instance did not implement this method
163-
164-
Returns:
165-
list: A list of booleans, where each result indicates whether that file was deleted
166-
successfully
167-
"""
168-
objects_to_delete = {"Objects": [{"Key": key} for key in paths]}
169-
try:
170-
response = self.storage_client.delete_objects(
171-
Bucket=bucket_name, Delete=objects_to_delete
172-
)
173-
except ClientError:
174-
raise
175-
deletes = [error.get("Key") for error in response.get("Deleted")]
176-
return [key in deletes for key in paths]
177-
178-
def list_folder_contents(self, bucket_name, prefix=None, recursive=True):
179-
"""List the contents of a specific folder
180-
181-
Args:
182-
bucket_name (str): The name of the bucket for the file lives
183-
prefix: The prefix of the files to be listed (default: {None})
184-
recursive: Whether the listing should be recursive (default: {True})
185-
186-
Raises:
187-
NotImplementedError: If the current instance did not implement this method
188-
"""
189-
try:
190-
response = self.storage_client.list_objects(
191-
Bucket=bucket_name, Prefix=prefix
192-
)
193-
except ClientError:
194-
raise
195-
contents = response.get("Contents")
196-
return [
197-
{"name": content.get("Key"), "size": content.get("Size")}
198-
for content in contents
199-
]

shared/storage/base.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -96,33 +96,3 @@ def delete_file(self, bucket_name, path):
9696
bool: True if the deletion was succesful
9797
"""
9898
raise NotImplementedError()
99-
100-
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
101-
"""Batch deletes a list of files from a given bucket
102-
(what happens to the files that don't exist?)
103-
104-
Args:
105-
bucket_name (str): The name of the bucket for the file lives
106-
paths (list): A list of the paths to be deletes (default: {[]})
107-
108-
Raises:
109-
NotImplementedError: If the current instance did not implement this method
110-
111-
Returns:
112-
list: A list of booleans, where each result indicates whether that file was deleted
113-
successfully
114-
"""
115-
raise NotImplementedError()
116-
117-
def list_folder_contents(self, bucket_name, prefix=None, recursive=True):
118-
"""List the contents of a specific folder
119-
120-
Args:
121-
bucket_name (str): The name of the bucket for the file lives
122-
prefix: The prefix of the files to be listed (default: {None})
123-
recursive: Whether the listing should be recursive (default: {True})
124-
125-
Raises:
126-
NotImplementedError: If the current instance did not implement this method
127-
"""
128-
raise NotImplementedError()

shared/storage/fallback.py

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -88,35 +88,3 @@ def delete_file(self, bucket_name, path):
8888
first_deletion = self.main_service.delete_file(bucket_name, path)
8989
second_deletion = self.fallback_service.delete_file(bucket_name, path)
9090
return first_deletion and second_deletion
91-
92-
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
93-
"""Batch deletes a list of files from a given bucket
94-
(what happens to the files that don't exist?)
95-
96-
Args:
97-
bucket_name (str): The name of the bucket for the file lives
98-
paths (list): A list of the paths to be deletes (default: {[]})
99-
100-
Raises:
101-
NotImplementedError: If the current instance did not implement this method
102-
103-
Returns:
104-
list: A list of booleans, where each result indicates whether that file was deleted
105-
successfully
106-
"""
107-
first_results = self.main_service.delete_files(bucket_name, paths)
108-
second_results = self.fallback_service.delete_files(bucket_name, paths)
109-
return [f and s for f, s in zip(first_results, second_results)]
110-
111-
def list_folder_contents(self, bucket_name, prefix=None, recursive=True):
112-
"""List the contents of a specific folder
113-
114-
Args:
115-
bucket_name (str): The name of the bucket for the file lives
116-
prefix: The prefix of the files to be listed (default: {None})
117-
recursive: Whether the listing should be recursive (default: {True})
118-
119-
Raises:
120-
NotImplementedError: If the current instance did not implement this method
121-
"""
122-
return self.main_service.list_folder_contents(bucket_name, prefix, recursive)

shared/storage/gcp.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -142,37 +142,3 @@ def delete_file(self, bucket_name: str, path: str) -> bool:
142142
except google.cloud.exceptions.NotFound:
143143
raise FileNotInStorageError(f"File {path} does not exist in {bucket_name}")
144144
return True
145-
146-
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
147-
"""Batch deletes a list of files from a given bucket
148-
(what happens to the files that don't exist?)
149-
150-
Args:
151-
bucket_name (str): The name of the bucket for the file lives
152-
paths (list): A list of the paths to be deletes (default: {[]})
153-
154-
Returns:
155-
list: A list of booleans, where each result indicates whether that file was deleted
156-
successfully
157-
"""
158-
bucket = self.storage_client.bucket(bucket_name)
159-
blobs = [bucket.blob(path) for path in paths]
160-
blobs_errored: set[storage.Blob] = set()
161-
bucket.delete_blobs(blobs, on_error=blobs_errored.add)
162-
return [b not in blobs_errored for b in blobs]
163-
164-
def list_folder_contents(self, bucket_name: str, prefix=None, recursive=True):
165-
"""List the contents of a specific folder
166-
167-
Attention: google ignores the `recursive` param
168-
169-
Args:
170-
bucket_name (str): The name of the bucket for the file lives
171-
prefix: The prefix of the files to be listed (default: {None})
172-
recursive: Whether the listing should be recursive (default: {True})
173-
"""
174-
assert recursive
175-
bucket = self.storage_client.bucket(bucket_name)
176-
return (
177-
{"name": b.name, "size": b.size} for b in bucket.list_blobs(prefix=prefix)
178-
)

shared/storage/memory.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -122,45 +122,3 @@ def delete_file(self, bucket_name, path):
122122
except KeyError:
123123
raise FileNotInStorageError()
124124
return True
125-
126-
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
127-
"""Batch deletes a list of files from a given bucket
128-
(what happens to the files that don't exist?)
129-
130-
Args:
131-
bucket_name (str): The name of the bucket for the file lives
132-
paths (list): A list of the paths to be deletes (default: {[]})
133-
134-
Raises:
135-
NotImplementedError: If the current instance did not implement this method
136-
137-
Returns:
138-
list: A list of booleans, where each result indicates whether that file was deleted
139-
successfully
140-
"""
141-
results = []
142-
for path in paths:
143-
try:
144-
results.append(self.delete_file(bucket_name, path))
145-
except FileNotInStorageError:
146-
results.append(False)
147-
return results
148-
149-
def list_folder_contents(self, bucket_name, prefix=None, recursive=True):
150-
"""List the contents of a specific folder
151-
152-
Args:
153-
bucket_name (str): The name of the bucket for the file lives
154-
prefix: The prefix of the files to be listed (default: {None})
155-
recursive: Whether the listing should be recursive (default: {True})
156-
157-
Raises:
158-
NotImplementedError: If the current instance did not implement this method
159-
"""
160-
res = [
161-
{"name": key, "size": len(self.storage[bucket_name][key].decode())}
162-
for key in self.storage[bucket_name]
163-
if prefix is None or key.startswith(prefix)
164-
]
165-
166-
return res

shared/storage/minio.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import logging
44
import os
55
import shutil
6-
import sys
76
import tempfile
87
from io import BytesIO
98
from typing import BinaryIO, overload
@@ -15,7 +14,6 @@
1514
EnvMinioProvider,
1615
IamAwsProvider,
1716
)
18-
from minio.deleteobjects import DeleteObject
1917
from minio.error import MinioException, S3Error
2018

2119
from shared.storage.base import CHUNK_SIZE, BaseStorageService
@@ -240,27 +238,3 @@ def delete_file(self, bucket_name: str, path: str) -> bool:
240238
return True
241239
except MinioException:
242240
raise
243-
244-
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
245-
try:
246-
for del_err in self.minio_client.remove_objects(
247-
bucket_name, [DeleteObject(path) for path in paths]
248-
):
249-
print("Deletion error: {}".format(del_err)) # noqa: T201
250-
return [True] * len(paths)
251-
except MinioException:
252-
raise
253-
254-
def list_folder_contents(self, bucket_name, prefix=None, recursive=True):
255-
return (
256-
self.object_to_dict(b)
257-
for b in self.minio_client.list_objects(bucket_name, prefix, recursive)
258-
)
259-
260-
def object_to_dict(self, obj):
261-
return {"name": obj.object_name, "size": obj.size}
262-
263-
# TODO remove this function -- just using it for output during testing.
264-
def write(self, string, silence=False):
265-
if not silence:
266-
sys.stdout.write((string or "") + "\n")

shared/storage/new_minio.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import importlib.metadata
33
import json
44
import logging
5-
import sys
65
from io import BytesIO
76
from typing import IO, BinaryIO, Tuple, cast, overload
87

@@ -14,7 +13,6 @@
1413
EnvMinioProvider,
1514
IamAwsProvider,
1615
)
17-
from minio.deleteobjects import DeleteObject
1816
from minio.error import MinioException, S3Error
1917
from minio.helpers import ObjectWriteResult
2018
from urllib3 import HTTPResponse
@@ -304,27 +302,3 @@ def delete_file(self, bucket_name, url):
304302
return True
305303
except MinioException:
306304
raise
307-
308-
def delete_files(self, bucket_name, urls=[]):
309-
try:
310-
for del_err in self.minio_client.remove_objects(
311-
bucket_name, [DeleteObject(url) for url in urls]
312-
):
313-
print("Deletion error: {}".format(del_err)) # noqa: T201
314-
return [True] * len(urls)
315-
except MinioException:
316-
raise
317-
318-
def list_folder_contents(self, bucket_name, prefix=None, recursive=True):
319-
return (
320-
self.object_to_dict(b)
321-
for b in self.minio_client.list_objects(bucket_name, prefix, recursive)
322-
)
323-
324-
def object_to_dict(self, obj):
325-
return {"name": obj.object_name, "size": obj.size}
326-
327-
# TODO remove this function -- just using it for output during testing.
328-
def write(self, string, silence=False):
329-
if not silence:
330-
sys.stdout.write((string or "") + "\n")

0 commit comments

Comments
 (0)