Skip to content

Commit 8675a16

Browse files
authored
PYTHON-4947 - GridFS spec: Add performant 'delete revisions by filena… (#2218)
1 parent 58a41ae commit 8675a16

File tree

12 files changed

+322
-10
lines changed

12 files changed

+322
-10
lines changed

doc/changelog.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ PyMongo 4.12 brings a number of changes including:
99
- Support for configuring DEK cache lifetime via the ``key_expiration_ms`` argument to
1010
:class:`~pymongo.encryption_options.AutoEncryptionOpts`.
1111
- Support for $lookup in CSFLE and QE supported on MongoDB 8.1+.
12+
- Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.delete_by_name` and :meth:`gridfs.grid_file.GridFSBucket.delete_by_name`
13+
for more performant deletion of a file with multiple revisions.
1214
- AsyncMongoClient no longer performs DNS resolution for "mongodb+srv://" connection strings on creation.
1315
To avoid blocking the asyncio loop, the resolution is now deferred until the client is first connected.
1416
- Added index hinting support to the

gridfs/asynchronous/grid_file.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,35 @@ async def delete(self, file_id: Any, session: Optional[AsyncClientSession] = Non
834834
if not res.deleted_count:
835835
raise NoFile("no file could be deleted because none matched %s" % file_id)
836836

837+
@_csot.apply
async def delete_by_name(
    self, filename: str, session: Optional[AsyncClientSession] = None
) -> None:
    """Given a filename, delete this stored file's files collection document(s)
    and associated chunks from a GridFS bucket.

    Every revision stored under ``filename`` is removed: the matching ids are
    looked up once and then deleted with one command per collection.

    For example::

      my_db = AsyncMongoClient().test
      fs = AsyncGridFSBucket(my_db)
      await fs.upload_from_stream("test_file", b"data I want to store!")
      await fs.delete_by_name("test_file")

    Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists.

    :param filename: The name of the file to be deleted.
    :param session: a :class:`~pymongo.client_session.AsyncClientSession`

    .. versionadded:: 4.12
    """
    _disallow_transactions(session)
    no_file_msg = f"no file could be deleted because none matched filename {filename!r}"
    # Only the _id of each revision is needed to drive the deletes below.
    cursor = self._files.find({"filename": filename}, {"_id": 1}, session=session)
    file_ids = [doc["_id"] async for doc in cursor]
    if not file_ids:
        # Nothing matched: an empty ``$in`` cannot delete anything, so skip
        # both delete commands and report the missing file right away.
        raise NoFile(no_file_msg)
    # Files collection documents are removed first, then their chunks.
    res = await self._files.delete_many({"_id": {"$in": file_ids}}, session=session)
    await self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session)
    if not res.deleted_count:
        # The ids were found but no files document was deleted (presumably they
        # were removed by someone else between the find and the delete).
        raise NoFile(no_file_msg)
865+
837866
def find(self, *args: Any, **kwargs: Any) -> AsyncGridOutCursor:
838867
"""Find and return the files collection documents that match ``filter``
839868

gridfs/synchronous/grid_file.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -830,6 +830,33 @@ def delete(self, file_id: Any, session: Optional[ClientSession] = None) -> None:
830830
if not res.deleted_count:
831831
raise NoFile("no file could be deleted because none matched %s" % file_id)
832832

833+
@_csot.apply
def delete_by_name(self, filename: str, session: Optional[ClientSession] = None) -> None:
    """Given a filename, delete this stored file's files collection document(s)
    and associated chunks from a GridFS bucket.

    Every revision stored under ``filename`` is removed: the matching ids are
    looked up once and then deleted with one command per collection.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      fs.upload_from_stream("test_file", b"data I want to store!")
      fs.delete_by_name("test_file")

    Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists.

    :param filename: The name of the file to be deleted.
    :param session: a :class:`~pymongo.client_session.ClientSession`

    .. versionadded:: 4.12
    """
    _disallow_transactions(session)
    no_file_msg = f"no file could be deleted because none matched filename {filename!r}"
    # Only the _id of each revision is needed to drive the deletes below.
    cursor = self._files.find({"filename": filename}, {"_id": 1}, session=session)
    file_ids = [doc["_id"] for doc in cursor]
    if not file_ids:
        # Nothing matched: an empty ``$in`` cannot delete anything, so skip
        # both delete commands and report the missing file right away.
        raise NoFile(no_file_msg)
    # Files collection documents are removed first, then their chunks.
    res = self._files.delete_many({"_id": {"$in": file_ids}}, session=session)
    self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session)
    if not res.deleted_count:
        # The ids were found but no files document was deleted (presumably they
        # were removed by someone else between the find and the delete).
        raise NoFile(no_file_msg)
859+
833860
def find(self, *args: Any, **kwargs: Any) -> GridOutCursor:
834861
"""Find and return the files collection documents that match ``filter``
835862

test/asynchronous/test_gridfs_bucket.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ async def test_multi_chunk_delete(self):
115115
self.assertEqual(0, await self.db.fs.files.count_documents({}))
116116
self.assertEqual(0, await self.db.fs.chunks.count_documents({}))
117117

118+
async def test_delete_by_name(self):
    """delete_by_name removes the files document and every chunk of the file."""
    files_coll = self.db.fs.files
    chunks_coll = self.db.fs.chunks

    # Both GridFS collections must start out empty.
    for coll in (files_coll, chunks_coll):
        self.assertEqual(0, await coll.count_documents({}))

    bucket = gridfs.AsyncGridFSBucket(self.db)
    # One-byte chunks, so the five-byte payload is stored as five chunk documents.
    await bucket.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1)
    self.assertEqual(1, await files_coll.count_documents({}))
    self.assertEqual(5, await chunks_coll.count_documents({}))

    # Deleting by filename must leave nothing behind in either collection.
    await bucket.delete_by_name("test_filename")
    for coll in (files_coll, chunks_coll):
        self.assertEqual(0, await coll.count_documents({}))
128+
118129
async def test_empty_file(self):
119130
oid = await self.fs.upload_from_stream("test_filename", b"")
120131
self.assertEqual(b"", await (await self.fs.open_download_stream(oid)).read())

test/asynchronous/test_session.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
from bson import DBRef
4747
from gridfs.asynchronous.grid_file import AsyncGridFS, AsyncGridFSBucket
48-
from pymongo import ASCENDING, AsyncMongoClient, monitoring
48+
from pymongo import ASCENDING, AsyncMongoClient, _csot, monitoring
4949
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
5050
from pymongo.asynchronous.cursor import AsyncCursor
5151
from pymongo.asynchronous.helpers import anext
@@ -543,7 +543,7 @@ async def find(session=None):
543543
(bucket.rename, [1, "f2"], {}),
544544
# Delete both files so _test_ops can run these operations twice.
545545
(bucket.delete, [1], {}),
546-
(bucket.delete, [2], {}),
546+
(bucket.delete_by_name, ["f"], {}),
547547
)
548548

549549
async def test_gridfsbucket_cursor(self):

test/asynchronous/test_transactions.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
from bson import encode
3434
from bson.raw_bson import RawBSONDocument
35-
from pymongo import WriteConcern
35+
from pymongo import WriteConcern, _csot
3636
from pymongo.asynchronous import client_session
3737
from pymongo.asynchronous.client_session import TransactionOptions
3838
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
@@ -295,6 +295,7 @@ async def gridfs_open_upload_stream(*args, **kwargs):
295295
"new-name",
296296
),
297297
),
298+
(bucket.delete_by_name, ("new-name",)),
298299
]
299300

300301
async with client.start_session() as s, await s.start_transaction():

test/asynchronous/unified_format.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
from bson import SON, json_util
6767
from bson.codec_options import DEFAULT_CODEC_OPTIONS
6868
from bson.objectid import ObjectId
69-
from gridfs import AsyncGridFSBucket, GridOut
69+
from gridfs import AsyncGridFSBucket, GridOut, NoFile
7070
from pymongo import ASCENDING, AsyncMongoClient, CursorType, _csot
7171
from pymongo.asynchronous.change_stream import AsyncChangeStream
7272
from pymongo.asynchronous.client_session import AsyncClientSession, TransactionOptions, _TxnState
@@ -632,7 +632,7 @@ def process_error(self, exception, spec):
632632
# Connection errors are considered client errors.
633633
if isinstance(error, ConnectionFailure):
634634
self.assertNotIsInstance(error, NotPrimaryError)
635-
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)):
635+
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)):
636636
pass
637637
else:
638638
self.assertNotIsInstance(error, PyMongoError)

test/gridfs/deleteByName.json

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
{
2+
"description": "gridfs-deleteByName",
3+
"schemaVersion": "1.0",
4+
"createEntities": [
5+
{
6+
"client": {
7+
"id": "client0"
8+
}
9+
},
10+
{
11+
"database": {
12+
"id": "database0",
13+
"client": "client0",
14+
"databaseName": "gridfs-tests"
15+
}
16+
},
17+
{
18+
"bucket": {
19+
"id": "bucket0",
20+
"database": "database0"
21+
}
22+
},
23+
{
24+
"collection": {
25+
"id": "bucket0_files_collection",
26+
"database": "database0",
27+
"collectionName": "fs.files"
28+
}
29+
},
30+
{
31+
"collection": {
32+
"id": "bucket0_chunks_collection",
33+
"database": "database0",
34+
"collectionName": "fs.chunks"
35+
}
36+
}
37+
],
38+
"initialData": [
39+
{
40+
"collectionName": "fs.files",
41+
"databaseName": "gridfs-tests",
42+
"documents": [
43+
{
44+
"_id": {
45+
"$oid": "000000000000000000000001"
46+
},
47+
"length": 0,
48+
"chunkSize": 4,
49+
"uploadDate": {
50+
"$date": "1970-01-01T00:00:00.000Z"
51+
},
52+
"filename": "filename",
53+
"metadata": {}
54+
},
55+
{
56+
"_id": {
57+
"$oid": "000000000000000000000002"
58+
},
59+
"length": 0,
60+
"chunkSize": 4,
61+
"uploadDate": {
62+
"$date": "1970-01-01T00:00:00.000Z"
63+
},
64+
"filename": "filename",
65+
"metadata": {}
66+
},
67+
{
68+
"_id": {
69+
"$oid": "000000000000000000000003"
70+
},
71+
"length": 2,
72+
"chunkSize": 4,
73+
"uploadDate": {
74+
"$date": "1970-01-01T00:00:00.000Z"
75+
},
76+
"filename": "filename",
77+
"metadata": {}
78+
},
79+
{
80+
"_id": {
81+
"$oid": "000000000000000000000004"
82+
},
83+
"length": 8,
84+
"chunkSize": 4,
85+
"uploadDate": {
86+
"$date": "1970-01-01T00:00:00.000Z"
87+
},
88+
"filename": "otherfilename",
89+
"metadata": {}
90+
}
91+
]
92+
},
93+
{
94+
"collectionName": "fs.chunks",
95+
"databaseName": "gridfs-tests",
96+
"documents": [
97+
{
98+
"_id": {
99+
"$oid": "000000000000000000000001"
100+
},
101+
"files_id": {
102+
"$oid": "000000000000000000000002"
103+
},
104+
"n": 0,
105+
"data": {
106+
"$binary": {
107+
"base64": "",
108+
"subType": "00"
109+
}
110+
}
111+
},
112+
{
113+
"_id": {
114+
"$oid": "000000000000000000000002"
115+
},
116+
"files_id": {
117+
"$oid": "000000000000000000000003"
118+
},
119+
"n": 0,
120+
"data": {
121+
"$binary": {
122+
"base64": "",
123+
"subType": "00"
124+
}
125+
}
126+
},
127+
{
128+
"_id": {
129+
"$oid": "000000000000000000000003"
130+
},
131+
"files_id": {
132+
"$oid": "000000000000000000000003"
133+
},
134+
"n": 0,
135+
"data": {
136+
"$binary": {
137+
"base64": "",
138+
"subType": "00"
139+
}
140+
}
141+
},
142+
{
143+
"_id": {
144+
"$oid": "000000000000000000000004"
145+
},
146+
"files_id": {
147+
"$oid": "000000000000000000000004"
148+
},
149+
"n": 0,
150+
"data": {
151+
"$binary": {
152+
"base64": "",
153+
"subType": "00"
154+
}
155+
}
156+
}
157+
]
158+
}
159+
],
160+
"tests": [
161+
{
162+
"description": "delete when multiple revisions of the file exist",
163+
"operations": [
164+
{
165+
"name": "deleteByName",
166+
"object": "bucket0",
167+
"arguments": {
168+
"filename": "filename"
169+
}
170+
}
171+
],
172+
"outcome": [
173+
{
174+
"collectionName": "fs.files",
175+
"databaseName": "gridfs-tests",
176+
"documents": [
177+
{
178+
"_id": {
179+
"$oid": "000000000000000000000004"
180+
},
181+
"length": 8,
182+
"chunkSize": 4,
183+
"uploadDate": {
184+
"$date": "1970-01-01T00:00:00.000Z"
185+
},
186+
"filename": "otherfilename",
187+
"metadata": {}
188+
}
189+
]
190+
},
191+
{
192+
"collectionName": "fs.chunks",
193+
"databaseName": "gridfs-tests",
194+
"documents": [
195+
{
196+
"_id": {
197+
"$oid": "000000000000000000000004"
198+
},
199+
"files_id": {
200+
"$oid": "000000000000000000000004"
201+
},
202+
"n": 0,
203+
"data": {
204+
"$binary": {
205+
"base64": "",
206+
"subType": "00"
207+
}
208+
}
209+
}
210+
]
211+
}
212+
]
213+
},
214+
{
215+
"description": "delete when file name does not exist",
216+
"operations": [
217+
{
218+
"name": "deleteByName",
219+
"object": "bucket0",
220+
"arguments": {
221+
"filename": "missing-file"
222+
},
223+
"expectError": {
224+
"isClientError": true
225+
}
226+
}
227+
]
228+
}
229+
]
230+
}

test/test_gridfs_bucket.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ def test_multi_chunk_delete(self):
115115
self.assertEqual(0, self.db.fs.files.count_documents({}))
116116
self.assertEqual(0, self.db.fs.chunks.count_documents({}))
117117

118+
def test_delete_by_name(self):
    """delete_by_name removes the files document and every chunk of the file."""
    files_coll = self.db.fs.files
    chunks_coll = self.db.fs.chunks

    # Both GridFS collections must start out empty.
    for coll in (files_coll, chunks_coll):
        self.assertEqual(0, coll.count_documents({}))

    bucket = gridfs.GridFSBucket(self.db)
    # One-byte chunks, so the five-byte payload is stored as five chunk documents.
    bucket.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1)
    self.assertEqual(1, files_coll.count_documents({}))
    self.assertEqual(5, chunks_coll.count_documents({}))

    # Deleting by filename must leave nothing behind in either collection.
    bucket.delete_by_name("test_filename")
    for coll in (files_coll, chunks_coll):
        self.assertEqual(0, coll.count_documents({}))
128+
118129
def test_empty_file(self):
119130
oid = self.fs.upload_from_stream("test_filename", b"")
120131
self.assertEqual(b"", (self.fs.open_download_stream(oid)).read())

0 commit comments

Comments
 (0)