-
Notifications
You must be signed in to change notification settings - Fork 1.1k
PYTHON-4947 - GridFS spec: Add performant 'delete revisions by filena… #2218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
31526f3
3cfa69c
e98c5af
370ba42
bfec2ae
fc1813b
3f112d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -834,6 +834,35 @@ async def delete(self, file_id: Any, session: Optional[AsyncClientSession] = Non | |
if not res.deleted_count: | ||
raise NoFile("no file could be deleted because none matched %s" % file_id) | ||
|
||
@_csot.apply | ||
async def delete_by_name( | ||
self, filename: str, session: Optional[AsyncClientSession] = None | ||
) -> None: | ||
"""Given a filename, delete this stored file's files collection document(s) | ||
and associated chunks from a GridFS bucket. | ||
|
||
For example:: | ||
|
||
my_db = AsyncMongoClient().test | ||
fs = AsyncGridFSBucket(my_db) | ||
await fs.upload_from_stream("test_file", "data I want to store!") | ||
await fs.delete_by_name("test_file") | ||
|
||
Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists. | ||
|
||
:param filename: The name of the file to be deleted. | ||
:param session: a :class:`~pymongo.client_session.AsyncClientSession` | ||
|
||
.. versionadded:: 4.12 | ||
""" | ||
_disallow_transactions(session) | ||
files = self._files.find({"filename": filename}, {"_id": 1}, session=session) | ||
file_ids = [file["_id"] async for file in files] | ||
res = await self._files.delete_many({"_id": {"$in": file_ids}}, session=session) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does the spec say anything about what should happen if file_ids is so large/numerous it overflows maxBsonObjectSize? Assuming all ids are ObjectIds, this will happen when a single filename has around 850,000 revisions: >>> len(encode({"_id": {"$in": [ObjectId()]*850000}}))
16888915
>>> MAX_BSON_SIZE
16777216 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It makes no mention of that edge case. Do we have a standard pattern for overflow issues in other APIs? |
||
await self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could improve this by using client.bulk_write on 8.0+ servers to combine both deletes into one command but that's a spec issue. |
||
if not res.deleted_count: | ||
raise NoFile(f"no file could be deleted because none matched filename {filename!r}") | ||
|
||
def find(self, *args: Any, **kwargs: Any) -> AsyncGridOutCursor: | ||
"""Find and return the files collection documents that match ``filter`` | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -115,6 +115,17 @@ async def test_multi_chunk_delete(self): | |
self.assertEqual(0, await self.db.fs.files.count_documents({})) | ||
self.assertEqual(0, await self.db.fs.chunks.count_documents({})) | ||
|
||
async def test_delete_by_name(self): | ||
self.assertEqual(0, await self.db.fs.files.count_documents({})) | ||
self.assertEqual(0, await self.db.fs.chunks.count_documents({})) | ||
gfs = gridfs.AsyncGridFSBucket(self.db) | ||
await gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test should upload multiple versions of test_filename and assert all are deleted. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh I see the spec test already does this so you can disregard. |
||
self.assertEqual(1, await self.db.fs.files.count_documents({})) | ||
self.assertEqual(5, await self.db.fs.chunks.count_documents({})) | ||
await gfs.delete_by_name("test_filename") | ||
self.assertEqual(0, await self.db.fs.files.count_documents({})) | ||
self.assertEqual(0, await self.db.fs.chunks.count_documents({})) | ||
|
||
async def test_empty_file(self): | ||
oid = await self.fs.upload_from_stream("test_filename", b"") | ||
self.assertEqual(b"", await (await self.fs.open_download_stream(oid)).read()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
{ | ||
"description": "gridfs-deleteByName", | ||
"schemaVersion": "1.0", | ||
"createEntities": [ | ||
{ | ||
"client": { | ||
"id": "client0" | ||
} | ||
}, | ||
{ | ||
"database": { | ||
"id": "database0", | ||
"client": "client0", | ||
"databaseName": "gridfs-tests" | ||
} | ||
}, | ||
{ | ||
"bucket": { | ||
"id": "bucket0", | ||
"database": "database0" | ||
} | ||
}, | ||
{ | ||
"collection": { | ||
"id": "bucket0_files_collection", | ||
"database": "database0", | ||
"collectionName": "fs.files" | ||
} | ||
}, | ||
{ | ||
"collection": { | ||
"id": "bucket0_chunks_collection", | ||
"database": "database0", | ||
"collectionName": "fs.chunks" | ||
} | ||
} | ||
], | ||
"initialData": [ | ||
{ | ||
"collectionName": "fs.files", | ||
"databaseName": "gridfs-tests", | ||
"documents": [ | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000001" | ||
}, | ||
"length": 0, | ||
"chunkSize": 4, | ||
"uploadDate": { | ||
"$date": "1970-01-01T00:00:00.000Z" | ||
}, | ||
"filename": "filename", | ||
"metadata": {} | ||
}, | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000002" | ||
}, | ||
"length": 0, | ||
"chunkSize": 4, | ||
"uploadDate": { | ||
"$date": "1970-01-01T00:00:00.000Z" | ||
}, | ||
"filename": "filename", | ||
"metadata": {} | ||
}, | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000003" | ||
}, | ||
"length": 2, | ||
"chunkSize": 4, | ||
"uploadDate": { | ||
"$date": "1970-01-01T00:00:00.000Z" | ||
}, | ||
"filename": "filename", | ||
"metadata": {} | ||
}, | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000004" | ||
}, | ||
"length": 8, | ||
"chunkSize": 4, | ||
"uploadDate": { | ||
"$date": "1970-01-01T00:00:00.000Z" | ||
}, | ||
"filename": "otherfilename", | ||
"metadata": {} | ||
} | ||
] | ||
}, | ||
{ | ||
"collectionName": "fs.chunks", | ||
"databaseName": "gridfs-tests", | ||
"documents": [ | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000001" | ||
}, | ||
"files_id": { | ||
"$oid": "000000000000000000000002" | ||
}, | ||
"n": 0, | ||
"data": { | ||
"$binary": { | ||
"base64": "", | ||
"subType": "00" | ||
} | ||
} | ||
}, | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000002" | ||
}, | ||
"files_id": { | ||
"$oid": "000000000000000000000003" | ||
}, | ||
"n": 0, | ||
"data": { | ||
"$binary": { | ||
"base64": "", | ||
"subType": "00" | ||
} | ||
} | ||
}, | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000003" | ||
}, | ||
"files_id": { | ||
"$oid": "000000000000000000000003" | ||
}, | ||
"n": 0, | ||
"data": { | ||
"$binary": { | ||
"base64": "", | ||
"subType": "00" | ||
} | ||
} | ||
}, | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000004" | ||
}, | ||
"files_id": { | ||
"$oid": "000000000000000000000004" | ||
}, | ||
"n": 0, | ||
"data": { | ||
"$binary": { | ||
"base64": "", | ||
"subType": "00" | ||
} | ||
} | ||
} | ||
] | ||
} | ||
], | ||
"tests": [ | ||
{ | ||
"description": "delete when multiple revisions of the file exist", | ||
"operations": [ | ||
{ | ||
"name": "deleteByName", | ||
"object": "bucket0", | ||
"arguments": { | ||
"filename": "filename" | ||
} | ||
} | ||
], | ||
"outcome": [ | ||
{ | ||
"collectionName": "fs.files", | ||
"databaseName": "gridfs-tests", | ||
"documents": [ | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000004" | ||
}, | ||
"length": 8, | ||
"chunkSize": 4, | ||
"uploadDate": { | ||
"$date": "1970-01-01T00:00:00.000Z" | ||
}, | ||
"filename": "otherfilename", | ||
"metadata": {} | ||
} | ||
] | ||
}, | ||
{ | ||
"collectionName": "fs.chunks", | ||
"databaseName": "gridfs-tests", | ||
"documents": [ | ||
{ | ||
"_id": { | ||
"$oid": "000000000000000000000004" | ||
}, | ||
"files_id": { | ||
"$oid": "000000000000000000000004" | ||
}, | ||
"n": 0, | ||
"data": { | ||
"$binary": { | ||
"base64": "", | ||
"subType": "00" | ||
} | ||
} | ||
} | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"description": "delete when file name does not exist", | ||
"operations": [ | ||
{ | ||
"name": "deleteByName", | ||
"object": "bucket0", | ||
"arguments": { | ||
"filename": "missing-file" | ||
}, | ||
"expectError": { | ||
"isClientError": true | ||
} | ||
} | ||
] | ||
} | ||
] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to add this helper to all the tests for sessions/transactions/CSOT. For example test_gridfs_bucket in test_session and test_gridfs_does_not_support_transactions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be a separate ticket?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be in this ticket.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't the existing tests missing this decorator have it added in a separate ticket? Those changes seem unrelated to this addition.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's completely related. We're adding a new api so we have to test all the features. Although I'm not not what you mean by "tests missing this decorator".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm saying we need to add the delete_by_name helper to the tests for sessions/transactions/CSOT...