diff --git a/doc/changelog.rst b/doc/changelog.rst index 0633049857..d25aff5655 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -9,6 +9,8 @@ PyMongo 4.12 brings a number of changes including: - Support for configuring DEK cache lifetime via the ``key_expiration_ms`` argument to :class:`~pymongo.encryption_options.AutoEncryptionOpts`. - Support for $lookup in CSFLE and QE supported on MongoDB 8.1+. +- Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.delete_by_name` and :meth:`gridfs.grid_file.GridFSBucket.delete_by_name` + for more performant deletion of a file with multiple revisions. - AsyncMongoClient no longer performs DNS resolution for "mongodb+srv://" connection strings on creation. To avoid blocking the asyncio loop, the resolution is now deferred until the client is first connected. - Added index hinting support to the diff --git a/gridfs/asynchronous/grid_file.py b/gridfs/asynchronous/grid_file.py index 3f3179c45c..d634eb745a 100644 --- a/gridfs/asynchronous/grid_file.py +++ b/gridfs/asynchronous/grid_file.py @@ -834,6 +834,35 @@ async def delete(self, file_id: Any, session: Optional[AsyncClientSession] = Non if not res.deleted_count: raise NoFile("no file could be deleted because none matched %s" % file_id) + @_csot.apply + async def delete_by_name( + self, filename: str, session: Optional[AsyncClientSession] = None + ) -> None: + """Given a filename, delete this stored file's files collection document(s) + and associated chunks from a GridFS bucket. + + For example:: + + my_db = AsyncMongoClient().test + fs = AsyncGridFSBucket(my_db) + await fs.upload_from_stream("test_file", "data I want to store!") + await fs.delete_by_name("test_file") + + Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists. + + :param filename: The name of the file to be deleted. + :param session: a :class:`~pymongo.client_session.AsyncClientSession` + + .. versionadded:: 4.12 + """ + _disallow_transactions(session) + files = self._files.find({"filename": filename}, {"_id": 1}, session=session) + file_ids = [file["_id"] async for file in files] + res = await self._files.delete_many({"_id": {"$in": file_ids}}, session=session) + await self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session) + if not res.deleted_count: + raise NoFile(f"no file could be deleted because none matched filename {filename!r}") + def find(self, *args: Any, **kwargs: Any) -> AsyncGridOutCursor: """Find and return the files collection documents that match ``filter`` diff --git a/gridfs/synchronous/grid_file.py b/gridfs/synchronous/grid_file.py index 35386857d6..c5c3c62cde 100644 --- a/gridfs/synchronous/grid_file.py +++ b/gridfs/synchronous/grid_file.py @@ -830,6 +830,33 @@ def delete(self, file_id: Any, session: Optional[ClientSession] = None) -> None: if not res.deleted_count: raise NoFile("no file could be deleted because none matched %s" % file_id) + @_csot.apply + def delete_by_name(self, filename: str, session: Optional[ClientSession] = None) -> None: + """Given a filename, delete this stored file's files collection document(s) + and associated chunks from a GridFS bucket. + + For example:: + + my_db = MongoClient().test + fs = GridFSBucket(my_db) + fs.upload_from_stream("test_file", "data I want to store!") + fs.delete_by_name("test_file") + + Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists. + + :param filename: The name of the file to be deleted. + :param session: a :class:`~pymongo.client_session.ClientSession` + + .. versionadded:: 4.12 + """ + _disallow_transactions(session) + files = self._files.find({"filename": filename}, {"_id": 1}, session=session) + file_ids = [file["_id"] for file in files] + res = self._files.delete_many({"_id": {"$in": file_ids}}, session=session) + self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session) + if not res.deleted_count: + raise NoFile(f"no file could be deleted because none matched filename {filename!r}") + def find(self, *args: Any, **kwargs: Any) -> GridOutCursor: """Find and return the files collection documents that match ``filter`` diff --git a/test/asynchronous/test_gridfs_bucket.py b/test/asynchronous/test_gridfs_bucket.py index 29877ee9c4..03d49d5c3d 100644 --- a/test/asynchronous/test_gridfs_bucket.py +++ b/test/asynchronous/test_gridfs_bucket.py @@ -115,6 +115,17 @@ async def test_multi_chunk_delete(self): self.assertEqual(0, await self.db.fs.files.count_documents({})) self.assertEqual(0, await self.db.fs.chunks.count_documents({})) + async def test_delete_by_name(self): + self.assertEqual(0, await self.db.fs.files.count_documents({})) + self.assertEqual(0, await self.db.fs.chunks.count_documents({})) + gfs = gridfs.AsyncGridFSBucket(self.db) + await gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1) + self.assertEqual(1, await self.db.fs.files.count_documents({})) + self.assertEqual(5, await self.db.fs.chunks.count_documents({})) + await gfs.delete_by_name("test_filename") + self.assertEqual(0, await self.db.fs.files.count_documents({})) + self.assertEqual(0, await self.db.fs.chunks.count_documents({})) + async def test_empty_file(self): oid = await self.fs.upload_from_stream("test_filename", b"") self.assertEqual(b"", await (await self.fs.open_download_stream(oid)).read()) diff --git a/test/asynchronous/test_session.py b/test/asynchronous/test_session.py index 4431cbcb16..3c249718ce 100644 --- a/test/asynchronous/test_session.py +++ b/test/asynchronous/test_session.py @@ -45,7 +45,7 @@ from bson import DBRef from gridfs.asynchronous.grid_file import AsyncGridFS, AsyncGridFSBucket -from pymongo import ASCENDING, AsyncMongoClient, monitoring +from pymongo import ASCENDING, AsyncMongoClient, _csot, monitoring from pymongo.asynchronous.command_cursor import AsyncCommandCursor from pymongo.asynchronous.cursor import AsyncCursor from pymongo.asynchronous.helpers import anext @@ -543,7 +543,7 @@ async def find(session=None): (bucket.rename, [1, "f2"], {}), # Delete both files so _test_ops can run these operations twice. (bucket.delete, [1], {}), - (bucket.delete, [2], {}), + (bucket.delete_by_name, ["f"], {}), ) async def test_gridfsbucket_cursor(self): diff --git a/test/asynchronous/test_transactions.py b/test/asynchronous/test_transactions.py index 884110cd45..ea4d1e3e6c 100644 --- a/test/asynchronous/test_transactions.py +++ b/test/asynchronous/test_transactions.py @@ -32,7 +32,7 @@ from bson import encode from bson.raw_bson import RawBSONDocument -from pymongo import WriteConcern +from pymongo import WriteConcern, _csot from pymongo.asynchronous import client_session from pymongo.asynchronous.client_session import TransactionOptions from pymongo.asynchronous.command_cursor import AsyncCommandCursor @@ -295,6 +295,7 @@ async def gridfs_open_upload_stream(*args, **kwargs): "new-name", ), ), + (bucket.delete_by_name, ("new-name",)), ] async with client.start_session() as s, await s.start_transaction(): diff --git a/test/asynchronous/unified_format.py b/test/asynchronous/unified_format.py index c6884a6d16..cc516ee822 100644 --- a/test/asynchronous/unified_format.py +++ b/test/asynchronous/unified_format.py @@ -66,7 +66,7 @@ from bson import SON, json_util from bson.codec_options import DEFAULT_CODEC_OPTIONS from bson.objectid import ObjectId -from gridfs import AsyncGridFSBucket, GridOut +from gridfs import AsyncGridFSBucket, GridOut, NoFile from pymongo import ASCENDING, AsyncMongoClient, CursorType, _csot from pymongo.asynchronous.change_stream import AsyncChangeStream from pymongo.asynchronous.client_session import AsyncClientSession, TransactionOptions, _TxnState @@ -632,7 +632,7 @@ def process_error(self, exception, spec): # Connection errors are considered client errors. if isinstance(error, ConnectionFailure): self.assertNotIsInstance(error, NotPrimaryError) - elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)): + elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)): pass else: self.assertNotIsInstance(error, PyMongoError) diff --git a/test/gridfs/deleteByName.json b/test/gridfs/deleteByName.json new file mode 100644 index 0000000000..884d0300ce --- /dev/null +++ b/test/gridfs/deleteByName.json @@ -0,0 +1,230 @@ +{ + "description": "gridfs-deleteByName", + "schemaVersion": "1.0", + "createEntities": [ + { + "client": { + "id": "client0" + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "gridfs-tests" + } + }, + { + "bucket": { + "id": "bucket0", + "database": "database0" + } + }, + { + "collection": { + "id": "bucket0_files_collection", + "database": "database0", + "collectionName": "fs.files" + } + }, + { + "collection": { + "id": "bucket0_chunks_collection", + "database": "database0", + "collectionName": "fs.chunks" + } + } + ], + "initialData": [ + { + "collectionName": "fs.files", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000002" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000003" + }, + "length": 2, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000004" + }, + "length": 8, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "otherfilename", + "metadata": {} + } + ] + }, + { + "collectionName": "fs.chunks", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "files_id": { + "$oid": "000000000000000000000002" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000002" + }, + "files_id": { + "$oid": "000000000000000000000003" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000003" + }, + "files_id": { + "$oid": "000000000000000000000003" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000004" + }, + "files_id": { + "$oid": "000000000000000000000004" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + } + ] + } + ], + "tests": [ + { + "description": "delete when multiple revisions of the file exist", + "operations": [ + { + "name": "deleteByName", + "object": "bucket0", + "arguments": { + "filename": "filename" + } + } + ], + "outcome": [ + { + "collectionName": "fs.files", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000004" + }, + "length": 8, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "otherfilename", + "metadata": {} + } + ] + }, + { + "collectionName": "fs.chunks", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000004" + }, + "files_id": { + "$oid": "000000000000000000000004" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + } + ] + } + ] + }, + { + "description": "delete when file name does not exist", + "operations": [ + { + "name": "deleteByName", + "object": "bucket0", + "arguments": { + "filename": "missing-file" + }, + "expectError": { + "isClientError": true + } + } + ] + } + ] +} diff --git a/test/test_gridfs_bucket.py b/test/test_gridfs_bucket.py index d68c9f6ba2..04063a213d 100644 --- a/test/test_gridfs_bucket.py +++ b/test/test_gridfs_bucket.py @@ -115,6 +115,17 @@ def test_multi_chunk_delete(self): self.assertEqual(0, self.db.fs.files.count_documents({})) self.assertEqual(0, self.db.fs.chunks.count_documents({})) + def test_delete_by_name(self): + self.assertEqual(0, self.db.fs.files.count_documents({})) + self.assertEqual(0, self.db.fs.chunks.count_documents({})) + gfs = gridfs.GridFSBucket(self.db) + gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1) + self.assertEqual(1, self.db.fs.files.count_documents({})) + self.assertEqual(5, self.db.fs.chunks.count_documents({})) + gfs.delete_by_name("test_filename") + self.assertEqual(0, self.db.fs.files.count_documents({})) + self.assertEqual(0, self.db.fs.chunks.count_documents({})) + def test_empty_file(self): oid = self.fs.upload_from_stream("test_filename", b"") self.assertEqual(b"", (self.fs.open_download_stream(oid)).read()) diff --git a/test/test_session.py b/test/test_session.py index 905539a1f8..ec25a735e7 100644 --- a/test/test_session.py +++ b/test/test_session.py @@ -45,7 +45,7 @@ from bson import DBRef from gridfs.synchronous.grid_file import GridFS, GridFSBucket -from pymongo import ASCENDING, MongoClient, monitoring +from pymongo import ASCENDING, MongoClient, _csot, monitoring from pymongo.common import _MAX_END_SESSIONS from pymongo.errors import ConfigurationError, InvalidOperation, OperationFailure from pymongo.operations import IndexModel, InsertOne, UpdateOne @@ -543,7 +543,7 @@ def find(session=None): (bucket.rename, [1, "f2"], {}), # Delete both files so _test_ops can run these operations twice. (bucket.delete, [1], {}), - (bucket.delete, [2], {}), + (bucket.delete_by_name, ["f"], {}), ) def test_gridfsbucket_cursor(self): diff --git a/test/test_transactions.py b/test/test_transactions.py index 80b3e3765e..c549b743be 100644 --- a/test/test_transactions.py +++ b/test/test_transactions.py @@ -32,7 +32,7 @@ from bson import encode from bson.raw_bson import RawBSONDocument -from pymongo import WriteConcern +from pymongo import WriteConcern, _csot from pymongo.errors import ( CollectionInvalid, ConfigurationError, @@ -287,6 +287,7 @@ def gridfs_open_upload_stream(*args, **kwargs): "new-name", ), ), + (bucket.delete_by_name, ("new-name",)), ] with client.start_session() as s, s.start_transaction(): diff --git a/test/unified_format.py b/test/unified_format.py index 4aec2ad729..fd7f92909e 100644 --- a/test/unified_format.py +++ b/test/unified_format.py @@ -65,7 +65,7 @@ from bson import SON, json_util from bson.codec_options import DEFAULT_CODEC_OPTIONS from bson.objectid import ObjectId -from gridfs import GridFSBucket, GridOut +from gridfs import GridFSBucket, GridOut, NoFile from pymongo import ASCENDING, CursorType, MongoClient, _csot from pymongo.encryption_options import _HAVE_PYMONGOCRYPT from pymongo.errors import ( @@ -631,7 +631,7 @@ def process_error(self, exception, spec): # Connection errors are considered client errors. if isinstance(error, ConnectionFailure): self.assertNotIsInstance(error, NotPrimaryError) - elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)): + elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)): pass else: self.assertNotIsInstance(error, PyMongoError)