From cd4b1dbecbe3d3406bc4d58129b810afb9e47cd8 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Mon, 24 Mar 2025 12:27:00 -0400 Subject: [PATCH 1/4] PYTHON-4946 - GridFS spec: Add performant 'rename all revisions by filename' feature - rename_by_name --- doc/changelog.rst | 2 + gridfs/asynchronous/grid_file.py | 30 +++ gridfs/synchronous/grid_file.py | 30 +++ test/asynchronous/unified_format.py | 5 +- test/gridfs/renameByName.json | 313 ++++++++++++++++++++++++++++ test/unified_format.py | 5 +- 6 files changed, 383 insertions(+), 2 deletions(-) create mode 100644 test/gridfs/renameByName.json diff --git a/doc/changelog.rst b/doc/changelog.rst index b172da6b8e..88034d0c6d 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -9,6 +9,8 @@ PyMongo 4.12 brings a number of changes including: - Support for configuring DEK cache lifetime via the ``key_expiration_ms`` argument to :class:`~pymongo.encryption_options.AutoEncryptionOpts`. - Support for $lookup in CSFLE and QE supported on MongoDB 8.1+. +- Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.rename_by_name` and :meth:`gridfs.grid_file.GridFSBucket.rename_by_name` + for more performant renaming of file revisions. Issues Resolved ............... diff --git a/gridfs/asynchronous/grid_file.py b/gridfs/asynchronous/grid_file.py index 3f3179c45c..567e109824 100644 --- a/gridfs/asynchronous/grid_file.py +++ b/gridfs/asynchronous/grid_file.py @@ -1021,6 +1021,36 @@ async def rename( "matched file_id %i" % (new_filename, file_id) ) + async def rename_by_name( + self, filename: str, new_filename: str, session: Optional[AsyncClientSession] = None + ) -> None: + """Renames the stored file with the specified filename. + + For example:: + + my_db = AsyncMongoClient().test + fs = AsyncGridFSBucket(my_db) + await fs.upload_from_stream("test_file", "data I want to store!") + await fs.rename_by_name("test_file", "new_test_name") + + Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists. + + :param filename: The filename of the file to be renamed. + :param new_filename: The new name of the file. + :param session: a + :class:`~pymongo.client_session.AsyncClientSession` + + .. versionadded:: 4.12 + """ + _disallow_transactions(session) + result = await self._files.update_many( + {"filename": filename}, {"$set": {"filename": new_filename}}, session=session + ) + if not result.matched_count: + raise NoFile( + f"no files could be renamed {new_filename} because none matched filename {filename}" + ) + class AsyncGridIn: """Class to write data to GridFS.""" diff --git a/gridfs/synchronous/grid_file.py b/gridfs/synchronous/grid_file.py index 35386857d6..78e1e6f8c5 100644 --- a/gridfs/synchronous/grid_file.py +++ b/gridfs/synchronous/grid_file.py @@ -1015,6 +1015,36 @@ def rename( "matched file_id %i" % (new_filename, file_id) ) + def rename_by_name( + self, filename: str, new_filename: str, session: Optional[ClientSession] = None + ) -> None: + """Renames the stored file with the specified filename. + + For example:: + + my_db = MongoClient().test + fs = GridFSBucket(my_db) + fs.upload_from_stream("test_file", "data I want to store!") + fs.rename_by_name("test_file", "new_test_name") + + Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists. + + :param filename: The filename of the file to be renamed. + :param new_filename: The new name of the file. + :param session: a + :class:`~pymongo.client_session.ClientSession` + + .. versionadded:: 4.12 + """ + _disallow_transactions(session) + result = self._files.update_many( + {"filename": filename}, {"$set": {"filename": new_filename}}, session=session + ) + if not result.matched_count: + raise NoFile( + f"no files could be renamed {new_filename} because none matched filename {filename}" + ) + class GridIn: """Class to write data to GridFS.""" diff --git a/test/asynchronous/unified_format.py b/test/asynchronous/unified_format.py index 886b31e4a6..3d63ced9d8 100644 --- a/test/asynchronous/unified_format.py +++ b/test/asynchronous/unified_format.py @@ -66,7 +66,7 @@ from bson import SON, json_util from bson.codec_options import DEFAULT_CODEC_OPTIONS from bson.objectid import ObjectId -from gridfs import AsyncGridFSBucket, GridOut +from gridfs import AsyncGridFSBucket, GridOut, NoFile from pymongo import ASCENDING, AsyncMongoClient, CursorType, _csot from pymongo.asynchronous.change_stream import AsyncChangeStream from pymongo.asynchronous.client_session import AsyncClientSession, TransactionOptions, _TxnState @@ -630,6 +630,9 @@ def process_error(self, exception, spec): self.assertNotIsInstance(error, NotPrimaryError) elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)): pass + # gridfs NoFile errors are considered client errors. + elif isinstance(error, NoFile): + pass else: self.assertNotIsInstance(error, PyMongoError) diff --git a/test/gridfs/renameByName.json b/test/gridfs/renameByName.json new file mode 100644 index 0000000000..26f04fb9e0 --- /dev/null +++ b/test/gridfs/renameByName.json @@ -0,0 +1,313 @@ +{ + "description": "gridfs-renameByName", + "schemaVersion": "1.0", + "createEntities": [ + { + "client": { + "id": "client0" + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "gridfs-tests" + } + }, + { + "bucket": { + "id": "bucket0", + "database": "database0" + } + }, + { + "collection": { + "id": "bucket0_files_collection", + "database": "database0", + "collectionName": "fs.files" + } + }, + { + "collection": { + "id": "bucket0_chunks_collection", + "database": "database0", + "collectionName": "fs.chunks" + } + } + ], + "initialData": [ + { + "collectionName": "fs.files", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000002" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000003" + }, + "length": 2, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000004" + }, + "length": 8, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "otherfilename", + "metadata": {} + } + ] + }, + { + "collectionName": "fs.chunks", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "files_id": { + "$oid": "000000000000000000000002" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000002" + }, + "files_id": { + "$oid": "000000000000000000000003" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000003" + }, + "files_id": { + "$oid": "000000000000000000000004" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000004" + }, + "files_id": { + "$oid": "000000000000000000000004" + }, + "n": 1, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + } + ] + } + ], + "tests": [ + { + "description": "rename when multiple revisions of the file exist", + "operations": [ + { + "name": "renameByName", + "object": "bucket0", + "arguments": { + "filename": "filename", + "newFilename": "newfilename" + } + } + ], + "outcome": [ + { + "collectionName": "fs.files", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "newfilename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000002" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "newfilename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000003" + }, + "length": 2, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "newfilename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000004" + }, + "length": 8, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "otherfilename", + "metadata": {} + } + ] + }, + { + "collectionName": "fs.chunks", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "files_id": { + "$oid": "000000000000000000000002" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000002" + }, + "files_id": { + "$oid": "000000000000000000000003" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000003" + }, + "files_id": { + "$oid": "000000000000000000000004" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + }, + { + "_id": { + "$oid": "000000000000000000000004" + }, + "files_id": { + "$oid": "000000000000000000000004" + }, + "n": 1, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + } + ] + } + ] + }, + { + "description": "rename when file name does not exist", + "operations": [ + { + "name": "renameByName", + "object": "bucket0", + "arguments": { + "filename": "missing-file", + "newFilename": "newfilename" + }, + "expectError": { + "isClientError": true + } + } + ] + } + ] +} diff --git a/test/unified_format.py b/test/unified_format.py index 471a067bee..2ff24c737b 100644 --- a/test/unified_format.py +++ b/test/unified_format.py @@ -65,7 +65,7 @@ from bson import SON, json_util from bson.codec_options import DEFAULT_CODEC_OPTIONS from bson.objectid import ObjectId -from gridfs import GridFSBucket, GridOut +from gridfs import GridFSBucket, GridOut, NoFile from pymongo import ASCENDING, CursorType, MongoClient, _csot from pymongo.encryption_options import _HAVE_PYMONGOCRYPT from pymongo.errors import ( @@ -629,6 +629,9 @@ def process_error(self, exception, spec): self.assertNotIsInstance(error, NotPrimaryError) elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)): pass + # gridfs NoFile errors are considered client errors. + elif isinstance(error, NoFile): + pass else: self.assertNotIsInstance(error, PyMongoError) From 2103d6713e99231a9c3fbb9035a0cfd3ca0b5762 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 25 Mar 2025 09:34:00 -0400 Subject: [PATCH 2/4] Address review --- gridfs/asynchronous/grid_file.py | 7 +++---- gridfs/synchronous/grid_file.py | 7 +++---- test/asynchronous/test_gridfs_bucket.py | 13 +++++++++++++ test/asynchronous/unified_format.py | 5 +---- test/test_gridfs_bucket.py | 9 +++++++++ test/unified_format.py | 5 +---- 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/gridfs/asynchronous/grid_file.py b/gridfs/asynchronous/grid_file.py index 567e109824..7dadbe90a6 100644 --- a/gridfs/asynchronous/grid_file.py +++ b/gridfs/asynchronous/grid_file.py @@ -1033,12 +1033,11 @@ async def rename_by_name( await fs.upload_from_stream("test_file", "data I want to store!") await fs.rename_by_name("test_file", "new_test_name") - Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists. + Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists. :param filename: The filename of the file to be renamed. :param new_filename: The new name of the file. - :param session: a - :class:`~pymongo.client_session.AsyncClientSession` + :param session: a :class:`~pymongo.client_session.AsyncClientSession` .. versionadded:: 4.12 """ @@ -1048,7 +1047,7 @@ async def rename_by_name( ) if not result.matched_count: raise NoFile( - f"no files could be renamed {new_filename} because none matched filename {filename}" + f"no files could be renamed {new_filename!r} because none matched filename {filename!r}" ) diff --git a/gridfs/synchronous/grid_file.py b/gridfs/synchronous/grid_file.py index 78e1e6f8c5..667a8a496a 100644 --- a/gridfs/synchronous/grid_file.py +++ b/gridfs/synchronous/grid_file.py @@ -1027,12 +1027,11 @@ def rename_by_name( fs.upload_from_stream("test_file", "data I want to store!") fs.rename_by_name("test_file", "new_test_name") - Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists. + Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists. :param filename: The filename of the file to be renamed. :param new_filename: The new name of the file. - :param session: a - :class:`~pymongo.client_session.ClientSession` + :param session: a :class:`~pymongo.client_session.ClientSession` .. versionadded:: 4.12 """ @@ -1042,7 +1041,7 @@ def rename_by_name( ) if not result.matched_count: raise NoFile( - f"no files could be renamed {new_filename} because none matched filename {filename}" + f"no files could be renamed {new_filename!r} because none matched filename {filename!r}" ) diff --git a/test/asynchronous/test_gridfs_bucket.py b/test/asynchronous/test_gridfs_bucket.py index 29877ee9c4..0950d1415b 100644 --- a/test/asynchronous/test_gridfs_bucket.py +++ b/test/asynchronous/test_gridfs_bucket.py @@ -439,6 +439,19 @@ async def test_rename(self): b"testing", await (await self.fs.open_download_stream_by_name("second_name")).read() ) + async def test_rename_by_name(self): + _id = await self.fs.upload_from_stream("first_name", b"testing") + self.assertEqual( + b"testing", await (await self.fs.open_download_stream_by_name("first_name")).read() + ) + + await self.fs.rename_by_name("first_name", "second_name") + with self.assertRaises(NoFile): + await self.fs.open_download_stream_by_name("first_name") + self.assertEqual( + b"testing", await (await self.fs.open_download_stream_by_name("second_name")).read() + ) + @patch("gridfs.asynchronous.grid_file._UPLOAD_BUFFER_SIZE", 5) async def test_abort(self): gin = self.fs.open_upload_stream("test_filename", chunk_size_bytes=5) diff --git a/test/asynchronous/unified_format.py b/test/asynchronous/unified_format.py index 3d63ced9d8..5b784e7881 100644 --- a/test/asynchronous/unified_format.py +++ b/test/asynchronous/unified_format.py @@ -628,10 +628,7 @@ def process_error(self, exception, spec): # Connection errors are considered client errors. if isinstance(error, ConnectionFailure): self.assertNotIsInstance(error, NotPrimaryError) - elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)): - pass - # gridfs NoFile errors are considered client errors. - elif isinstance(error, NoFile): + elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)): pass else: self.assertNotIsInstance(error, PyMongoError) diff --git a/test/test_gridfs_bucket.py b/test/test_gridfs_bucket.py index d68c9f6ba2..1365b0b30a 100644 --- a/test/test_gridfs_bucket.py +++ b/test/test_gridfs_bucket.py @@ -413,6 +413,15 @@ def test_rename(self): self.fs.open_download_stream_by_name("first_name") self.assertEqual(b"testing", (self.fs.open_download_stream_by_name("second_name")).read()) + def test_rename_by_name(self): + _id = self.fs.upload_from_stream("first_name", b"testing") + self.assertEqual(b"testing", (self.fs.open_download_stream_by_name("first_name")).read()) + + self.fs.rename_by_name("first_name", "second_name") + with self.assertRaises(NoFile): + self.fs.open_download_stream_by_name("first_name") + self.assertEqual(b"testing", (self.fs.open_download_stream_by_name("second_name")).read()) + @patch("gridfs.synchronous.grid_file._UPLOAD_BUFFER_SIZE", 5) def test_abort(self): gin = self.fs.open_upload_stream("test_filename", chunk_size_bytes=5) diff --git a/test/unified_format.py b/test/unified_format.py index 2ff24c737b..797bacab9e 100644 --- a/test/unified_format.py +++ b/test/unified_format.py @@ -627,10 +627,7 @@ def process_error(self, exception, spec): # Connection errors are considered client errors. if isinstance(error, ConnectionFailure): self.assertNotIsInstance(error, NotPrimaryError) - elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)): - pass - # gridfs NoFile errors are considered client errors. - elif isinstance(error, NoFile): + elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)): pass else: self.assertNotIsInstance(error, PyMongoError) From 12181761a4ce04de22d606c70601ed8c1606b44a Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Wed, 26 Mar 2025 15:00:55 -0400 Subject: [PATCH 3/4] Adjust changelog wording --- doc/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index 777eedb35a..81921ed4be 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -10,7 +10,7 @@ PyMongo 4.12 brings a number of changes including: :class:`~pymongo.encryption_options.AutoEncryptionOpts`. - Support for $lookup in CSFLE and QE supported on MongoDB 8.1+. - Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.rename_by_name` and :meth:`gridfs.grid_file.GridFSBucket.rename_by_name` - for more performant renaming of file revisions. + for more performant renaming of a file with multiple revisions. - AsyncMongoClient no longer performs DNS resolution for "mongodb+srv://" connection strings on creation. To avoid blocking the asyncio loop, the resolution is now deferred until the client is first connected. - Added index hinting support to the From 9c0bc9f50d3611a1b73b407ac8efa944e7a7b310 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 27 Mar 2025 15:48:09 -0400 Subject: [PATCH 4/4] Address review --- test/asynchronous/test_session.py | 1 + test/asynchronous/test_transactions.py | 7 +++++++ test/test_session.py | 1 + test/test_transactions.py | 7 +++++++ 4 files changed, 16 insertions(+) diff --git a/test/asynchronous/test_session.py b/test/asynchronous/test_session.py index 4431cbcb16..c46ffc020c 100644 --- a/test/asynchronous/test_session.py +++ b/test/asynchronous/test_session.py @@ -541,6 +541,7 @@ async def find(session=None): (bucket.download_to_stream_by_name, ["f", sio], {}), (find, [], {}), (bucket.rename, [1, "f2"], {}), + (bucket.rename_by_name, ["f2", "f3"], {}), # Delete both files so _test_ops can run these operations twice. (bucket.delete, [1], {}), (bucket.delete, [2], {}), diff --git a/test/asynchronous/test_transactions.py b/test/asynchronous/test_transactions.py index 884110cd45..3fdf799ce4 100644 --- a/test/asynchronous/test_transactions.py +++ b/test/asynchronous/test_transactions.py @@ -295,6 +295,13 @@ async def gridfs_open_upload_stream(*args, **kwargs): "new-name", ), ), + ( + bucket.rename_by_name, + ( + "new-name", + "new-name2", + ), + ), ] async with client.start_session() as s, await s.start_transaction(): diff --git a/test/test_session.py b/test/test_session.py index 905539a1f8..357e846588 100644 --- a/test/test_session.py +++ b/test/test_session.py @@ -541,6 +541,7 @@ def find(session=None): (bucket.download_to_stream_by_name, ["f", sio], {}), (find, [], {}), (bucket.rename, [1, "f2"], {}), + (bucket.rename_by_name, ["f2", "f3"], {}), # Delete both files so _test_ops can run these operations twice. (bucket.delete, [1], {}), (bucket.delete, [2], {}), diff --git a/test/test_transactions.py b/test/test_transactions.py index 80b3e3765e..2cbb959d79 100644 --- a/test/test_transactions.py +++ b/test/test_transactions.py @@ -287,6 +287,13 @@ def gridfs_open_upload_stream(*args, **kwargs): "new-name", ), ), + ( + bucket.rename_by_name, + ( + "new-name", + "new-name2", + ), + ), ] with client.start_session() as s, s.start_transaction():