Skip to content

Commit 4ae93c4

Browse files
authored
PYTHON-1552 Prevent uploading partial or corrupt GridFS files after an error occurs
1 parent 922e63d commit 4ae93c4

File tree

5 files changed

+41
-27
lines changed

5 files changed

+41
-27
lines changed

doc/migrate-to-pymongo4.rst

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -879,12 +879,11 @@ and store it with other file metadata. For example::
879879
import hashlib
880880
my_db = MongoClient().test
881881
fs = GridFSBucket(my_db)
882-
grid_in = fs.open_upload_stream("test_file")
883-
file_data = b'...'
884-
sha356 = hashlib.sha256(file_data).hexdigest()
885-
grid_in.write(file_data)
886-
grid_in.sha356 = sha356 # Set the custom 'sha356' field
887-
grid_in.close()
882+
with fs.open_upload_stream("test_file") as grid_in:
883+
file_data = b'...'
884+
sha356 = hashlib.sha256(file_data).hexdigest()
885+
grid_in.write(file_data)
886+
grid_in.sha356 = sha356 # Set the custom 'sha356' field
888887

889888
Note that for large files, the checksum may need to be computed in chunks
890889
to avoid the excessive memory needed to load the entire file at once.

gridfs/__init__.py

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,8 @@ def put(self, data: Any, **kwargs: Any) -> Any:
109109
110110
Equivalent to doing::
111111
112-
try:
113-
f = new_file(**kwargs)
112+
with fs.new_file(**kwargs) as f:
114113
f.write(data)
115-
finally:
116-
f.close()
117114
118115
`data` can be either an instance of :class:`bytes` or a file-like
119116
object providing a :meth:`read` method. If an `encoding` keyword
@@ -134,13 +131,10 @@ def put(self, data: Any, **kwargs: Any) -> Any:
134131
.. versionchanged:: 3.0
135132
w=0 writes to GridFS are now prohibited.
136133
"""
137-
grid_file = GridIn(self.__collection, **kwargs)
138-
try:
139-
grid_file.write(data)
140-
finally:
141-
grid_file.close()
142134

143-
return grid_file._id
135+
with GridIn(self.__collection, **kwargs) as grid_file:
136+
grid_file.write(data)
137+
return grid_file._id
144138

145139
def get(self, file_id: Any, session: Optional[ClientSession] = None) -> GridOut:
146140
"""Get a file from GridFS by ``"_id"``.
@@ -528,11 +522,11 @@ def open_upload_stream(
528522
529523
my_db = MongoClient().test
530524
fs = GridFSBucket(my_db)
531-
grid_in = fs.open_upload_stream(
525+
with fs.open_upload_stream(
532526
"test_file", chunk_size_bytes=4,
533-
metadata={"contentType": "text/plain"})
534-
grid_in.write("data I want to store!")
535-
grid_in.close() # uploaded on close
527+
metadata={"contentType": "text/plain"}) as grid_in:
528+
grid_in.write("data I want to store!")
529+
# uploaded on close
536530
537531
Returns an instance of :class:`~gridfs.grid_file.GridIn`.
538532
@@ -584,13 +578,13 @@ def open_upload_stream_with_id(
584578
585579
my_db = MongoClient().test
586580
fs = GridFSBucket(my_db)
587-
grid_in = fs.open_upload_stream_with_id(
581+
with fs.open_upload_stream_with_id(
588582
ObjectId(),
589583
"test_file",
590584
chunk_size_bytes=4,
591-
metadata={"contentType": "text/plain"})
592-
grid_in.write("data I want to store!")
593-
grid_in.close() # uploaded on close
585+
metadata={"contentType": "text/plain"}) as grid_in:
586+
grid_in.write("data I want to store!")
587+
# uploaded on close
594588
595589
Returns an instance of :class:`~gridfs.grid_file.GridIn`.
596590

gridfs/grid_file.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,9 +396,14 @@ def __enter__(self) -> "GridIn":
396396
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Any:
397397
"""Support for the context manager protocol.
398398
399-
Close the file and allow exceptions to propagate.
399+
Close the file if no exceptions occur and allow exceptions to propagate.
400400
"""
401-
self.close()
401+
if exc_type is None:
402+
# No exceptions happened.
403+
self.close()
404+
else:
405+
# Something happened, at minimum mark as closed.
406+
object.__setattr__(self, "_closed", True)
402407

403408
# propagate exceptions
404409
return False

test/test_grid_file.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,22 @@ def test_context_manager(self):
675675
with GridOut(self.db.fs, infile._id) as outfile:
676676
self.assertEqual(contents, outfile.read())
677677

678+
def test_exception_file_non_existence(self):
679+
contents = b"Imagine this is some important data..."
680+
681+
with self.assertRaises(ConnectionError):
682+
with GridIn(self.db.fs, filename="important") as infile:
683+
infile.write(contents)
684+
raise ConnectionError("Test exception")
685+
686+
# Expectation: File chunks are written, entry in files doesn't appear.
687+
self.assertEqual(
688+
self.db.fs.chunks.count_documents({"files_id": infile._id}), infile._chunk_number
689+
)
690+
691+
self.assertIsNone(self.db.fs.files.find_one({"_id": infile._id}))
692+
self.assertTrue(infile.closed)
693+
678694
def test_prechunked_string(self):
679695
def write_me(s, chunk_size):
680696
buf = BytesIO(s)

test/test_gridfs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ def test_gridfs_secondary_lazy(self):
540540

541541
# Connects, doesn't create index.
542542
self.assertRaises(NoFile, fs.get_last_version)
543-
self.assertRaises(NotPrimaryError, fs.put, "data")
543+
self.assertRaises(NotPrimaryError, fs.put, "data", encoding="utf-8")
544544

545545

546546
if __name__ == "__main__":

0 commit comments

Comments
 (0)