Skip to content

Commit 00a02fb

Browse files
committed
Use a fixed size cache to store sha sums
1 parent 794c9d4 commit 00a02fb

File tree

1 file changed

+16
-19
lines changed

1 file changed

+16
-19
lines changed

flask_pymongo/__init__.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
__all__ = ("PyMongo", "ASCENDING", "DESCENDING", "BSONObjectIdConverter", "BSONProvider")
2828

2929
import hashlib
30+
from collections import OrderedDict
3031
from mimetypes import guess_type
3132
from typing import Any
3233

@@ -66,6 +67,8 @@ def __init__(
6667
) -> None:
6768
self.cx: MongoClient | None = None
6869
self.db: Database | None = None
70+
self._hash_cache = OrderedDict()
71+
self._hash_limit = 128
6972

7073
if app is not None:
7174
self.init_app(app, uri, *args, **kwargs)
@@ -184,10 +187,18 @@ def get_upload(filename):
184187
response.content_length = fileobj.length
185188
response.last_modified = fileobj.upload_date
186189

187-
# Get or compute the sha1 sum for the etag.
188-
metadata = fileobj.metadata
189-
sha1_sum = metadata and metadata.get("sha1_sum")
190-
sha1_sum = sha1_sum or self._compute_sha(fileobj)
190+
# GridFS does not manage its own checksum, so we compute our own and keep it
191+
# in a fixed-size in-memory cache, to be used for the etag.
192+
sha1_sum = self._hash_cache.get(str(fileobj._id))
193+
if sha1_sum is None:
194+
# Compute the checksum of the file for the etag.
195+
pos = fileobj.tell()
196+
raw_data = fileobj.read()
197+
fileobj.seek(pos)
198+
sha1_sum = hashlib.sha1(raw_data).hexdigest()
199+
while len(self._hash_cache) >= self._hash_limit:
200+
self._hash_cache.popitem()
201+
self._hash_cache[str(fileobj._id)] = sha1_sum
191202
response.set_etag(sha1_sum)
192203

193204
response.cache_control.max_age = cache_for
@@ -238,19 +249,5 @@ def save_upload(filename):
238249
db_obj = self.db
239250
assert db_obj is not None, "Please initialize the app before calling save_file!"
240251
storage = GridFS(db_obj, base)
241-
242-
# GridFS does not manage its own hash, so we manage our own using its
243-
# metadata storage, to be used for the etag.
244-
sha1_sum = self._compute_sha(fileobj)
245-
metadata = dict(sha1_sum=sha1_sum)
246-
id = storage.put(
247-
fileobj, filename=filename, content_type=content_type, metadata=metadata, **kwargs
248-
)
252+
id = storage.put(fileobj, filename=filename, content_type=content_type, **kwargs)
249253
return id
250-
251-
def _compute_sha(self, fileobj: Any) -> str:
252-
"""Compute the sha sum of a file object."""
253-
pos = fileobj.tell()
254-
raw_data = fileobj.read()
255-
fileobj.seek(pos)
256-
return hashlib.sha1(raw_data).hexdigest()

0 commit comments

Comments
 (0)