|
27 | 27 | __all__ = ("PyMongo", "ASCENDING", "DESCENDING", "BSONObjectIdConverter", "BSONProvider")
|
28 | 28 |
|
29 | 29 | import hashlib
|
| 30 | +from collections import OrderedDict |
30 | 31 | from mimetypes import guess_type
|
31 | 32 | from typing import Any
|
32 | 33 |
|
@@ -66,6 +67,8 @@ def __init__(
|
66 | 67 | ) -> None:
|
67 | 68 | self.cx: MongoClient | None = None
|
68 | 69 | self.db: Database | None = None
|
| 70 | + self._hash_cache = OrderedDict() |
| 71 | + self._hash_limit = 128 |
69 | 72 |
|
70 | 73 | if app is not None:
|
71 | 74 | self.init_app(app, uri, *args, **kwargs)
|
@@ -184,10 +187,18 @@ def get_upload(filename):
|
184 | 187 | response.content_length = fileobj.length
|
185 | 188 | response.last_modified = fileobj.upload_date
|
186 | 189 |
|
187 |
| - # Get or compute the sha1 sum for the etag. |
188 |
| - metadata = fileobj.metadata |
189 |
| - sha1_sum = metadata and metadata.get("sha1_sum") |
190 |
| - sha1_sum = sha1_sum or self._compute_sha(fileobj) |
| 190 | + # GridFS does not manage its own checksum, so we compute one ourselves and |
| 191 | + # keep it in a bounded in-memory cache keyed by file id, to be used for the etag. |
| 192 | + sha1_sum = self._hash_cache.get(str(fileobj._id)) |
| 193 | + if sha1_sum is None: |
| 194 | + # Compute the checksum of the file for the etag. |
| 195 | + pos = fileobj.tell() |
| 196 | + raw_data = fileobj.read() |
| 197 | + fileobj.seek(pos) |
| 198 | + sha1_sum = hashlib.sha1(raw_data).hexdigest() |
| 199 | + while len(self._hash_cache) >= self._hash_limit: |
| 200 | + self._hash_cache.popitem() |
| 201 | + self._hash_cache[str(fileobj._id)] = sha1_sum |
191 | 202 | response.set_etag(sha1_sum)
|
192 | 203 |
|
193 | 204 | response.cache_control.max_age = cache_for
|
@@ -238,19 +249,5 @@ def save_upload(filename):
|
238 | 249 | db_obj = self.db
|
239 | 250 | assert db_obj is not None, "Please initialize the app before calling save_file!"
|
240 | 251 | storage = GridFS(db_obj, base)
|
241 |
| - |
242 |
| - # GridFS does not manage its own hash, so we manage our own using its |
243 |
| - # metadata storage, to be used for the etag. |
244 |
| - sha1_sum = self._compute_sha(fileobj) |
245 |
| - metadata = dict(sha1_sum=sha1_sum) |
246 |
| - id = storage.put( |
247 |
| - fileobj, filename=filename, content_type=content_type, metadata=metadata, **kwargs |
248 |
| - ) |
| 252 | + id = storage.put(fileobj, filename=filename, content_type=content_type, **kwargs) |
249 | 253 | return id
|
250 |
| - |
251 |
| - def _compute_sha(self, fileobj: Any) -> str: |
252 |
| - """Compute the sha sum of a file object.""" |
253 |
| - pos = fileobj.tell() |
254 |
| - raw_data = fileobj.read() |
255 |
| - fileobj.seek(pos) |
256 |
| - return hashlib.sha1(raw_data).hexdigest() |
0 commit comments