Skip to content

Commit 4534bd7

Browse files
committed
Remove use of GET_ITERATOR_CHUNK_SIZE in favor of MongoDB native document fetching policy
1 parent 6b5d00c commit 4534bd7

File tree

4 files changed

+36
-11
lines changed

4 files changed

+36
-11
lines changed

django_mongodb_backend/compiler.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import itertools
2+
import sys
23
from collections import defaultdict
34

45
from bson import SON, json_util
@@ -11,13 +12,16 @@
1112
from django.db.models.functions.math import Power
1213
from django.db.models.lookups import IsNull
1314
from django.db.models.sql import compiler
14-
from django.db.models.sql.constants import GET_ITERATOR_CHUNK_SIZE, MULTI, SINGLE
15+
from django.db.models.sql.constants import MULTI, SINGLE
1516
from django.db.models.sql.datastructures import BaseTable
1617
from django.utils.functional import cached_property
1718
from pymongo import ASCENDING, DESCENDING
1819

1920
from .query import MongoQuery, wrap_database_errors
2021

22+
# Maximum size of a MongoDB cursor batch response: 16 MiB, expressed in bytes
# (despite the ``_MB`` suffix on the name).
23+
MAX_BATCH_SIZE_MB = 1024 * 1024 * 16
24+
2125

2226
class SQLCompiler(compiler.SQLCompiler):
2327
"""Base class for all Mongo compilers."""
@@ -235,9 +239,7 @@ def pre_sql_setup(self, with_col_aliases=False):
235239
self.order_by_objs = [expr.replace_expressions(all_replacements) for expr, _ in order_by]
236240
return extra_select, order_by, group_by
237241

238-
def execute_sql(
239-
self, result_type=MULTI, chunked_fetch=False, chunk_size=GET_ITERATOR_CHUNK_SIZE
240-
):
242+
def execute_sql(self, result_type=MULTI, chunked_fetch=False, chunk_size=None):
241243
self.pre_sql_setup()
242244
try:
243245
query = self.build_query(
@@ -258,7 +260,8 @@ def execute_sql(
258260
else:
259261
return self._make_result(obj, self.columns)
260262
# result_type is MULTI
261-
cursor.batch_size(chunk_size)
263+
# if chunk_size is not None:
264+
# cursor.batch_size(chunk_size)
262265
result = self.cursor_iter(cursor, chunk_size, self.columns)
263266
if not chunked_fetch:
264267
# If using non-chunked reads, read data into memory.
@@ -270,7 +273,7 @@ def results_iter(
270273
results=None,
271274
tuple_expected=False,
272275
chunked_fetch=False,
273-
chunk_size=GET_ITERATOR_CHUNK_SIZE,
276+
chunk_size=None,
274277
):
275278
"""
276279
Return an iterator over the results from executing query given
@@ -318,12 +321,25 @@ def _make_result(self, entity, columns):
318321
result.append(obj.get(name))
319322
return result
320323

321-
def cursor_iter(self, cursor, chunk_size, columns):
322-
"""Yield chunks of results from cursor."""
324+
def cursor_iter(self, cursor, _, columns):
325+
"""
326+
Yield chunks of results from cursor.
327+
MongoDB ignores all chunk_size overrides. Cursor iteration abides by
328+
MongoDB's default cursor batch size response.
329+
Read more here: https://www.mongodb.com/docs/manual/core/cursors/#cursor-batches
330+
"""
323331
chunk = []
324-
for row in cursor:
332+
chunk_size = 101 # MongoDB's default initial batch size
333+
334+
for i, row in enumerate(cursor):
325335
chunk.append(self._make_result(row, columns))
336+
326337
if len(chunk) == chunk_size:
338+
if i == chunk_size - 1: # First chunk complete
339+
# Using current row as representation, approximate
340+
# how many rows can fit in a 16MB payload (MongoDB batch_size max)
341+
# then set that as the new chunk size.
342+
chunk_size = MAX_BATCH_SIZE_MB // sys.getsizeof(row)
327343
yield chunk
328344
chunk = []
329345
yield chunk

django_mongodb_backend/query.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ def get_cursor(self):
7676
"""
7777
self.compiler.connection.validate_no_broken_transaction()
7878
return self.compiler.collection.aggregate(
79-
self.get_pipeline(), session=self.compiler.connection.session
79+
self.get_pipeline(),
80+
session=self.compiler.connection.session,
8081
)
8182

8283
def get_pipeline(self):

docs/source/releases/5.1.x.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ Django MongoDB Backend 5.1.x
1111
the ``base_field`` uses a database converter.
1212
- Fixed ``RecursionError`` when using ``Trunc`` database functions on non-MongoDB
1313
databases.
14+
- Removed ``GET_ITERATOR_CHUNK_SIZE`` as default from
15+
:meth:`SQLCompiler.execute_sql() <django_mongodb_backend.compiler.SQLCompiler.execute_sql>`.
16+
Cursor iteration now uses MongoDB's default batch sizing.
17+
1418

1519
5.1.0 beta 3
1620
============

docs/source/releases/5.2.x.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,16 @@ New features
1818
instance that may be of more than one model class.
1919

2020
Bug fixes
21-
---------
21+
---------
2222

2323
- Fixed ``RecursionError`` when using ``Trunc`` database functions on non-MongoDB
2424
databases.
2525
- :meth:`QuerySet.explain() <django.db.models.query.QuerySet.explain>` now
2626
:ref:`returns a string that can be parsed as JSON <queryset-explain>`.
27+
- Removed ``GET_ITERATOR_CHUNK_SIZE`` as default from
28+
:meth:`SQLCompiler.execute_sql() <django_mongodb_backend.compiler.SQLCompiler.execute_sql>`.
29+
Cursor iteration now uses MongoDB's default batch sizing.
30+
2731

2832
5.2.0 beta 1
2933
============

0 commit comments

Comments
 (0)