Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 1cb8abf

Browse files
aogier authored and emlaver committed
paginate by startkey (#437)
* startkey/bookmark pagination
1 parent cb44158 commit 1cb8abf

File tree

5 files changed

+139
-37
lines changed

5 files changed

+139
-37
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Unreleased
22

3+
- [IMPROVED] Updated `Result` iteration by paginating with views' `startkey` and
4+
queries' `bookmark`.
35
- [FIXED] Bug where document context manager performed remote save despite
46
uncaught exceptions being raised inside `with` block.
57
- [FIXED] Fixed parameter type of `selector` in docstring.

src/cloudant/result.py

Lines changed: 82 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""
1616
API module for interacting with result collections.
1717
"""
18+
from functools import partial
1819
from ._2to3 import STRTYPE
1920
from .error import ResultException
2021
from ._common_util import py_to_couch_validate, type_or_none
@@ -327,16 +328,16 @@ def _handle_result_by_key_slice(self, key_slice):
327328
def __iter__(self):
328329
"""
329330
Provides iteration support, primarily for large data collections.
330-
The iterator uses the ``skip`` and ``limit`` options to consume
331-
data in chunks controlled by the ``page_size`` option. It retrieves
332-
a batch of data from the result collection and then yields each
333-
element.
331+
The iterator uses the ``startkey``, ``startkey_docid``, and ``limit``
332+
options to consume data in chunks controlled by the ``page_size``
333+
option. It retrieves a batch of data from the result collection
334+
and then yields each element.
334335
335336
See :class:`~cloudant.result.Result` for Result iteration examples.
336337
337338
:returns: Iterable data sequence
338339
"""
339-
invalid_options = ('skip', 'limit')
340+
invalid_options = ('limit', )
340341
if any(x in invalid_options for x in self.options):
341342
raise ResultException(103, invalid_options, self.options)
342343

@@ -347,21 +348,60 @@ def __iter__(self):
347348
except ValueError:
348349
raise ResultException(104, self._page_size)
349350

350-
skip = 0
351+
init_opts = {
352+
'skip': self.options.pop('skip', None),
353+
'startkey': self.options.pop('startkey', None)
354+
}
355+
356+
self._call = partial(self._ref, #pylint: disable=attribute-defined-outside-init
357+
limit=self._real_page_size,
358+
**self.options)
359+
360+
response = self._call(**{k: v
361+
for k, v
362+
in init_opts.items()
363+
if v is not None})
364+
365+
return self._iterator(response)
366+
367+
@property
368+
def _real_page_size(self):
369+
'''
370+
In views we paginate with N+1 items per page.
371+
https://docs.couchdb.org/en/stable/ddocs/views/pagination.html#paging-alternate-method
372+
'''
373+
return self._page_size + 1
374+
375+
def _iterator(self, response):
376+
'''
377+
Iterate through view data.
378+
'''
379+
351380
while True:
352-
response = self._ref(
353-
limit=self._page_size,
354-
skip=skip,
355-
**self.options
356-
)
357381
result = self._parse_data(response)
358-
skip += self._page_size
359382
if result:
383+
doc_count = len(result)
384+
last = result.pop()
360385
for row in result:
361386
yield row
362-
if len(result) < self._page_size:
387+
388+
# We request self._page_size + 1 rows per page; if we received
389+
# self._page_size rows or fewer, this is the last page, so the
390+
# held-back last row must be yielded as well.
391+
if doc_count < self._real_page_size:
392+
yield last
363393
break
364394
del result
395+
396+
# if we are in a view, keys could be duplicate so we
397+
# need to start from the right docid
398+
if last['id']:
399+
response = self._call(startkey=last['key'],
400+
startkey_docid=last['id'])
401+
# reduce result keys are unique by definition
402+
else:
403+
response = self._call(startkey=last['key'])
404+
365405
else:
366406
break
367407

@@ -510,3 +550,32 @@ def _parse_data(self, data):
510550
query result JSON response content
511551
"""
512552
return data.get('docs', [])
553+
554+
@property
555+
def _real_page_size(self):
556+
'''
557+
During query iteration the page size is exactly the user-specified value.
558+
'''
559+
return self._page_size
560+
561+
def _iterator(self, response):
562+
'''
563+
Iterate through query data.
564+
'''
565+
566+
while True:
567+
result = self._parse_data(response)
568+
bookmark = response.get('bookmark')
569+
if result:
570+
for row in result:
571+
yield row
572+
573+
del result
574+
575+
if not bookmark:
576+
break
577+
578+
response = self._call(bookmark=bookmark)
579+
580+
else:
581+
break

tests/unit/query_result_tests.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -425,24 +425,14 @@ def test_get_item_index_slice_using_stop_only_limit_skip(self):
425425

426426
def test_iteration_with_invalid_options(self):
427427
"""
428-
Test that iteration raises an exception when "skip" and/or "limit" are
429-
used as options for the result.
428+
Test that iteration raises an exception when "limit" is
429+
used as an option for the result.
430430
"""
431-
result = self.create_result(q_parms={'skip': 10})
432-
with self.assertRaises(ResultException) as cm:
433-
invalid_result = [row for row in result]
434-
self.assertEqual(cm.exception.status_code, 103)
435-
436431
result = self.create_result(q_parms={'limit': 10})
437432
with self.assertRaises(ResultException) as cm:
438433
invalid_result = [row for row in result]
439434
self.assertEqual(cm.exception.status_code, 103)
440435

441-
result = self.create_result(q_parms={'limit': 10, 'skip': 10})
442-
with self.assertRaises(ResultException) as cm:
443-
invalid_result = [row for row in result]
444-
self.assertEqual(cm.exception.status_code, 103)
445-
446436
def test_iteration_invalid_page_size(self):
447437
"""
448438
Test that iteration raises an exception when an invalid "page_size" is

tests/unit/result_tests.py

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15+
import mock
1516
"""
1617
result module - Unit tests for Result class
1718
"""
1819
import unittest
1920

2021
from cloudant.error import ResultException
2122
from cloudant.result import Result, ResultByKey
23+
from cloudant.view import View
2224
from nose.plugins.attrib import attr
2325
from requests.exceptions import HTTPError
2426

@@ -564,24 +566,14 @@ def test_get_item_key_slice_using_stop_only(self):
564566

565567
def test_iteration_with_invalid_options(self):
566568
"""
567-
Test that iteration raises an exception when "skip" and/or "limit" are
568-
used as options for the result.
569+
Test that iteration raises an exception when "limit" is
570+
used as an option for the result.
569571
"""
570-
result = Result(self.view001, skip=10)
571-
with self.assertRaises(ResultException) as cm:
572-
invalid_result = [row for row in result]
573-
self.assertEqual(cm.exception.status_code, 103)
574-
575572
result = Result(self.view001, limit=10)
576573
with self.assertRaises(ResultException) as cm:
577574
invalid_result = [row for row in result]
578575
self.assertEqual(cm.exception.status_code, 103)
579576

580-
result = Result(self.view001, skip=10, limit=10)
581-
with self.assertRaises(ResultException) as cm:
582-
invalid_result = [row for row in result]
583-
self.assertEqual(cm.exception.status_code, 103)
584-
585577
def test_iteration_invalid_page_size(self):
586578
"""
587579
Test that iteration raises an exception when an invalid "page_size" is
@@ -643,5 +635,49 @@ def test_iteration_no_data(self):
643635
result = Result(self.view001, startkey='ruby')
644636
self.assertEqual([x for x in result], [])
645637

638+
def test_iteration_integer_keys(self):
639+
"""
640+
Test that iteration works as expected when keys are integer.
641+
"""
642+
result = Result(self.view007, page_size=10)
643+
self.assertEqual(len([x for x in result]), 100)
644+
645+
def test_iteration_pagination(self):
646+
"""
647+
Test that iteration pagination works as expected.
648+
"""
649+
650+
class CallMock:
651+
expected_calls = [
652+
{'limit': 28},
653+
{'limit': 28, 'startkey': 1, 'startkey_docid': 'julia027'},
654+
{'limit': 28, 'startkey': 1, 'startkey_docid': 'julia054'},
655+
{'limit': 28, 'startkey': 1, 'startkey_docid': 'julia081'},
656+
]
657+
658+
def __init__(self, outer):
659+
self.outer = outer
660+
self.expected_calls.reverse()
661+
662+
def call(self, *args, **kwargs):
663+
self.outer.assertEqual(dict(kwargs),
664+
self.expected_calls.pop(),
665+
'pagination error')
666+
return View.__call__(self.outer.view007, *args, **kwargs)
667+
668+
with mock.patch.object(self, 'view007',
669+
CallMock(self).call) as _:
670+
671+
result = Result(self.view007, page_size=27)
672+
673+
expected = [
674+
{'id': 'julia{0:03d}'.format(i),
675+
'key': 1,
676+
'value': 'julia'}
677+
for i in range(100)
678+
]
679+
self.assertEqual([x for x in result], expected)
680+
681+
646682
if __name__ == '__main__':
647683
unittest.main()

tests/unit/unit_t_db_base.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,13 +314,18 @@ def create_views(self):
314314
'function (doc) {\n emit([doc.name, doc.age], 1);\n}',
315315
'_count'
316316
)
317+
self.ddoc.add_view(
318+
'view007',
319+
'function (doc) {\n emit(1, doc.name);\n}'
320+
)
317321
self.ddoc.save()
318322
self.view001 = self.ddoc.get_view('view001')
319323
self.view002 = self.ddoc.get_view('view002')
320324
self.view003 = self.ddoc.get_view('view003')
321325
self.view004 = self.ddoc.get_view('view004')
322326
self.view005 = self.ddoc.get_view('view005')
323327
self.view006 = self.ddoc.get_view('view006')
328+
self.view007 = self.ddoc.get_view('view007')
324329

325330
def create_search_index(self):
326331
"""

0 commit comments

Comments
 (0)