Skip to content

Commit 1d8ee60

Browse files
authored
Merge pull request #212 from josephroque/marker-based-pagination
Limit/Offset and Marker Based pagination
2 parents 0979c2a + fdf5004 commit 1d8ee60

12 files changed

+957
-0
lines changed

boxsdk/object/folder.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from boxsdk.object.group import Group
1010
from boxsdk.object.item import Item
1111
from boxsdk.object.user import User
12+
from boxsdk.pagination.limit_offset_based_object_collection import LimitOffsetBasedObjectCollection
13+
from boxsdk.pagination.marker_based_object_collection import MarkerBasedObjectCollection
1214
from boxsdk.util.api_call_decorator import api_call
1315
from boxsdk.util.text_enum import TextEnum
1416

@@ -154,6 +156,69 @@ def get_items(self, limit, offset=0, fields=None):
154156
response = box_response.json()
155157
return [self.translator.translate(item['type'])(self._session, item['id'], item) for item in response['entries']]
156158

159+
@api_call
def get_items_limit_offset(self, limit=None, offset=0, fields=None):
    """
    Page through the items of this folder using limit-offset pagination.

    No request is made here; the returned collection is handed the session and the
    folder's 'items' URL and is configured to yield individual items rather than pages.

    :param limit:
        Upper bound on the number of items per page. When None, the server-side default is used.
    :type limit:
        `int` or None
    :param offset:
        Index of the first item to return.
    :type offset:
        `int`
    :param fields:
        Names of the fields to request for each item.
    :type fields:
        `Iterable` of `unicode`
    :returns:
        An iterator over the items in the folder.
    :rtype:
        :class:`BoxObjectCollection`
    """
    session = self.session
    items_url = self.get_url('items')
    return LimitOffsetBasedObjectCollection(
        session,
        items_url,
        limit=limit,
        fields=fields,
        offset=offset,
        return_full_pages=False,
    )
189+
190+
@api_call
def get_items_marker(self, limit=None, marker=None, fields=None):
    """
    Page through the items of this folder using marker-based pagination.

    No request is made here; the returned collection is handed the session and the
    folder's 'items' URL and is configured to yield individual items rather than pages.

    :param limit:
        Upper bound on the number of items per page. When None, the server-side default is used.
    :type limit:
        `int` or None
    :param marker:
        The paging marker to start paging from.
    :type marker:
        `str` or None
    :param fields:
        Names of the fields to request for each item.
    :type fields:
        `Iterable` of `unicode`
    :returns:
        An iterator over the items in the folder.
    :rtype:
        :class:`BoxObjectCollection`
    """
    session = self.session
    items_url = self.get_url('items')
    return MarkerBasedObjectCollection(
        session,
        items_url,
        limit=limit,
        fields=fields,
        marker=marker,
        return_full_pages=False,
        supports_limit_offset_paging=True,
    )
221+
157222
@api_call
158223
def upload_stream(
159224
self,

boxsdk/pagination/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# coding: utf-8
2+
3+
from __future__ import unicode_literals
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
# coding: utf-8
2+
3+
from __future__ import unicode_literals
4+
5+
from abc import ABCMeta, abstractmethod
import collections

try:
    from collections.abc import Iterator
except ImportError:  # Python 2 has no collections.abc module.
    from collections import Iterator

from six import add_metaclass

from boxsdk.pagination.page import Page
11+
12+
13+
@add_metaclass(ABCMeta)
class BoxObjectCollection(Iterator):
    """
    An iterator that represents a collection of Box objects (BaseObject).

    A BoxObjectCollection instance contains everything it needs in order to retrieve and page through
    responses from Box API endpoints that return collections of Box objects.

    This class only has two public methods:

        1). next(), which returns either a Page (sequence of BaseObjects) or individual BaseObjects based on
        the constructor argument 'return_full_pages'.

        2). next_pointer(), which returns the pointer (either an offset or a marker, based on the endpoint) that
        will be used to retrieve the next page of Box objects. This pointer can be used when requesting new
        BoxObjectCollection instances that start off from a particular page, instead of from the very beginning.

    Subclasses supply the paging strategy by implementing _update_pointer_to_next_page(),
    _has_more_pages(), _next_page_pointer_params() and next_pointer().
    """

    def __init__(
            self,
            session,
            url,
            limit=None,
            fields=None,
            additional_params=None,
            return_full_pages=False,
    ):
        """
        :param session:
            The Box session used to make requests.
        :type session:
            :class:`BoxSession`
        :param url:
            The endpoint url to hit.
        :type url:
            `unicode`
        :param limit:
            The number of entries for each page to return. The default, as well as the upper limit of this value,
            differs by endpoint. See https://developer.box.com/reference. If limit is set to None, then the default
            limit (returned by Box in the response) is used.
        :type limit:
            `int` or None
        :param fields:
            List of fields to request. If None, will return the default fields for the object.
        :type fields:
            `Iterable` of `unicode` or None
        :param additional_params:
            Additional HTTP params to send in the request.
        :type additional_params:
            `dict` or None
        :param return_full_pages:
            If True, then the returned iterator for this collection will return full pages of Box objects on each
            call to next(). If False, the iterator will return a single Box object on each next() call.
        :type return_full_pages:
            `bool`
        """
        super(BoxObjectCollection, self).__init__()
        self._session = session
        self._url = url
        self._limit = limit
        self._fields = fields
        self._additional_params = additional_params
        self._return_full_pages = return_full_pages
        self._has_retrieved_all_items = False
        # Created lazily on the first next() call, so constructing a collection makes no request.
        self._all_items = None

    def next(self):
        """
        Returns either a Page (a Sequence of BaseObjects) or a BaseObject depending on self._return_full_pages.

        Invoking this method may make an API call to Box. Any exceptions that can occur while making requests
        may be raised in this method.

        :rtype:
            :class:`Page` or :class:`BaseObject`
        :raises:
            `StopIteration` once the underlying generator is exhausted.
        """
        if self._all_items is None:
            self._all_items = self._items_generator()
        return next(self._all_items)

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def _items_generator(self):
        """
        Generator that requests pages until the subclass reports there are no more, yielding
        either each Page or each entry of each Page, depending on self._return_full_pages.

        :rtype:
            :class:`Page` or :class:`BaseObject`
        """
        while not self._has_retrieved_all_items:
            response_object = self._load_next_page()

            # If the limit was not specified, then it should default to whatever the server tells us.
            # Not every response is guaranteed to echo a limit back, so tolerate its absence
            # instead of failing with a KeyError; the limit then simply stays unspecified.
            if self._limit is None:
                self._limit = response_object.get('limit')

            self._update_pointer_to_next_page(response_object)
            self._has_retrieved_all_items = not self._has_more_pages(response_object)
            page = Page(self._session, response_object)

            if self._return_full_pages:
                yield page
            else:
                # It's possible for the Box API to return 0 items in a page, even if there are more items to be
                # retrieved on subsequent pages. When self._return_full_pages is True, yielding a 0-item
                # page is fine because that's what the page returned.
                # But when we are iterating over individual items, an empty page has nothing to yield,
                # so we continue to request more pages until we have Box objects to yield.
                if not page:
                    continue
                for entry in page:
                    yield entry

    def _load_next_page(self):
        """
        Request the next page of entries from Box. Raises any network-related exceptions, including BoxAPIException.
        Returns a parsed dictionary of the JSON response from Box

        :rtype:
            `dict`
        """
        params = {}
        if self._limit is not None:
            params['limit'] = self._limit
        if self._fields:
            params['fields'] = ','.join(self._fields)
        if self._additional_params:
            params.update(self._additional_params)
        # Applied last, so the paging pointer wins over any same-named additional param.
        params.update(self._next_page_pointer_params())
        box_response = self._session.get(self._url, params=params)
        return box_response.json()

    @abstractmethod
    def _update_pointer_to_next_page(self, response_object):
        """
        Update the internal pointer attribute of this class to what will be used to request the next page
        of Box objects.

        A "pointer" can either be a marker (for marker-based paging) or an offset (for limit-offset paging).

        :param response_object:
            The parsed HTTP response from Box after requesting more pages.
        :type response_object:
            `dict`
        """
        raise NotImplementedError

    @abstractmethod
    def _has_more_pages(self, response_object):
        """
        Are there more pages of entries to query Box for? This gets invoked after self._update_pointer_to_next_page().

        :param response_object:
            The parsed HTTP response from Box after requesting more pages.
        :type response_object:
            `dict`
        :rtype:
            `bool`
        """
        raise NotImplementedError

    @abstractmethod
    def _next_page_pointer_params(self):
        """
        The dict of HTTP params that specify which page of Box objects to retrieve.

        :rtype:
            `dict`
        """
        raise NotImplementedError

    @abstractmethod
    def next_pointer(self):
        """
        The pointer that will be used to request the next page of Box objects.

        For limit-offset based paging, this is an offset. For marker-based paging, this is a marker.

        The pointer only gets progressed upon successful page requests to Box.

        :rtype:
            varies
        """
        raise NotImplementedError
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# coding: utf-8
2+
3+
from __future__ import unicode_literals
4+
5+
from .box_object_collection import BoxObjectCollection
6+
7+
8+
class LimitOffsetBasedObjectCollection(BoxObjectCollection):
    """
    An iterator of Box objects (BaseObjects) that were retrieved from a Box API endpoint that supports
    limit-offset type of pagination.

    See https://developer.box.com/reference#pagination for more details.
    """

    def __init__(
            self,
            session,
            url,
            limit=None,
            fields=None,
            additional_params=None,
            return_full_pages=False,
            offset=0,
    ):
        """
        All parameters other than `offset` are forwarded unchanged to the base class constructor.

        :param offset:
            The offset index to start paging from.
        :type offset:
            `int`
        """
        super(LimitOffsetBasedObjectCollection, self).__init__(
            session,
            url,
            limit=limit,
            fields=fields,
            additional_params=additional_params,
            return_full_pages=return_full_pages,
        )
        self._offset = offset
        # The base class overwrites an unspecified limit with the server's value before
        # _update_pointer_to_next_page() runs, so remember here whether the caller chose one.
        self._limit_was_requested = limit is not None

    def _update_pointer_to_next_page(self, response_object):
        """
        Baseclass override.

        Advances the offset by one page, capped at the total item count reported in the response.

        :param response_object:
            The parsed HTTP response from Box; must contain 'total_count'.
        :type response_object:
            `dict`
        :raises:
            `RuntimeError` if the API reports a non-positive limit that the caller did not ask for.
        """
        total_count = response_object['total_count']

        # The API might use a lower limit than the client asked for, if the
        # client asked for a limit above the maximum limit for that endpoint.
        # The API is supposed to respond with the limit that it actually used.
        # If that is given, then use that limit for the offset calculation, and
        # also for the remainder of the paging.
        #
        # Similarly, the API reports the offset that it used. In theory, this
        # should always be the same as what was requested. But just in case, do
        # the same thing with offset.
        if 'limit' in response_object:
            previous_limit = self._limit
            self._limit = int(response_object['limit'])

            # A non-positive limit is only tolerated when the caller explicitly
            # asked for one. Comparing against the previous limit alone is not
            # enough: when no limit was requested, the previous value is already
            # the server's own (possibly bad) limit, or None.
            caller_asked_for_non_positive = (
                self._limit_was_requested and previous_limit is not None and previous_limit <= 0
            )

            # If the API erroneously sends a bad value for limit, we want to
            # avoid getting into an infinite chain of API calls. So abort with
            # a runtime error.
            if self._limit <= 0 and not caller_asked_for_non_positive:
                self._offset = total_count  # Disable additional paging.
                raise RuntimeError('API returned limit={0}, cannot continue paging'.format(self._limit))

        if 'offset' in response_object:
            self._offset = int(response_object['offset'])

        # Step forward one page, but never past the end of the collection.
        self._offset = min(self._offset + self._limit, total_count)

    def _has_more_pages(self, response_object):
        """Baseclass override. True while the offset has not reached the reported 'total_count'."""
        return self._offset < response_object['total_count']

    def _next_page_pointer_params(self):
        """Baseclass override. The current offset, as the 'offset' HTTP param."""
        return {'offset': self._offset}

    def next_pointer(self):
        """Baseclass override. The offset that will be used for the next page request."""
        return self._offset

0 commit comments

Comments
 (0)