1616"""
1717import json
1818import logging
19+ import functools
1920
2021import requests
2122from requests import Request
@@ -74,6 +75,7 @@ class DSpaceClient:
7475 if 'USER_AGENT' in os .environ :
7576 USER_AGENT = os .environ ['USER_AGENT' ]
7677 verbose = False
78+ ITER_PAGE_SIZE = 20
7779
7880 # Simple enum for patch operation types
7981 class PatchOperation :
@@ -82,6 +84,38 @@ class PatchOperation:
8284 REPLACE = 'replace'
8385 MOVE = 'move'
8486
def paginated(embed_name, item_constructor, embedding=lambda x: x):
    """
    Build a decorator that turns a URL-building method into a lazy iterator which follows pagination.

    The decorated method is invoked as fun(do_paginate, self, *args, **kwargs) and is expected to
    return do_paginate(url, params). Pages are only fetched (via self.fetch_resource) while the
    returned generator is being consumed; the page size is taken from self.ITER_PAGE_SIZE.
    @param embed_name: The key under '_embedded' in the JSON response that contains the resources to be paginated.
                       (e.g. 'collections', 'objects', 'items', etc.)
    @param item_constructor: A callable that takes a resource dictionary and returns an item.
    @param embedding: Optional callable applied to each fetched page response before it is inspected
                      (default: identity function), e.g. to unwrap a nested search result
    @return: A decorator that, when applied to a method, follows pagination and yields each resource
    """
    def decorator(fun):
        @functools.wraps(fun)
        def decorated(self, *args, **kwargs):
            def do_paginate(url, params=None):
                # Work on a copy: the caller's dict must not gain a 'size' key as a side effect,
                # and a missing/None params argument is treated as "no extra parameters".
                params = {**(params or {}), 'size': self.ITER_PAGE_SIZE}

                while url is not None:
                    # NOTE(review): fetch_resource is defined elsewhere; this code assumes it
                    # returns a dict-like JSON document - confirm its behaviour on HTTP errors.
                    r_json = embedding(self.fetch_resource(url, params))
                    for resource in r_json.get('_embedded', {}).get(embed_name, []):
                        yield item_constructor(resource)

                    links = r_json.get('_links', {})
                    if 'next' not in links:
                        break
                    url = links['next']['href']
                    # assume the 'next' link contains all the
                    # params needed for the correct next page:
                    params = {}

            return fun(do_paginate, self, *args, **kwargs)
        return decorated

    return decorator
118+
85119 def __init__ (self , api_endpoint = API_ENDPOINT , username = USERNAME , password = PASSWORD , solr_endpoint = SOLR_ENDPOINT ,
86120 solr_auth = SOLR_AUTH , fake_user_agent = False ):
87121 """
@@ -397,6 +431,36 @@ def search_objects(self, query=None, scope=None, filters=None, page=0, size=20,
397431
398432 return dsos
399433
@paginated(
    embed_name='objects',
    item_constructor=lambda hit: SimpleDSpaceObject(hit['_embedded']['indexableObject']),
    embedding=lambda response: response['_embedded']['searchResult']
)
def search_objects_iter(do_paginate, self, query=None, scope=None, filters=None, dso_type=None, sort=None):
    """
    Run a discovery search and iterate over every hit; following pages are requested by the
    'paginated' decorator as the iterator is consumed
    @param query: query string
    @param scope: uuid to limit search scope, eg. owning collection, parent community, etc.
    @param filters: discovery filters as dict eg. {'f.entityType': 'Publication,equals', ... }
    @param dso_type: DSO type to further filter results
    @param sort: sort eg. 'title,asc'
    @return: Iterator of SimpleDSpaceObject
    """
    # Only forward the search options that were actually supplied
    supplied = {
        name: value
        for name, value in (('query', query), ('scope', scope), ('dsoType', dso_type), ('sort', sort))
        if value is not None
    }
    if filters is None:
        filters = {}

    # Filters are merged last, so they override same-named keys from above
    return do_paginate(f'{self.API_ENDPOINT}/discover/search/objects', {**supplied, **filters})
463+
400464 def fetch_resource (self , url , params = None ):
401465 """
402466 Simple function for higher-level 'get' functions to use whenever they want
@@ -571,6 +635,20 @@ def get_bundles(self, parent=None, uuid=None, page=0, size=20, sort=None):
571635
572636 return bundles
573637
@paginated('bundles', Bundle)
def get_bundles_iter(do_paginate, self, parent, sort=None):
    """
    Iterate over the bundles of an item; following pages are requested by the 'paginated'
    decorator as the iterator is consumed
    @param parent: python Item object, from which the UUID will be referenced in the URL.
    @param sort: optional value sent as the 'sort' request parameter (omitted when None)
    @return: Iterator of Bundle
    """
    bundles_url = f'{self.API_ENDPOINT}/core/items/{parent.uuid}/bundles'
    query_params = {} if sort is None else {'sort': sort}
    return do_paginate(bundles_url, query_params)
651+
574652 def create_bundle (self , parent = None , name = 'ORIGINAL' ):
575653 """
576654 Create new bundle in the specified item
@@ -621,6 +699,24 @@ def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None):
621699 bitstreams .append (Bitstream (bitstream_resource ))
622700 return bitstreams
623701
@paginated('bitstreams', Bitstream)
def get_bitstreams_iter(do_paginate, self, bundle, sort=None):
    """
    Iterate over all bitstreams of a bundle; following pages are requested by the 'paginated'
    decorator as the iterator is consumed
    @param bundle: A python Bundle object to parse for bitstream links to retrieve
    @param sort: optional value sent as the 'sort' request parameter (omitted when None)
    @return: Iterator of Bitstream
    """
    if 'bitstreams' not in bundle.links:
        # No link advertised on the bundle: fall back to the conventional endpoint
        url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams'
        logging.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}')
    else:
        url = bundle.links['bitstreams']['href']

    query_params = {} if sort is None else {'sort': sort}
    return do_paginate(url, query_params)
719+
624720 def create_bitstream (self , bundle = None , name = None , path = None , mime = None , metadata = None , retry = False ):
625721 """
626722 Upload a file and create a bitstream for a specified parent bundle, from the uploaded file and
@@ -734,6 +830,24 @@ def get_communities(self, uuid=None, page=0, size=20, sort=None, top=False):
734830 # Return list (populated or empty)
735831 return communities
736832
@paginated('communities', Community)
def get_communities_iter(do_paginate, self, sort=None, top=False):
    """
    Iterate over communities; following pages are requested by the 'paginated' decorator as the
    iterator is consumed
    @param sort: optional value sent as the 'sort' request parameter (omitted when None)
    @param top: whether to restrict search to top communities (default: false)
    @return: Iterator of Community
    """
    url = f'{self.API_ENDPOINT}/core/communities'
    if top:
        # Top-level communities live under a dedicated search endpoint
        url = f'{url}/search/top'

    query_params = {} if sort is None else {'sort': sort}
    return do_paginate(url, query_params)
850+
737851 def create_community (self , parent , data ):
738852 """
739853 Create a community, either top-level or beneath a given parent
@@ -799,6 +913,21 @@ def get_collections(self, uuid=None, community=None, page=0, size=20, sort=None)
799913 # Return list (populated or empty)
800914 return collections
801915
@paginated('collections', Collection)
def get_collections_iter(do_paginate, self, community=None, sort=None):
    """
    Get collections as an iterator, automatically handling pagination by requesting the next page when all items from one page have been consumed
    @param community: Community object. If present and it carries a 'collections' link, only the
                      collections behind that link are iterated; otherwise all collections are
    @param sort: optional value sent as the 'sort' request parameter (omitted when None)
    @return: Iterator of Collection
    """
    url = f'{self.API_ENDPOINT}/core/collections'

    if community is not None:
        if 'collections' in community.links and 'href' in community.links['collections']:
            url = community.links['collections']['href']

    # Forward the requested ordering; previously the 'sort' argument was accepted but ignored
    params = {}
    if sort is not None:
        params['sort'] = sort

    return do_paginate(url, params)
930+
802931 def create_collection (self , parent , data ):
803932 """
804933 Create collection beneath a given parent community.
@@ -975,6 +1104,12 @@ def delete_user(self, user):
9751104
9761105 # PAGINATION
9771106 def get_users (self , page = 0 , size = 20 , sort = None ):
1107+ """
1108+ Get a list of users (epersons) in the DSpace instance
1109+ @param page: Integer for page / offset of results. Default: 0
1110+ @param size: Integer for page size. Default: 20 (same as REST API default)
1111+ @return: list of User objects
1112+ """
9781113 url = f'{ self .API_ENDPOINT } /eperson/epersons'
9791114 users = list ()
9801115 params = {}
@@ -992,6 +1127,19 @@ def get_users(self, page=0, size=20, sort=None):
9921127 users .append (User (user_resource ))
9931128 return users
9941129
@paginated('epersons', User)
def get_users_iter(do_paginate, self, sort=None):
    """
    Iterate over the users (epersons) of the DSpace instance; following pages are requested by
    the 'paginated' decorator as the iterator is consumed
    @param sort: optional value sent as the 'sort' request parameter (omitted when None)
    @return: Iterator of User
    """
    query_params = {} if sort is None else {'sort': sort}
    return do_paginate(f'{self.API_ENDPOINT}/eperson/epersons', query_params)
1142+
9951143 def create_group (self , group ):
9961144 """
9971145 Create a group
0 commit comments