11 | 11 |
12 | 12 | import logging
13 | 13 | import os
| 14 | +import time
14 | 15 |
15 | 16 | import requests
16 | 17 | from dataclasses import dataclass
@@ -80,28 +81,69 @@ def api_endpoint(self):
80 | 81 |         )
81 | 82 |         return url
82 | 83 |
| 84 | +    def handle_rate_limit(self, response):
| 85 | +        """
| 86 | +        Handle rate limiting by waiting until the rate limit resets.
| 87 | +
| 88 | +        Parameters
| 89 | +        ----------
| 90 | +        response : requests.Response
| 91 | +            The response object from the API request.
| 92 | +
| 93 | +        Notes
| 94 | +        -----
| 95 | +        This method checks the remaining rate limit in the response headers.
| 96 | +        If the remaining requests are exhausted, it calculates the time
| 97 | +        until the rate limit resets and sleeps accordingly.
| 98 | +        """
| 99 | +
| 100 | +        if "X-RateLimit-Remaining" in response.headers:
| 101 | +            remaining_requests = int(response.headers["X-RateLimit-Remaining"])
| 102 | +            if remaining_requests <= 0:
| 103 | +                reset_time = int(response.headers["X-RateLimit-Reset"])
| 104 | +                sleep_time = max(reset_time - time.time(), 0) + 1
| 105 | +                time.sleep(sleep_time)
| 106 | +
83 | 107 |     def return_response(self) -> list[dict[str, object]]:
84 | 108 |         """
85 | 109 |         Make a GET request to the Github API endpoint
86 | 110 |         Deserialize json response to list of dicts.
87 | 111 |
| 112 | +        Handles pagination, since the GitHub REST API returns at most 100 results per page.
| 113 | +
88 | 114 |         Returns
89 | 115 |         -------
90 | 116 |         list
91 | 117 |             List of dict items each containing a review issue
92 | 118 |         """
93 | 119 |
| 120 | +        results = []
| 121 | +        # api_endpoint is computed as a property. Reassign it to a local
| 122 | +        # variable so pagination can advance to a new url for each page.
| 123 | +        api_endpoint_url = self.api_endpoint
94 | 124 |         try:
95 | | -            response = requests.get(
96 | | -                self.api_endpoint,
97 | | -                headers={"Authorization": f"token {self.get_token()}"},
98 | | -            )
99 | | -            response.raise_for_status()
| 125 | +            while True:
| 126 | +                response = requests.get(
| 127 | +                    api_endpoint_url,
| 128 | +                    headers={"Authorization": f"token {self.get_token()}"},
| 129 | +                )
| 130 | +                response.raise_for_status()
| 131 | +                results.extend(response.json())
| 132 | +
| 133 | +                # Check if there are more pages to fetch
| 134 | +                if "next" in response.links:
| 135 | +                    next_url = response.links["next"]["url"]
| 136 | +                    api_endpoint_url = next_url
| 137 | +                else:
| 138 | +                    break
| 139 | +
| 140 | +                # Handle rate limiting before requesting the next page
| 141 | +                self.handle_rate_limit(response)
100 | 142 |
101 | 143 |         except requests.HTTPError as exception:
102 | 144 |             raise exception
103 | 145 |
104 | | -        return response.json()
| 146 | +        return results
105 | 147 |
106 | 148 |     def get_repo_meta(self, url: str) -> dict[str, Any] | None:
107 | 149 |         """
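
For readers who want to try the pattern outside the class, the sketch below is a minimal, standalone version of the pagination and rate-limit logic introduced in this commit. The helper name fetch_all_pages and its token argument are illustrative assumptions, not part of the change; only the use of response.links and the X-RateLimit-* headers mirrors the code above.

import time

import requests


def fetch_all_pages(url, token):
    """Illustrative helper: collect every page of a paginated GitHub list endpoint."""
    results = []
    while url:
        response = requests.get(url, headers={"Authorization": f"token {token}"})
        response.raise_for_status()
        results.extend(response.json())

        # requests parses the Link header into response.links; the "next"
        # relation disappears on the last page, which ends the loop.
        url = response.links.get("next", {}).get("url")

        # Same idea as handle_rate_limit above: if the quota is exhausted,
        # sleep until the advertised reset time (an epoch timestamp).
        if int(response.headers.get("X-RateLimit-Remaining", 1)) <= 0:
            reset_time = int(response.headers["X-RateLimit-Reset"])
            time.sleep(max(reset_time - time.time(), 0) + 1)

    return results

In the committed version the same logic lives in return_response and handle_rate_limit, with the endpoint coming from the api_endpoint property and the token from get_token().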