@@ -70,17 +70,7 @@ def extract_pull_requests(
7070 pages = 0
7171
7272 while True :
73- resp = session .get (base_url , params = params )
74- if (
75- resp .status_code == 403
76- and int (resp .headers .get ("X-RateLimit-Remaining" , "1" )) == 0
77- ):
78- sleep_for_rate_limit (resp )
79- # retry same URL/params after sleeping
80- continue
81- if resp .status_code != 200 :
82- error_text = resp .text if resp .text else "No response text"
83- raise SystemExit (f"GitHub API error { resp .status_code } : { error_text } " )
73+ resp = github_get (session , base_url , params = params )
8474
8575 batch = resp .json ()
8676 pages += 1
@@ -164,26 +154,13 @@ def extract_commits(
164154
165155 logger .info (f"Commits URL: { commits_url } " )
166156
167- resp = session .get (commits_url )
168- if (
169- resp .status_code == 403
170- and int (resp .headers .get ("X-RateLimit-Remaining" , "1" )) == 0
171- ):
172- sleep_for_rate_limit (resp )
173- resp = session .get (commits_url )
174- if resp .status_code != 200 :
175- raise SystemExit (f"GitHub API error { resp .status_code } : { resp .text } " )
157+ resp = github_get (session , commits_url )
176158
177159 commits = resp .json ()
178160 for commit in commits :
179161 commit_sha = commit .get ("sha" )
180162 commit_url = f"{ api_base } /repos/{ repo } /commits/{ commit_sha } "
181- commit_resp = session .get (commit_url )
182- if commit_resp .status_code != 200 :
183- raise SystemExit (
184- f"GitHub API error { commit_resp .status_code } : { commit_resp .text } "
185- )
186- commit_data = commit_resp .json ()
163+ commit_data = github_get (session , commit_url ).json ()
187164 commit ["files" ] = commit_data .get ("files" , [])
188165
189166 logger .info (f"Extracted { len (commits )} commits for PR #{ pr_number } " )
@@ -216,17 +193,7 @@ def extract_reviewers(
216193
217194 logger .info (f"Reviewers URL: { reviewers_url } " )
218195
219- resp = session .get (reviewers_url )
220- if (
221- resp .status_code == 403
222- and int (resp .headers .get ("X-RateLimit-Remaining" , "1" )) == 0
223- ):
224- sleep_for_rate_limit (resp )
225- resp = session .get (reviewers_url )
226- if resp .status_code != 200 :
227- raise SystemExit (f"GitHub API error { resp .status_code } : { resp .text } " )
228-
229- reviewers = resp .json ()
196+ reviewers = github_get (session , reviewers_url ).json ()
230197
231198 logger .info (f"Extracted { len (reviewers )} reviewers for PR #{ pr_number } " )
232199 return reviewers
@@ -258,17 +225,7 @@ def extract_comments(
258225
259226 logger .info (f"Comments URL: { comments_url } " )
260227
261- resp = session .get (comments_url )
262- if (
263- resp .status_code == 403
264- and int (resp .headers .get ("X-RateLimit-Remaining" , "1" )) == 0
265- ):
266- sleep_for_rate_limit (resp )
267- resp = session .get (comments_url )
268- if resp .status_code != 200 :
269- raise SystemExit (f"GitHub API error { resp .status_code } : { resp .text } " )
270-
271- comments = resp .json ()
228+ comments = github_get (session , comments_url ).json ()
272229 logger .info (f"Extracted { len (comments )} comments for PR #{ pr_number } " )
273230 return comments
274231
@@ -278,13 +235,47 @@ def sleep_for_rate_limit(resp: requests.Response) -> None:
278235 remaining = int (resp .headers .get ("X-RateLimit-Remaining" , 1 ))
279236 reset = int (resp .headers .get ("X-RateLimit-Reset" , 0 ))
280237 if remaining == 0 :
281- sleep_time = max (0 , reset - int (time .time ()))
238+ sleep_time = max (0 , reset - int (time .time ())) + 5
282239 print (
283240 f"Rate limit exceeded. Sleeping for { sleep_time } seconds." , file = sys .stderr
284241 )
285242 time .sleep (sleep_time )
286243
287244
def github_get(
    session: requests.Session,
    url: str,
    params: Optional[dict] = None,
) -> requests.Response:
    """
    Make a GitHub API GET request, retrying in a loop on rate limit.

    A response is treated as rate-limited when its status is 403 or 429
    and its ``X-RateLimit-Remaining`` header is 0. In that case the
    function sleeps via ``sleep_for_rate_limit`` and repeats the same
    request. There is no cap on the number of retries.

    Args:
        session: Authenticated requests session
        url: URL to fetch
        params: Optional query parameters

    Returns:
        Successful response (status 200)

    Raises:
        SystemExit: On non-200, non-rate-limit errors
    """
    # GitHub documents both 403 and 429 as possible statuses for an
    # exhausted primary rate limit, so either one qualifies for a retry.
    rate_limit_statuses = (403, 429)

    while True:
        resp = session.get(url, params=params)
        if resp.status_code == 200:
            return resp

        # A missing header defaults to "1" (quota left), so a plain
        # permission error is reported instead of being retried forever.
        rate_limited = (
            resp.status_code in rate_limit_statuses
            and int(resp.headers.get("X-RateLimit-Remaining", "1")) == 0
        )
        if not rate_limited:
            raise SystemExit(
                f"GitHub API error {resp.status_code} for {url}: "
                f"{resp.text or 'No response text'}"
            )

        # Wait out the rate-limit window, then retry the same URL/params.
        sleep_for_rate_limit(resp)
277+
278+
288279def transform_data (raw_data : list [dict ], repo : str ) -> dict :
289280 """
290281 Transform GitHub pull request data into BigQuery-compatible format.
0 commit comments