@@ -10,21 +10,43 @@ class Generator < Jekyll::Generator
   safe true
   priority :highest
 
+  def fetch_all_pages(url, headers)
+    all_results = []
+    page = 1
+    per_page = 100 # Maximum allowed by GitHub API
+
+    loop do
+      response = HTTParty.get("#{url}?page=#{page}&per_page=#{per_page}", headers: headers)
+      page_results = JSON.load(response.body)
+
+      # Break if we get an empty array or if it's not an array (error case)
+      break if !page_results.is_a?(Array) || page_results.empty?
+
+      all_results.concat(page_results)
+      page += 1
+
+      # Break if we got fewer results than per_page (last page)
+      break if page_results.length < per_page
+    end
+
+    all_results
+  end
+
   def generate(site)
     headers = {
       Authorization: 'token ' + ENV['GITHUB_TOKEN'],
     }
     site.data['contributors'] = {}
     site.collections['software'].docs.each do |d|
       d.data['github'] = {}
-      d.data['github']['releases'] = JSON.load(
-        HTTParty.get("https://api.github.com/repos/#{d['gh_org']}/#{d['name']}/releases", headers: headers).body
+      d.data['github']['releases'] = fetch_all_pages(
+        "https://api.github.com/repos/#{d['gh_org']}/#{d['name']}/releases", headers
       )
       d.data['github']['repo'] = JSON.load(
         HTTParty.get("https://api.github.com/repos/#{d['gh_org']}/#{d['name']}", headers: headers).body
       )
-      d.data['github']['contributors'] = JSON.load(
-        HTTParty.get("https://api.github.com/repos/#{d['gh_org']}/#{d['name']}/contributors", headers: headers).body
+      d.data['github']['contributors'] = fetch_all_pages(
+        "https://api.github.com/repos/#{d['gh_org']}/#{d['name']}/contributors", headers
       )
       site.config['extra_contributors'].each do |login, repo|
         if repo == d['name']
@@ -51,22 +73,39 @@ def generate(site)
     site.data['contributors'].each do |c|
       fetched = false
       sleep_time = 1
-      while not fetched do
+      retry_count = 0
+      max_retries = 5
+
+      while not fetched and retry_count < max_retries do
         begin
-          fetched = JSON.load(HTTParty.get("https://api.github.com/users/#{c['login']}", headers: headers).body)
-          sleep_time = 1
+          response = HTTParty.get("https://api.github.com/users/#{c['login']}", headers: headers)
+          user_data = JSON.load(response.body)
+
+          # Check if we got valid user data
+          if user_data.is_a?(Hash) && user_data.key?('login')
+            c.merge!(user_data)
+            fetched = true
+            sleep_time = 1
+          else
+            puts "Invalid user data for #{c['login']}: #{user_data}"
+            retry_count += 1
+          end
         rescue => e
-          puts e.message, sleep_time
-          if e.message.include?("rate limit exceeded")
-            puts("Sleeping for #{sleep_time} seconds")
+          puts "Error fetching user #{c['login']}: #{e.message}"
+          retry_count += 1
+
+          if e.message.include?("rate limit exceeded") || e.message.include?("403")
+            puts("Rate limit hit, sleeping for #{sleep_time} seconds")
             sleep(sleep_time)
-            sleep_time = sleep_time * 2
+            sleep_time = [sleep_time * 2, 60].min # Cap at 60 seconds
+          elsif retry_count >= max_retries
+            puts("Max retries reached for user #{c['login']}, skipping")
+            break
           else
-            raise # re-raise the last exception
+            sleep(1) # Brief pause before retry
           end
         end
       end
-      c.merge!(fetched)
     end
   end
 end