@@ -6,8 +6,11 @@ import (
66 "errors"
77 "fmt"
88 "io"
9+ "math"
910 "net/http"
11+ "regexp"
1012 "sort"
13+ "strconv"
1114 "sync"
1215 "time"
1316
@@ -16,28 +19,128 @@ import (
1619)
1720
1821var (
19- errNoMorePages = errors .New ("no more pages to get" )
20- ErrTooManyStars = errors .New ("repo has too many stargazers, github won't allow us to list all stars" )
22+ errNoMorePages = errors .New ("no more pages to get" )
23+ // linkLastPageRegex is used to parse the last page number from the Link header
24+ linkLastPageRegex = regexp .MustCompile (`[&?]page=(\d+)[^>]*>;\s*rel="last"` )
2125)
2226
27+ // maxConcurrentRequests is the maximum number of concurrent requests to GitHub API
28+ const maxConcurrentRequests = 5
29+
// Stargazer is a single star event on a repository.
type Stargazer struct {
	// StarredAt is the moment the star was given.
	StarredAt time.Time `json:"starred_at"`

	// Count is the ordinal position of this star among all stars of the
	// repository. It is only populated in sampling mode; a zero value means
	// "not set" and consumers should fall back to index+1 (non-sampling mode).
	// The field is excluded from JSON encoding and decoding.
	Count int `json:"-"`
}
2737
2838// Stargazers returns all the stargazers of a given repo.
39+ // If star count is too large, it uses sampling mode to fetch data points.
2940func (gh * GitHub ) Stargazers (ctx context.Context , repo Repository ) (stars []Stargazer , err error ) {
30- if gh .totalPages (repo ) > 400 {
31- return stars , ErrTooManyStars
41+ // First request the first page to get the actual max page count (via Link header)
42+ firstPageStars , lastPage , err := gh .getFirstPageAndLastPage (ctx , repo )
43+ if err != nil {
44+ return nil , err
45+ }
46+
47+ log .WithField ("repo" , repo .FullName ).
48+ WithField ("lastPage" , lastPage ).
49+ WithField ("starCount" , repo .StargazersCount ).
50+ Debug ("got pagination info from API" )
51+
52+ // If only one page or page count is less than max sample pages, fetch all pages
53+ if lastPage <= gh .maxSamplePages {
54+ return gh .getAllStargazersWithFirstPage (ctx , repo , firstPageStars , lastPage )
55+ }
56+
57+ // Otherwise use sampling mode
58+ return gh .getSampledStargazers (ctx , repo , firstPageStars , lastPage )
59+ }
60+
61+ // getFirstPageAndLastPage requests the first page and parses the Link header to get the max page count.
62+ func (gh * GitHub ) getFirstPageAndLastPage (ctx context.Context , repo Repository ) ([]Stargazer , int , error ) {
63+ log := log .WithField ("repo" , repo .FullName )
64+
65+ resp , err := gh .makeStarPageRequest (ctx , repo , 1 , "" )
66+ if err != nil {
67+ return nil , 0 , err
68+ }
69+ defer resp .Body .Close ()
70+
71+ if resp .StatusCode == http .StatusForbidden {
72+ rateLimits .Inc ()
73+ log .Warn ("rate limit hit" )
74+ return nil , 0 , ErrRateLimit
75+ }
76+
77+ if resp .StatusCode != http .StatusOK {
78+ bts , _ := io .ReadAll (resp .Body )
79+ return nil , 0 , fmt .Errorf ("%w: %v" , ErrGitHubAPI , string (bts ))
80+ }
81+
82+ bts , err := io .ReadAll (resp .Body )
83+ if err != nil {
84+ return nil , 0 , err
85+ }
86+
87+ var stars []Stargazer
88+ if err := json .Unmarshal (bts , & stars ); err != nil {
89+ return nil , 0 , err
90+ }
91+
92+ // Parse Link header to get the max page count
93+ linkHeader := resp .Header .Get ("Link" )
94+ lastPage := gh .parseLastPageFromLink (linkHeader )
95+
96+ // If no Link header or parsing failed, there is only one page
97+ if lastPage == 0 {
98+ lastPage = 1
99+ }
100+
101+ log .WithField ("lastPage" , lastPage ).Debug ("parsed last page from Link header" )
102+
103+ return stars , lastPage , nil
104+ }
105+
106+ // parseLastPageFromLink parses the max page count from the Link header.
107+ // Link header format: <url>; rel="next", <url>; rel="last"
108+ func (gh * GitHub ) parseLastPageFromLink (linkHeader string ) int {
109+ if linkHeader == "" {
110+ return 0
111+ }
112+
113+ matches := linkLastPageRegex .FindStringSubmatch (linkHeader )
114+ if len (matches ) < 2 {
115+ return 0
116+ }
117+
118+ lastPage , err := strconv .Atoi (matches [1 ])
119+ if err != nil {
120+ return 0
121+ }
122+
123+ return lastPage
124+ }
125+
126+ // getAllStargazersWithFirstPage fetches all stargazers (used for small repositories).
127+ // firstPageStars is the already fetched first page data.
128+ func (gh * GitHub ) getAllStargazersWithFirstPage (ctx context.Context , repo Repository , firstPageStars []Stargazer , lastPage int ) (stars []Stargazer , err error ) {
129+ stars = append (stars , firstPageStars ... )
130+
131+ // If only one page, return directly
132+ if lastPage <= 1 {
133+ return stars , nil
32134 }
33135
34136 var (
35137 wg errgroup.Group
36138 lock sync.Mutex
37139 )
38140
39- wg .SetLimit (4 )
40- for page := 1 ; page <= gh .lastPage (repo ); page ++ {
141+ wg .SetLimit (maxConcurrentRequests )
142+ // Start fetching from page 2 (page 1 is already fetched)
143+ for page := 2 ; page <= lastPage ; page ++ {
41144 page := page
42145 wg .Go (func () error {
43146 result , err := gh .getStargazersPage (ctx , repo , page )
@@ -61,6 +164,134 @@ func (gh *GitHub) Stargazers(ctx context.Context, repo Repository) (stars []Star
61164 return
62165}
63166
167+ // getSampledStargazers fetches stargazers using sampling mode (used for large repositories).
168+ // Inspired by star-history project's sampling logic.
169+ // firstPageStars is the already fetched first page data, lastPage is the actual max page count parsed from Link header.
170+ func (gh * GitHub ) getSampledStargazers (ctx context.Context , repo Repository , firstPageStars []Stargazer , lastPage int ) (stars []Stargazer , err error ) {
171+ log .WithField ("repo" , repo .FullName ).
172+ WithField ("lastPage" , lastPage ).
173+ Info ("using sampling mode for large repo" )
174+
175+ // Calculate sample page numbers, evenly distributed across all pages
176+ samplePages := gh .calculateSamplePages (lastPage , gh .maxSamplePages )
177+
178+ type pageResult struct {
179+ page int
180+ star Stargazer
181+ starCount int // the actual count position of this star
182+ }
183+
184+ var (
185+ wg errgroup.Group
186+ lock sync.Mutex
187+ results []pageResult
188+ )
189+
190+ // First page is already fetched, add it to results directly
191+ if len (firstPageStars ) > 0 {
192+ results = append (results , pageResult {
193+ page : 1 ,
194+ star : firstPageStars [0 ],
195+ starCount : 1 ,
196+ })
197+ }
198+
199+ wg .SetLimit (maxConcurrentRequests )
200+ for _ , page := range samplePages {
201+ // Skip first page (already fetched)
202+ if page == 1 {
203+ continue
204+ }
205+ page := page
206+ wg .Go (func () error {
207+ result , err := gh .getStargazersPage (ctx , repo , page )
208+ if errors .Is (err , errNoMorePages ) {
209+ return nil
210+ }
211+ if err != nil {
212+ return err
213+ }
214+ if len (result ) == 0 {
215+ return nil
216+ }
217+
218+ // Calculate the actual position of the first star on this page (based on page number and page size)
219+ // The 1st star on page 1 is star #1
220+ // The 1st star on page N is star #(N-1)*pageSize + 1
221+ starCount := (page - 1 )* gh .pageSize + 1
222+
223+ lock .Lock ()
224+ defer lock .Unlock ()
225+ results = append (results , pageResult {
226+ page : page ,
227+ star : result [0 ],
228+ starCount : starCount ,
229+ })
230+ return nil
231+ })
232+ }
233+
234+ if err = wg .Wait (); err != nil {
235+ return nil , err
236+ }
237+
238+ // Sort results by page number
239+ sort .Slice (results , func (i , j int ) bool {
240+ return results [i ].page < results [j ].page
241+ })
242+
243+ // Extract the first star from each sampled page as a data point and set Count
244+ for _ , r := range results {
245+ star := r .star
246+ star .Count = r .starCount
247+ stars = append (stars , star )
248+ }
249+
250+ // Add the last data point (current time and total star count)
251+ // This ensures the chart extends to the current time point
252+ stars = append (stars , Stargazer {
253+ StarredAt : time .Now (),
254+ Count : repo .StargazersCount ,
255+ })
256+
257+ return stars , nil
258+ }
259+
260+ // calculateSamplePages calculates the page numbers to sample.
261+ // Evenly distributed across all pages, ensuring the first page is included.
262+ func (gh * GitHub ) calculateSamplePages (totalPages , maxSamples int ) []int {
263+ pages := make ([]int , 0 , maxSamples )
264+
265+ for i := 1 ; i <= maxSamples ; i ++ {
266+ // Calculate evenly distributed page numbers
267+ page := int (math .Round (float64 (i * totalPages ) / float64 (maxSamples )))
268+ if page < 1 {
269+ page = 1
270+ }
271+ if page > totalPages {
272+ page = totalPages
273+ }
274+ pages = append (pages , page )
275+ }
276+
277+ // Ensure first page is included (important for displaying start time)
278+ if len (pages ) > 0 && pages [0 ] != 1 {
279+ pages [0 ] = 1
280+ }
281+
282+ // Deduplicate (may have duplicates in edge cases)
283+ seen := make (map [int ]bool )
284+ uniquePages := make ([]int , 0 , len (pages ))
285+ for _ , p := range pages {
286+ if ! seen [p ] {
287+ seen [p ] = true
288+ uniquePages = append (uniquePages , p )
289+ }
290+ }
291+
292+ return uniquePages
293+ }
294+
64295// - get last modified from cache
65296// - if exists, hit api with it
66297// - if it returns 304, get from cache
@@ -139,14 +370,6 @@ func (gh *GitHub) getStargazersPage(ctx context.Context, repo Repository, page i
139370 }
140371}
141372
142- func (gh * GitHub ) totalPages (repo Repository ) int {
143- return repo .StargazersCount / gh .pageSize
144- }
145-
146- func (gh * GitHub ) lastPage (repo Repository ) int {
147- return gh .totalPages (repo ) + 1
148- }
149-
150373func (gh * GitHub ) makeStarPageRequest (ctx context.Context , repo Repository , page int , etag string ) (* http.Response , error ) {
151374 url := fmt .Sprintf (
152375 "https://api.github.com/repos/%s/stargazers?page=%d&per_page=%d" ,
0 commit comments