11import os
22import re
3+ import time
34import typing
45
56from bs4 import BeautifulSoup
67from bs4 import element
78import requests
9+ from tqdm import tqdm
810
911
1012class UsernameRepositoryError (ValueError ):
@@ -59,19 +61,30 @@ class GitHub:
5961 __GITHUB_URL : str = "https://github.com"
6062 __STARGAZERS_URL_SUFFIX : str = "/stargazers"
6163 __PAGE_SUFFIX : str = "?page="
62- __MARK_END_OF_STARGAZERS : str = ' This repository has no more stargazers.'
64+ __MARK_END_OF_STARGAZERS : str = " This repository has no more stargazers."
6365
# HTTP response status codes, named so comparisons against
# `response.status_code` read as intent rather than bare numbers.
__OK_STATUS_CODE : int = 200  # 200 OK
__TOO_MANY_REQUESTS_STATUS_CODE : int = 429  # 429 Too Many Requests
__NOT_FOUND_STATUS_CODE : int = 404  # 404 Not Found

# Pause, in seconds, taken before every HTTP request so that consecutive page
# fetches are less likely to be answered with 429 (Too Many Requests).
# Class-level default; exposed per instance through the `sleep_time` property.
_sleep : float = 2
71+
def __init__(self, username_and_repository: str) -> None:
    """Bind this instance to a single GitHub repository.

    Args:
        username_and_repository: The repository in ``username/repository``
            form; a leading ``https://github.com/`` or ``github.com/`` is
            stripped before parsing.

    Raises:
        UsernameRepositoryError: If the value does not split into exactly
            two ``/``-separated components.
    """
    parsed = GitHub.__extract_user_and_repo(username_and_repository)
    self.__username, self.__repository = parsed

    # The repository URL is derived from the two fields set just above,
    # so it must be computed after them.
    repository_url: str = self.__get_repository_url()
    self.__repository_url: str = repository_url
    self.__stargazers_base_url: str = f"{repository_url}{self.__STARGAZERS_URL_SUFFIX}"
7276
77+ @classmethod
78+ def __check_username_and_repository (cls , username_and_repository : str ) -> str :
79+ if username_and_repository .startswith ("https://github.com/" ):
80+ return username_and_repository [19 :]
81+ elif username_and_repository .startswith ("github.com/" ):
82+ return username_and_repository [11 :]
83+ return username_and_repository
84+
7385 @classmethod
7486 def __extract_user_and_repo (cls , username_and_repository : str ) -> typing .Optional [typing .Tuple [str , str ]]:
87+ username_and_repository = cls .__check_username_and_repository (username_and_repository )
7588 components : typing .List [str ] = username_and_repository .split ("/" )
7689 if len (components ) != 2 :
7790 raise UsernameRepositoryError ()
@@ -85,7 +98,8 @@ def __get_repository_url(self) -> str:
8598 return os .path .join (self .__GITHUB_URL , self .__username , self .__repository )
8699
87100 def __get_soup (self , url : str ) -> BeautifulSoup :
88- response : requests .Response = requests .get (url , headers = {'Content-Type' : 'text/html' })
101+ time .sleep (self ._sleep ) # to avoid __TOO_MANY_REQUESTS_STATUS_CODE
102+ response : requests .Response = requests .get (url , headers = {"Content-Type" : "text/html" })
89103
90104 status_code : int = response .status_code
91105 if status_code == self .__OK_STATUS_CODE :
@@ -98,7 +112,7 @@ def __get_soup(self, url: str) -> BeautifulSoup:
98112
99113 def __extract_stargazers_from_url (self , url : str ) -> typing .List [str ]:
100114 soup : typing .Optional [BeautifulSoup ] = self .__get_soup (url )
101- h3_components : element .ResultSet = soup .find_all ('h3' )
115+ h3_components : element .ResultSet = soup .find_all ("h3" )
102116
103117 def _check_hyperlink_component (component : element .Tag ) -> None :
104118 """Check the BeautifulSoup `element.Tag` component that receives a hyperlink HTML tag.
@@ -114,20 +128,20 @@ def _check_hyperlink_component(component: element.Tag) -> None:
114128
115129 If any of the above mentioned is missing or not in the expected form, an Exception is raised.
116130 """
117- hyperlink_component : typing .Optional [element .Tag ] = component .find ('a' )
131+ hyperlink_component : typing .Optional [element .Tag ] = component .find ("a" )
118132 if not hyperlink_component :
119133 raise MissingHyperlinkTagError ()
120- if not hyperlink_component .get (' href' ):
134+ if not hyperlink_component .get (" href" ):
121135 raise MissingHrefAttributeError ()
122- href_content : str = hyperlink_component [' href' ]
136+ href_content : str = hyperlink_component [" href" ]
123137 if not re .match (r"/.+$" , href_content ):
124138 raise HrefContentError (href_content )
125139
126140 def _extract_username_from_h3 (component : element .Tag ) -> typing .Optional [str ]:
127141 if component .get_text () == self .__MARK_END_OF_STARGAZERS :
128142 return None
129143 _check_hyperlink_component (component )
130- return component .a [' href' ][1 :] # dropping the first '/' character
144+ return component .a [" href" ][1 :] # dropping the first '/' character
131145
132146 users : typing .List [str ] = []
133147 for component in h3_components :
@@ -146,17 +160,18 @@ def get_all_stargazers(self) -> typing.List[str]:
146160
147161 all_stargazers : typing .List [str ] = []
148162 previous_stargazers : typing .List [str ] = []
149- while True :
150- current_url : str = self .__get_url_page_template (page_number )
151- current_stargazers : typing .List [str ] = self .__extract_stargazers_from_url (current_url )
152- if not current_stargazers :
153- break
154- if current_stargazers == previous_stargazers :
155- break
156- all_stargazers += current_stargazers
157- previous_stargazers = current_stargazers
158- page_number += 1
159-
163+ with tqdm (desc = "Current number of stargazers is " , unit = "ppl" ) as pbar :
164+ while True :
165+ current_url : str = self .__get_url_page_template (page_number )
166+ current_stargazers : typing .List [str ] = self .__extract_stargazers_from_url (current_url )
167+ if not current_stargazers :
168+ break
169+ if current_stargazers == previous_stargazers :
170+ break
171+ pbar .update (len (current_stargazers ))
172+ all_stargazers += current_stargazers
173+ previous_stargazers = current_stargazers
174+ page_number += 1
160175 return sorted (all_stargazers )
161176
162177 def is_stargazer (self , user : str ) -> bool :
@@ -176,3 +191,12 @@ def is_stargazer(self, user: str) -> bool:
176191 page_number += 1
177192
178193 return False
194+
195+ @property
196+ def sleep_time (self ) -> float :
197+ return self ._sleep
198+
199+ @sleep_time .setter
200+ def sleep_time (self , duration : float ) -> None :
201+ assert isinstance (duration , (float , int ))
202+ self ._sleep = duration
0 commit comments