Skip to content

Commit b05ef91

Browse files
author
egor
committed
Add progress bar, sleep to avoid too many requests and style fixes
Signed-off-by: egor <[email protected]>
1 parent 4b0de00 commit b05ef91

File tree

3 files changed

+46
-22
lines changed

3 files changed

+46
-22
lines changed

github_stargazers/github.py

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import os
22
import re
3+
import time
34
import typing
45

56
from bs4 import BeautifulSoup
67
from bs4 import element
78
import requests
9+
from tqdm import tqdm
810

911

1012
class UsernameRepositoryError(ValueError):
@@ -59,19 +61,30 @@ class GitHub:
5961
__GITHUB_URL: str = "https://github.com"
6062
__STARGAZERS_URL_SUFFIX: str = "/stargazers"
6163
__PAGE_SUFFIX: str = "?page="
62-
__MARK_END_OF_STARGAZERS: str = 'This repository has no more stargazers.'
64+
__MARK_END_OF_STARGAZERS: str = "This repository has no more stargazers."
6365

6466
__OK_STATUS_CODE: int = 200
6567
__TOO_MANY_REQUESTS_STATUS_CODE: int = 429
6668
__NOT_FOUND_STATUS_CODE: int = 404
6769

70+
_sleep: float = 2 # sleep time in seconds to avoid "too many requests error"
71+
6872
def __init__(self, username_and_repository: str) -> None:
6973
self.__username, self.__repository = GitHub.__extract_user_and_repo(username_and_repository)
7074
self.__repository_url: str = self.__get_repository_url()
7175
self.__stargazers_base_url: str = self.__repository_url + self.__STARGAZERS_URL_SUFFIX
7276

77+
@classmethod
def __check_username_and_repository(cls, username_and_repository: str) -> str:
    """Strip a leading GitHub host prefix from the given argument.

    The argument may be a bare ``username/repository`` string or a GitHub
    URL such as ``https://github.com/username/repository`` (the scheme is
    optional). Only the prefix is removed; no further validation is done
    here.

    Args:
        username_and_repository: Raw value supplied by the caller.

    Returns:
        The argument without its GitHub host prefix, or the argument
        unchanged when it does not start with a recognised prefix.
    """
    # The slice offset is derived from the prefix itself, so the literals
    # and the offsets cannot drift apart.
    recognised_prefixes: typing.Tuple[str, ...] = (
        "https://github.com/",
        "http://github.com/",
        "github.com/",
    )
    for prefix in recognised_prefixes:
        if username_and_repository.startswith(prefix):
            return username_and_repository[len(prefix):]
    return username_and_repository
84+
7385
@classmethod
7486
def __extract_user_and_repo(cls, username_and_repository: str) -> typing.Optional[typing.Tuple[str, str]]:
87+
username_and_repository = cls.__check_username_and_repository(username_and_repository)
7588
components: typing.List[str] = username_and_repository.split("/")
7689
if len(components) != 2:
7790
raise UsernameRepositoryError()
@@ -85,7 +98,8 @@ def __get_repository_url(self) -> str:
8598
return os.path.join(self.__GITHUB_URL, self.__username, self.__repository)
8699

87100
def __get_soup(self, url: str) -> BeautifulSoup:
88-
response: requests.Response = requests.get(url, headers={'Content-Type': 'text/html'})
101+
time.sleep(self._sleep) # to avoid __TOO_MANY_REQUESTS_STATUS_CODE
102+
response: requests.Response = requests.get(url, headers={"Content-Type": "text/html"})
89103

90104
status_code: int = response.status_code
91105
if status_code == self.__OK_STATUS_CODE:
@@ -98,7 +112,7 @@ def __get_soup(self, url: str) -> BeautifulSoup:
98112

99113
def __extract_stargazers_from_url(self, url: str) -> typing.List[str]:
100114
soup: typing.Optional[BeautifulSoup] = self.__get_soup(url)
101-
h3_components: element.ResultSet = soup.find_all('h3')
115+
h3_components: element.ResultSet = soup.find_all("h3")
102116

103117
def _check_hyperlink_component(component: element.Tag) -> None:
104118
"""Check the BeautifulSoup `element.Tag` component that receives a hyperlink HTML tag.
@@ -114,20 +128,20 @@ def _check_hyperlink_component(component: element.Tag) -> None:
114128
115129
If any of the above mentioned is missing or not in the expected form, an Exception is raised.
116130
"""
117-
hyperlink_component: typing.Optional[element.Tag] = component.find('a')
131+
hyperlink_component: typing.Optional[element.Tag] = component.find("a")
118132
if not hyperlink_component:
119133
raise MissingHyperlinkTagError()
120-
if not hyperlink_component.get('href'):
134+
if not hyperlink_component.get("href"):
121135
raise MissingHrefAttributeError()
122-
href_content: str = hyperlink_component['href']
136+
href_content: str = hyperlink_component["href"]
123137
if not re.match(r"/.+$", href_content):
124138
raise HrefContentError(href_content)
125139

126140
def _extract_username_from_h3(component: element.Tag) -> typing.Optional[str]:
127141
if component.get_text() == self.__MARK_END_OF_STARGAZERS:
128142
return None
129143
_check_hyperlink_component(component)
130-
return component.a['href'][1:] # dropping the first '/' character
144+
return component.a["href"][1:] # dropping the first '/' character
131145

132146
users: typing.List[str] = []
133147
for component in h3_components:
@@ -146,17 +160,18 @@ def get_all_stargazers(self) -> typing.List[str]:
146160

147161
all_stargazers: typing.List[str] = []
148162
previous_stargazers: typing.List[str] = []
149-
while True:
150-
current_url: str = self.__get_url_page_template(page_number)
151-
current_stargazers: typing.List[str] = self.__extract_stargazers_from_url(current_url)
152-
if not current_stargazers:
153-
break
154-
if current_stargazers == previous_stargazers:
155-
break
156-
all_stargazers += current_stargazers
157-
previous_stargazers = current_stargazers
158-
page_number += 1
159-
163+
with tqdm(desc="Current number of stargazers is ", unit="ppl") as pbar:
164+
while True:
165+
current_url: str = self.__get_url_page_template(page_number)
166+
current_stargazers: typing.List[str] = self.__extract_stargazers_from_url(current_url)
167+
if not current_stargazers:
168+
break
169+
if current_stargazers == previous_stargazers:
170+
break
171+
pbar.update(len(current_stargazers))
172+
all_stargazers += current_stargazers
173+
previous_stargazers = current_stargazers
174+
page_number += 1
160175
return sorted(all_stargazers)
161176

162177
def is_stargazer(self, user: str) -> bool:
@@ -176,3 +191,12 @@ def is_stargazer(self, user: str) -> bool:
176191
page_number += 1
177192

178193
return False
194+
195+
@property
def sleep_time(self) -> float:
    """Pause, in seconds, taken before each HTTP request is sent."""
    return self._sleep

@sleep_time.setter
def sleep_time(self, duration: float) -> None:
    """Set the pause taken before each HTTP request.

    Args:
        duration: Number of seconds to sleep; an ``int`` or a ``float``.

    Raises:
        TypeError: If ``duration`` is neither an ``int`` nor a ``float``.
        ValueError: If ``duration`` is negative.
    """
    # Explicit raises instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently disable the validation.
    if not isinstance(duration, (float, int)):
        raise TypeError(
            "sleep_time must be an int or a float, got {}".format(type(duration).__name__)
        )
    # time.sleep() rejects negative values; fail here, at assignment time,
    # rather than later in the middle of a scraping run.
    if duration < 0:
        raise ValueError("sleep_time must be non-negative, got {}".format(duration))
    self._sleep = duration

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# pylint: disable=no-name-in-module,import-error
2-
from os import path
32
from setuptools import setup
43

54

@@ -18,7 +17,7 @@ def get_long_description():
1817

1918
setup(
2019
name='github-stargazers',
21-
#package_dir = {'': 'github-stargazers'},
20+
# package_dir = {'': 'github-stargazers'},
2221
packages=['github_stargazers'],
2322
entry_points={
2423
'console_scripts': [
@@ -29,7 +28,8 @@ def get_long_description():
2928
'beautifulsoup4>=4.6.0',
3029
'halo>=0.0.7',
3130
'click>=6.7',
32-
'requests>=2.18.4'
31+
'requests>=2.18.4',
32+
'tqdm>=4.19.6'
3333
],
3434
version=get_version(),
3535
description='List stargazers and check if a user starred that repository',

tests/test_github_stargazers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def wrong_arguments_message(halo_fail: str) -> str:
4848

4949
def verify_invoke_from_clirunner(result: Result, expected_output: str) -> None:
5050
assert result.exit_code == 0
51-
assert result.output == expected_output
51+
assert expected_output in result.output
5252

5353

5454
@responses.activate

0 commit comments

Comments
 (0)