Add progress bar, sleep to avoid too many requests and style fixes

egor · egor · commit b05ef91c4513 · 2018-10-17T01:51:06.000+02:00
Signed-off-by: egor <egor@sourced.tech>
diff --git a/github_stargazers/github.py b/github_stargazers/github.py
@@ -1,10 +1,12 @@
 import os
 import re
+import time
 import typing
 
 from bs4 import BeautifulSoup
 from bs4 import element
 import requests
+from tqdm import tqdm
 
 
 class UsernameRepositoryError(ValueError):
@@ -59,19 +61,30 @@ class GitHub:
     __GITHUB_URL: str = "https://github.com"
     __STARGAZERS_URL_SUFFIX: str = "/stargazers"
     __PAGE_SUFFIX: str = "?page="
-    __MARK_END_OF_STARGAZERS: str = 'This repository has no more stargazers.'
+    __MARK_END_OF_STARGAZERS: str = "This repository has no more stargazers."
 
     __OK_STATUS_CODE: int = 200
     __TOO_MANY_REQUESTS_STATUS_CODE: int = 429
     __NOT_FOUND_STATUS_CODE: int = 404
 
+    _sleep: float = 2  # sleep time in seconds to avoid "too many requests error"
+
     def __init__(self, username_and_repository: str) -> None:
         self.__username, self.__repository = GitHub.__extract_user_and_repo(username_and_repository)
         self.__repository_url: str = self.__get_repository_url()
         self.__stargazers_base_url: str = self.__repository_url + self.__STARGAZERS_URL_SUFFIX
 
+    @classmethod
+    def __check_username_and_repository(cls, username_and_repository: str) -> str:
+        """Return the input with a leading GitHub URL prefix removed, if one is present."""
+        for prefix in ("https://github.com/", "http://github.com/", "github.com/"):
+            if username_and_repository.startswith(prefix):
+                return username_and_repository[len(prefix):]  # len() avoids hand-counted offsets
+        return username_and_repository
+
     @classmethod
     def __extract_user_and_repo(cls, username_and_repository: str) -> typing.Optional[typing.Tuple[str, str]]:
+        username_and_repository = cls.__check_username_and_repository(username_and_repository)
         components: typing.List[str] = username_and_repository.split("/")
         if len(components) != 2:
             raise UsernameRepositoryError()
@@ -85,7 +98,8 @@ def __get_repository_url(self) -> str:
         return os.path.join(self.__GITHUB_URL, self.__username, self.__repository)
 
     def __get_soup(self, url: str) -> BeautifulSoup:
-        response: requests.Response = requests.get(url, headers={'Content-Type': 'text/html'})
+        time.sleep(self._sleep)  # to avoid __TOO_MANY_REQUESTS_STATUS_CODE
+        response: requests.Response = requests.get(url, headers={"Content-Type": "text/html"})
 
         status_code: int = response.status_code
         if status_code == self.__OK_STATUS_CODE:
@@ -98,7 +112,7 @@ def __get_soup(self, url: str) -> BeautifulSoup:
 
     def __extract_stargazers_from_url(self, url: str) -> typing.List[str]:
         soup: typing.Optional[BeautifulSoup] = self.__get_soup(url)
-        h3_components: element.ResultSet = soup.find_all('h3')
+        h3_components: element.ResultSet = soup.find_all("h3")
 
         def _check_hyperlink_component(component: element.Tag) -> None:
             """Check the BeautifulSoup `element.Tag` component that receives a hyperlink HTML tag.
@@ -114,20 +128,20 @@ def _check_hyperlink_component(component: element.Tag) -> None:
 
             If any of the above mentioned is missing or not in the expected form, an Exception is raised.
             """
-            hyperlink_component: typing.Optional[element.Tag] = component.find('a')
+            hyperlink_component: typing.Optional[element.Tag] = component.find("a")
             if not hyperlink_component:
                 raise MissingHyperlinkTagError()
-            if not hyperlink_component.get('href'):
+            if not hyperlink_component.get("href"):
                 raise MissingHrefAttributeError()
-            href_content: str = hyperlink_component['href']
+            href_content: str = hyperlink_component["href"]
             if not re.match(r"/.+$", href_content):
                 raise HrefContentError(href_content)
 
         def _extract_username_from_h3(component: element.Tag) -> typing.Optional[str]:
             if component.get_text() == self.__MARK_END_OF_STARGAZERS:
                 return None
             _check_hyperlink_component(component)
-            return component.a['href'][1:]  # dropping the first '/' character
+            return component.a["href"][1:]  # dropping the first '/' character
 
         users: typing.List[str] = []
         for component in h3_components:
@@ -146,17 +160,18 @@ def get_all_stargazers(self) -> typing.List[str]:
 
         all_stargazers: typing.List[str] = []
         previous_stargazers: typing.List[str] = []
-        while True:
-            current_url: str = self.__get_url_page_template(page_number)
-            current_stargazers: typing.List[str] = self.__extract_stargazers_from_url(current_url)
-            if not current_stargazers:
-                break
-            if current_stargazers == previous_stargazers:
-                break
-            all_stargazers += current_stargazers
-            previous_stargazers = current_stargazers
-            page_number += 1
-
+        with tqdm(desc="Current number of stargazers is ", unit="ppl") as pbar:
+            while True:
+                current_url: str = self.__get_url_page_template(page_number)
+                current_stargazers: typing.List[str] = self.__extract_stargazers_from_url(current_url)
+                if not current_stargazers:
+                    break
+                if current_stargazers == previous_stargazers:
+                    break
+                pbar.update(len(current_stargazers))
+                all_stargazers += current_stargazers
+                previous_stargazers = current_stargazers
+                page_number += 1
         return sorted(all_stargazers)
 
     def is_stargazer(self, user: str) -> bool:
@@ -176,3 +191,22 @@ def is_stargazer(self, user: str) -> bool:
             page_number += 1
 
         return False
+
+    @property
+    def sleep_time(self) -> float:
+        """Return the delay, in seconds, waited before each page request."""
+        return self._sleep
+
+    @sleep_time.setter
+    def sleep_time(self, duration: float) -> None:
+        """Set the delay, in seconds, waited before each page request.
+
+        Raises TypeError if `duration` is not an int or float, and ValueError
+        if it is negative (a negative value would make `time.sleep` fail later).
+        """
+        # Explicit raises instead of `assert`, which is stripped under `python -O`.
+        if not isinstance(duration, (float, int)):
+            raise TypeError("sleep_time must be an int or float, got " + type(duration).__name__)
+        if duration < 0:
+            raise ValueError("sleep_time must be non-negative")
+        self._sleep = duration
diff --git a/setup.py b/setup.py
@@ -1,5 +1,4 @@
 # pylint: disable=no-name-in-module,import-error
-from os import path
 from setuptools import setup
 
 
@@ -18,7 +17,7 @@ def get_long_description():
 
 setup(
     name='github-stargazers',
-    #package_dir = {'': 'github-stargazers'},
+    # package_dir = {'': 'github-stargazers'},
     packages=['github_stargazers'],
     entry_points={
         'console_scripts': [
@@ -29,7 +28,8 @@ def get_long_description():
         'beautifulsoup4>=4.6.0',
         'halo>=0.0.7',
         'click>=6.7',
-        'requests>=2.18.4'
+        'requests>=2.18.4',
+        'tqdm>=4.19.6'
     ],
     version=get_version(),
     description='List stargazers and check if a user starred that repository',
diff --git a/tests/test_github_stargazers.py b/tests/test_github_stargazers.py
@@ -48,7 +48,7 @@ def wrong_arguments_message(halo_fail: str) -> str:
 
 def verify_invoke_from_clirunner(result: Result, expected_output: str) -> None:
     assert result.exit_code == 0
-    assert result.output == expected_output
+    assert expected_output in result.output
 
 
 @responses.activate