File tree Expand file tree Collapse file tree 2 files changed +5
-2
lines changed
scripts/data_collector/us_index Expand file tree Collapse file tree 2 files changed +5
-2
lines changed Original file line number Diff line number Diff line change 1414import pandas as pd
1515from tqdm import tqdm
1616from loguru import logger
17+ from fake_useragent import UserAgent
1718
1819
1920CUR_DIR = Path (__file__ ).resolve ().parent
@@ -52,6 +53,7 @@ def __init__(
5253 )
5354
5455 self ._target_url = f"{ WIKI_URL } /{ WIKI_INDEX_NAME_MAP [self .index_name .upper ()]} "
56+ self ._ua = UserAgent ()
5557
5658 @property
5759 @abc .abstractmethod
@@ -114,7 +116,7 @@ def calendar_list(self) -> List[pd.Timestamp]:
114116
115117 def _request_new_companies (self ) -> requests .Response :
116118 headers = {
117- 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
119+ 'User-Agent' : self . _ua . random
118120 }
119121 resp = requests .get (self ._target_url , timeout = None , headers = headers )
120122 if resp .status_code != 200 :
@@ -232,7 +234,7 @@ def get_changes(self) -> pd.DataFrame:
232234 # NOTE: may update the index of the table
233235 # Add headers to avoid 403 Forbidden error from Wikipedia
234236 headers = {
235- 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
237+ 'User-Agent' : self . _ua . random
236238 }
237239 response = requests .get (self .WIKISP500_CHANGES_URL , headers = headers )
238240 response .raise_for_status ()
Original file line number Diff line number Diff line change 33pandas
44lxml
55loguru
6+ fake-useragent
You can’t perform that action at this time.
0 commit comments