22from datetime import datetime , timedelta
33from io import BytesIO
44from itertools import islice
5+ from typing import Dict , List , Optional , Tuple , Any
56
67import requests
78import os
1213
# Package version; interpolated into the User-Agent header of outgoing requests.
VERSION = '0.8.0'
1415
class TrancoList:
    """A Tranco ranking held in memory as a ``{domain: rank}`` mapping.

    Attributes set by ``__init__``:
        date: the date given by the caller, stored unchanged.
        list_id: the list ID given by the caller, stored unchanged.
        list_page: URL built from ``list_id`` under https://tranco-list.eu/list/.
        list: dict mapping each domain to its 1-based position in ``lst``.
    """

    def __init__(self, date: str, list_id: str, lst: List[str]) -> None:
        """
        :param date: date associated with this list
        :param list_id: ID of this list
        :param lst: domains in rank order (first entry gets rank 1)
        """
        self.date: str = date
        self.list_id: str = list_id
        self.list_page: str = "https://tranco-list.eu/list/{}/".format(list_id)
        # Ranks are 1-based positions in `lst`; if a domain occurs more than
        # once, the dict keeps the rank of its last occurrence.
        self.list: Dict[str, int] = {domain: index for index, domain in enumerate(lst, start=1)}

    def top(self, num: int = 1000000) -> List[str]:
        """
        Return the best-ranked domains.
        :param num: maximum number of domains to return (applied as a slice bound)
        :return: domains sorted by ascending rank, truncated to ``num`` entries
        """
        return sorted(self.list, key=self.list.get)[:num]

    def rank(self, domain: str) -> int:
        """
        Look up the rank of a domain.
        :param domain: domain to look up
        :return: the 1-based rank of the domain, or -1 if it is not in the list
        """
        return self.list.get(domain, -1)
2729
30+
class TrancoCacheType(IntEnum):
    """Cache state of a list.

    Members are integers so they can be ordered with ``max`` and written to
    the JSON metadata file; a larger value means a more complete cached copy.
    """
    NOT_CACHED = 0  # no cached copy recorded
    CACHED_NOT_FULL = 1  # a copy is cached, but not the full list
    CACHED_FULL = 2  # the full list is cached
3235
33- class Tranco ():
34- def __init__ (self , ** kwargs ):
36+
37+ class Tranco :
38+ def __init__ (self , ** kwargs ) -> None :
3539 """
3640 :param kwargs:
3741 cache_dir: <str> directory used to cache Tranco top lists, default: cwd + .tranco/
@@ -40,63 +44,66 @@ def __init__(self, **kwargs):
4044 """
4145
4246 # Caching is required.
43- self .cache_dir = kwargs .get ('cache_dir' , None )
47+ self .cache_dir : Optional [ str ] = kwargs .get ('cache_dir' , None )
4448 if self .cache_dir is None :
4549 cwd = os .getcwd ()
4650 self .cache_dir = os .path .join (cwd , '.tranco' )
4751 if not os .path .exists (self .cache_dir ):
4852 os .mkdir (self .cache_dir )
49- self .cache_metadata = {}
53+ self .cache_metadata : Dict [ str , TrancoCacheType ] = {}
5054 self ._load_cache_metadata ()
5155
52- self .account_email = kwargs .get ('account_email' )
53- self .api_key = kwargs .get ('api_key' )
56+ self .account_email : str = kwargs .get ('account_email' )
57+ self .api_key : str = kwargs .get ('api_key' )
5458
55- self .session = requests .Session ()
56- self .session .headers .update ({'User-Agent' : 'Python/{} python-requests/{} tranco-python/{}' .format (platform .python_version (), requests .__version__ , VERSION )})
59+ self .session : requests .Session = requests .Session ()
60+ self .session .headers .update ({'User-Agent' : 'Python/{} python-requests/{} tranco-python/{}' .format (
61+ platform .python_version (), requests .__version__ , VERSION )})
5762
58- def _cache_metadata_path (self ):
63+ def _cache_metadata_path (self ) -> str :
5964 return os .path .join (self .cache_dir , 'metadata.json' )
6065
61- def _cache_path (self , list_id ):
66+ def _cache_path (self , list_id ) -> str :
6267 return os .path .join (self .cache_dir , '{}.csv' .format (list_id ))
6368
64- def _load_cache_metadata (self ):
69+ def _load_cache_metadata (self ) -> None :
6570 if not os .path .exists (self ._cache_metadata_path ()):
6671 self ._write_cache_metadata ()
6772 with open (self ._cache_metadata_path (), "rt" ) as f :
6873 self .cache_metadata = json .load (f )
6974
70- def _write_cache_metadata (self ):
75+ def _write_cache_metadata (self ) -> None :
7176 with open (self ._cache_metadata_path (), 'wt' ) as f :
7277 json .dump (self .cache_metadata , f )
7378
def _get_list_cache(self, list_id: str) -> TrancoCacheType:
    """
    Look up the recorded cache state of a list.
    :param list_id: ID of the list
    :return: the entry stored in ``self.cache_metadata`` for this ID, or
             ``TrancoCacheType.NOT_CACHED`` when there is none
    """
    # NOTE: entries read back from the JSON metadata file are plain ints; they
    # still compare equal to the IntEnum members.
    return self.cache_metadata.get(list_id, TrancoCacheType.NOT_CACHED)
7681
def _is_cached(self, list_id: Optional[str], full: bool = False) -> bool:
    """
    Tell whether a list is available in the cache.
    :param list_id: ID of the list
    :param full: if True, only a cached full list counts as cached
    :return: True if a suitable cached copy is recorded, False otherwise
    :raises ValueError: if no list ID is given
    """
    if not list_id:
        raise ValueError("You must pass a list ID to cache a list.")
    list_cache: TrancoCacheType = self._get_list_cache(list_id)
    if list_cache == TrancoCacheType.NOT_CACHED:
        return False
    # A partial copy does not satisfy a request for the full list.
    return not (full and list_cache == TrancoCacheType.CACHED_NOT_FULL)
8792
def _add_to_cache(self, list_id: Optional[str] = None, full: bool = False) -> None:
    """
    Record a list as cached and persist the cache metadata.
    :param list_id: ID of the list
    :param full: whether the cached copy is the full list
    :raises ValueError: if no list ID is given
    """
    if not list_id:
        raise ValueError("You must pass a list ID to cache a list.")
    requested = TrancoCacheType.CACHED_FULL if full else TrancoCacheType.CACHED_NOT_FULL
    # Keep the higher of the new and the already recorded state, so caching a
    # partial copy never downgrades an existing CACHED_FULL entry.
    self.cache_metadata[list_id] = max(requested, self._get_list_cache(list_id))
    self._write_cache_metadata()
9399
def clear_cache(self) -> None:
    """
    Remove every file in the cache directory and reset the cache metadata.
    """
    for f in os.listdir(self.cache_dir):
        os.remove(os.path.join(self.cache_dir, f))
    # The metadata file was just deleted, and _load_cache_metadata() recreates a
    # missing file from the in-memory dict. Empty the dict first, otherwise the
    # deleted lists would be written back and still be reported as cached.
    self.cache_metadata = {}
    self._load_cache_metadata()
98104
99- def list (self , date = None , list_id = None , subdomains = False , full = False ):
105+ def list (self , date : Optional [str ] = None , list_id : Optional [str ] = None , subdomains : bool = False ,
106+ full : bool = False ) -> TrancoList :
100107 """
101108 Retrieve a Tranco top list.
102109 :param date: Get the daily list for this date. If not given, the latest list is returned.
@@ -127,21 +134,22 @@ def list(self, date=None, list_id=None, subdomains=False, full=False):
127134
128135 return TrancoList (date , list_id , list (map (lambda x : x [x .index (',' ) + 1 :], top_list_lines )))
129136
130- def _get_list_id_for_date (self , date , subdomains = False ):
131- r1 = self .session .get ('https://tranco-list.eu/daily_list_id?date={}&subdomains={}' .format (date , str (subdomains ).lower ()))
137+ def _get_list_id_for_date (self , date : str , subdomains : bool = False ) -> str :
138+ r1 = self .session .get (
139+ 'https://tranco-list.eu/daily_list_id?date={}&subdomains={}' .format (date , str (subdomains ).lower ()))
132140 if r1 .status_code == 200 :
133141 return r1 .text
134142 else :
135143 raise AttributeError ("The daily list for this date is currently unavailable." )
136144
137- def _download_file (self , list_id , full = False ):
145+ def _download_file (self , list_id : str , full : bool = False ) -> None :
138146 if full :
139147 self ._download_full_file (list_id )
140148 else :
141149 self ._download_zip_file (list_id )
142150 self ._add_to_cache (list_id , full )
143151
144- def _download_zip_file (self , list_id ) :
152+ def _download_zip_file (self , list_id : str ) -> None :
145153 download_url = 'https://tranco-list.eu/download_daily/{}' .format (list_id )
146154 r = self .session .get (download_url , stream = True )
147155 if r .status_code == 200 :
@@ -167,15 +175,15 @@ def _download_zip_file(self, list_id):
167175 # List unavailable (non-success status code)
168176 raise AttributeError ("The daily list for this date is currently unavailable." )
169177
170- def _download_full_file (self , list_id ) :
178+ def _download_full_file (self , list_id : str ) -> None :
171179 download_url = 'https://tranco-list.eu/download/{}/full' .format (list_id )
172180 r = self .session .get (download_url )
173181 if r .status_code == 200 :
174182 file_bytes = r .content
175183 with open (self ._cache_path (list_id ), 'wb' ) as f :
176184 f .write (file_bytes )
177185
178- def configure (self , configuration ) :
186+ def configure (self , configuration : Dict [ str , Any ]) -> Tuple [ bool , str ] :
179187 """
180188 Configure a custom list (https://tranco-list.eu/configure).
181189 Requires that valid credentials were passed when creating the `Tranco` object.
@@ -212,7 +220,7 @@ def configure(self, configuration):
212220 elif r .status_code == 403 or r .status_code == 502 or r .status_code == 503 :
213221 raise ValueError ("This service is temporarily unavailable." )
214222
215- def list_metadata (self , list_id ) :
223+ def list_metadata (self , list_id : str ) -> Dict [ str , Any ] :
216224 """
217225 Retrieve metadata for list (whether it is already available, what its configuration is, ...)
218226 :param list_id: ID of the list for which to query metadata
0 commit comments