99from pathlib import Path
1010from typing import Any , AsyncGenerator , Callable , Generator , Literal , Mapping , Optional , Sequence , Union
1111
12- import hishel
1312import httpx
1413from httpx ._types import ProxyTypes
1514from pyrate_limiter import Duration , Limiter
1615
17- from .controller import get_cache_controller
1816from .filecache .transport import CachingTransport
19- from .key_generator import file_key_generator
2017from .ratelimiter import AsyncRateLimitingTransport , RateLimitingTransport , create_rate_limiter
21- from .serializer import JSONByteSerializer
2218
2319logger = logging .getLogger (__name__ )
2420
@@ -33,8 +29,6 @@ class HttpxThrottleCache:
3329 Rate Limiting is across all connections, whether via client & async_http_client, using pyrate_limiter. For multiprocessing, use pyrate_limiter's
3430 MultiprocessBucket or SqliteBucket w/ a file lock.
3531
36- Caching is implemented via Hishel, which allows a variety of configurations, including AWS storage.
37-
3832 This function is used for all synchronous requests.
3933 """
4034
@@ -44,14 +38,12 @@ class HttpxThrottleCache:
4438
4539 cache_rules : dict [str , dict [str , Union [bool , int ]]] = field (default_factory = lambda : {})
4640 rate_limiter_enabled : bool = True
47- cache_mode : Literal [False , "Disabled" , "Hishel-S3" , "Hishel-File" , " FileCache" ] = "Hishel-File "
41+ cache_mode : Literal [False , "Disabled" , "FileCache" ] = "FileCache "
4842 request_per_sec_limit : int = 10
4943 max_delay : Duration = field (default_factory = lambda : Duration .DAY )
5044 _client : Optional [httpx .Client ] = None
5145
5246 rate_limiter : Optional [Limiter ] = None
53- s3_bucket : Optional [str ] = None
54- s3_client : Optional [Any ] = None
5547 user_agent : Optional [str ] = None
5648 user_agent_factory : Optional [Callable [[], str ]] = None
5749
@@ -62,6 +54,9 @@ class HttpxThrottleCache:
6254 proxy : Optional [ProxyTypes ] = None
6355
6456 def __post_init__ (self ):
57+ if self .cache_mode == "Hishel-File" :
58+ logger .debug ("Hishel-File is deprecated and will be removed, due to breaking API changes" )
59+ self .cache_mode = "FileCache"
6560 self .cache_dir = Path (self .cache_dir ) if isinstance (self .cache_dir , str ) else self .cache_dir
6661 # self.lock = threading.Lock()
6762
@@ -75,15 +70,14 @@ def __post_init__(self):
7570
7671 if self .cache_mode == "Disabled" or self .cache_mode is False :
7772 pass
78- elif self .cache_mode == "Hishel-S3" :
79- if self .s3_bucket is None :
80- raise ValueError ("s3_bucket must be provided if using Hishel-S3 storage" )
81- else : # Hishel-File or FileCache
73+ elif self .cache_mode == "FileCache" :
8274 if self .cache_dir is None :
8375 raise ValueError (f"cache_dir must be provided if using a file based cache: { self .cache_mode } " )
8476 else :
8577 if not self .cache_dir .exists ():
8678 self .cache_dir .mkdir ()
79+ else :
80+ raise ValueError (f"Unsupported cache_mode: { self .cache_mode } " )
8781
8882 logger .debug (
8983 "Initialized cache with cache_mode=%s, cache_dir=%s, rate_limiter_enabled=%s" ,
@@ -248,21 +242,7 @@ def _get_transport(self, bypass_cache: bool, httpx_transport_params: dict[str, A
248242 assert self .cache_dir is not None
249243 return CachingTransport (cache_dir = self .cache_dir , transport = next_transport , cache_rules = self .cache_rules )
250244 else :
251- # either Hishel-S3 or Hishel-File
252- assert self .cache_mode == "Hishel-File" or self .cache_mode == "Hishel-S3"
253- controller = get_cache_controller (key_generator = file_key_generator , cache_rules = self .cache_rules )
254-
255- if self .cache_mode == "Hishel-S3" :
256- assert self .s3_bucket is not None
257- storage = hishel .S3Storage (
258- client = self .s3_client , bucket_name = self .s3_bucket , serializer = JSONByteSerializer ()
259- )
260- else :
261- assert self .cache_mode == "Hishel-File"
262- assert self .cache_dir is not None
263- storage = hishel .FileStorage (base_path = Path (self .cache_dir ), serializer = JSONByteSerializer ())
264-
265- return hishel .CacheTransport (transport = next_transport , storage = storage , controller = controller )
245+ raise ValueError (f"Unsupported cache_mode: { self .cache_mode } " )
266246
267247 def _get_async_transport (
268248 self , bypass_cache : bool , httpx_transport_params : dict [str , Any ]
@@ -286,21 +266,7 @@ def _get_async_transport(
286266 assert self .cache_dir is not None
287267 return CachingTransport (cache_dir = self .cache_dir , transport = next_transport , cache_rules = self .cache_rules ) # pyright: ignore[reportArgumentType]
288268 else :
289- # either Hishel-S3 or Hishel-File
290- assert self .cache_mode == "Hishel-File" or self .cache_mode == "Hishel-S3"
291- controller = get_cache_controller (key_generator = file_key_generator , cache_rules = self .cache_rules )
292-
293- if self .cache_mode == "Hishel-S3" :
294- assert self .s3_bucket is not None
295- storage = hishel .AsyncS3Storage (
296- client = self .s3_client , bucket_name = self .s3_bucket , serializer = JSONByteSerializer ()
297- )
298- else :
299- assert self .cache_mode == "Hishel-File"
300- assert self .cache_dir is not None
301- storage = hishel .AsyncFileStorage (base_path = Path (self .cache_dir ), serializer = JSONByteSerializer ())
302-
303- return hishel .AsyncCacheTransport (transport = next_transport , storage = storage , controller = controller )
269+ raise ValueError (f"Unsupported cache_mode: { self .cache_mode } " )
304270
305271 def __enter__ (self ):
306272 return self
0 commit comments