Skip to content

Commit 64c94e9

Browse files
authored
Remove hishel due to breaking API changes (#36)
feat: Remove Hishel due to major breaking API changes in 0.1.7; fix tests; bump versions; fix the build.
1 parent 51cf54d commit 64c94e9

File tree

12 files changed

+1315
-776
lines changed

12 files changed

+1315
-776
lines changed

.github/workflows/build_deploy.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ jobs:
4040
uv run pytest --ignore=tests/extras
4141
- name: Smoke Test Extras - Edgar and HTTP2
4242
run: |
43-
uv pip install edgartools h2
4443
uv run pytest tests/extras/test_edgartools.py tests/extras/test_http2.py
4544
- name: Upload package to PyPI
4645
if: startsWith(github.ref, 'refs/tags/v')

httpxthrottlecache/controller.py

Lines changed: 1 addition & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
import logging
22
import re
3-
from typing import Any, Callable, Optional, Union
4-
5-
import hishel
6-
import httpcore
3+
from typing import Optional, Union
74

85
logger = logging.getLogger(__name__)
96

@@ -39,74 +36,3 @@ def get_rule_for_request(
3936

4037
return None
4138

42-
43-
def get_cache_controller(
44-
key_generator: Callable[[httpcore.Request, Optional[bytes]], str],
45-
cache_rules: dict[str, dict[str, Union[bool, int]]],
46-
**kwargs: dict[str, Any],
47-
):
48-
class EdgarController(hishel.Controller):
49-
def is_cachable(self, request: httpcore.Request, response: httpcore.Response) -> bool:
50-
if response.status not in self._cacheable_status_codes:
51-
return False
52-
53-
cache_period = get_rule_for_request(
54-
request_host=request.url.host.decode(), target=request.url.target.decode(), cache_rules=cache_rules
55-
)
56-
57-
if cache_period: # True or an Int>0
58-
return True
59-
elif cache_period is False or cache_period == 0: # Explicitly not cacheable
60-
return False
61-
else:
62-
# Fall through default caching policy
63-
super_is_cachable = super().is_cachable(request, response)
64-
logger.debug("%s is cacheable %s", request.url, super_is_cachable)
65-
return super_is_cachable
66-
67-
def construct_response_from_cache(
68-
self, request: httpcore.Request, response: httpcore.Response, original_request: httpcore.Request
69-
) -> Union[httpcore.Request, httpcore.Response, None]:
70-
if (
71-
response.status not in self._cacheable_status_codes
72-
): # pragma: no cover - would only occur if the cache was loaded then rules changed
73-
return None
74-
75-
cache_period = get_rule_for_request(
76-
request_host=request.url.host.decode(), target=request.url.target.decode(), cache_rules=cache_rules
77-
)
78-
79-
if cache_period is True:
80-
# Cache forever, never recheck
81-
logger.debug("Cache hit for %s", request.url)
82-
return response
83-
elif (
84-
cache_period is False or cache_period == 0
85-
): # pragma: no cover - would only occur if the cache was loaded then rules changed
86-
return None
87-
elif cache_period: # int
88-
max_age = cache_period
89-
90-
age_seconds = hishel._controller.get_age(response, self._clock) # pyright: ignore[reportPrivateUsage]
91-
92-
if age_seconds > max_age:
93-
logger.debug(
94-
"Request needs to be validated before using %s (age=%d, max_age=%d)",
95-
request.url,
96-
age_seconds,
97-
max_age,
98-
)
99-
self._make_request_conditional(request=request, response=response)
100-
return request
101-
else:
102-
logger.debug("Cache hit for %s (age=%d, max_age=%d)", request.url, age_seconds, max_age)
103-
return response
104-
else:
105-
logger.debug("No rules applied to %s, using default", request.url)
106-
return super().construct_response_from_cache(request, response, original_request)
107-
108-
controller = EdgarController(
109-
cacheable_methods=["GET", "POST"], cacheable_status_codes=[200], key_generator=key_generator, **kwargs
110-
)
111-
112-
return controller

httpxthrottlecache/httpxclientmanager.py

Lines changed: 9 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,12 @@
99
from pathlib import Path
1010
from typing import Any, AsyncGenerator, Callable, Generator, Literal, Mapping, Optional, Sequence, Union
1111

12-
import hishel
1312
import httpx
1413
from httpx._types import ProxyTypes
1514
from pyrate_limiter import Duration, Limiter
1615

17-
from .controller import get_cache_controller
1816
from .filecache.transport import CachingTransport
19-
from .key_generator import file_key_generator
2017
from .ratelimiter import AsyncRateLimitingTransport, RateLimitingTransport, create_rate_limiter
21-
from .serializer import JSONByteSerializer
2218

2319
logger = logging.getLogger(__name__)
2420

@@ -33,8 +29,6 @@ class HttpxThrottleCache:
3329
Rate limiting is applied across all connections, whether via client or async_http_client, using pyrate_limiter. For multiprocessing, use pyrate_limiter's
3430
MultiprocessBucket or SqliteBucket w/ a file lock.
3531
36-
Caching is implemented via Hishel, which allows a variety of configurations, including AWS storage.
37-
3832
This function is used for all synchronous requests.
3933
"""
4034

@@ -44,14 +38,12 @@ class HttpxThrottleCache:
4438

4539
cache_rules: dict[str, dict[str, Union[bool, int]]] = field(default_factory=lambda: {})
4640
rate_limiter_enabled: bool = True
47-
cache_mode: Literal[False, "Disabled", "Hishel-S3", "Hishel-File", "FileCache"] = "Hishel-File"
41+
cache_mode: Literal[False, "Disabled", "FileCache"] = "FileCache"
4842
request_per_sec_limit: int = 10
4943
max_delay: Duration = field(default_factory=lambda: Duration.DAY)
5044
_client: Optional[httpx.Client] = None
5145

5246
rate_limiter: Optional[Limiter] = None
53-
s3_bucket: Optional[str] = None
54-
s3_client: Optional[Any] = None
5547
user_agent: Optional[str] = None
5648
user_agent_factory: Optional[Callable[[], str]] = None
5749

@@ -62,6 +54,9 @@ class HttpxThrottleCache:
6254
proxy: Optional[ProxyTypes] = None
6355

6456
def __post_init__(self):
57+
if self.cache_mode == "Hishel-File":
58+
logger.debug("Hishel-File is deprecated and will be removed, due to breaking API changes")
59+
self.cache_mode = "FileCache"
6560
self.cache_dir = Path(self.cache_dir) if isinstance(self.cache_dir, str) else self.cache_dir
6661
# self.lock = threading.Lock()
6762

@@ -75,15 +70,14 @@ def __post_init__(self):
7570

7671
if self.cache_mode == "Disabled" or self.cache_mode is False:
7772
pass
78-
elif self.cache_mode == "Hishel-S3":
79-
if self.s3_bucket is None:
80-
raise ValueError("s3_bucket must be provided if using Hishel-S3 storage")
81-
else: # Hishel-File or FileCache
73+
elif self.cache_mode == "FileCache":
8274
if self.cache_dir is None:
8375
raise ValueError(f"cache_dir must be provided if using a file based cache: {self.cache_mode}")
8476
else:
8577
if not self.cache_dir.exists():
8678
self.cache_dir.mkdir()
79+
else:
80+
raise ValueError(f"Unsupported cache_mode: {self.cache_mode}")
8781

8882
logger.debug(
8983
"Initialized cache with cache_mode=%s, cache_dir=%s, rate_limiter_enabled=%s",
@@ -248,21 +242,7 @@ def _get_transport(self, bypass_cache: bool, httpx_transport_params: dict[str, A
248242
assert self.cache_dir is not None
249243
return CachingTransport(cache_dir=self.cache_dir, transport=next_transport, cache_rules=self.cache_rules)
250244
else:
251-
# either Hishel-S3 or Hishel-File
252-
assert self.cache_mode == "Hishel-File" or self.cache_mode == "Hishel-S3"
253-
controller = get_cache_controller(key_generator=file_key_generator, cache_rules=self.cache_rules)
254-
255-
if self.cache_mode == "Hishel-S3":
256-
assert self.s3_bucket is not None
257-
storage = hishel.S3Storage(
258-
client=self.s3_client, bucket_name=self.s3_bucket, serializer=JSONByteSerializer()
259-
)
260-
else:
261-
assert self.cache_mode == "Hishel-File"
262-
assert self.cache_dir is not None
263-
storage = hishel.FileStorage(base_path=Path(self.cache_dir), serializer=JSONByteSerializer())
264-
265-
return hishel.CacheTransport(transport=next_transport, storage=storage, controller=controller)
245+
raise ValueError(f"Unsupported cache_mode: {self.cache_mode}")
266246

267247
def _get_async_transport(
268248
self, bypass_cache: bool, httpx_transport_params: dict[str, Any]
@@ -286,21 +266,7 @@ def _get_async_transport(
286266
assert self.cache_dir is not None
287267
return CachingTransport(cache_dir=self.cache_dir, transport=next_transport, cache_rules=self.cache_rules) # pyright: ignore[reportArgumentType]
288268
else:
289-
# either Hishel-S3 or Hishel-File
290-
assert self.cache_mode == "Hishel-File" or self.cache_mode == "Hishel-S3"
291-
controller = get_cache_controller(key_generator=file_key_generator, cache_rules=self.cache_rules)
292-
293-
if self.cache_mode == "Hishel-S3":
294-
assert self.s3_bucket is not None
295-
storage = hishel.AsyncS3Storage(
296-
client=self.s3_client, bucket_name=self.s3_bucket, serializer=JSONByteSerializer()
297-
)
298-
else:
299-
assert self.cache_mode == "Hishel-File"
300-
assert self.cache_dir is not None
301-
storage = hishel.AsyncFileStorage(base_path=Path(self.cache_dir), serializer=JSONByteSerializer())
302-
303-
return hishel.AsyncCacheTransport(transport=next_transport, storage=storage, controller=controller)
269+
raise ValueError(f"Unsupported cache_mode: {self.cache_mode}")
304270

305271
def __enter__(self):
306272
return self

httpxthrottlecache/key_generator.py

Lines changed: 0 additions & 22 deletions
This file was deleted.

httpxthrottlecache/serializer.py

Lines changed: 0 additions & 125 deletions
This file was deleted.

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,11 @@ keywords = [
1111
"sec",
1212
"httpx",
1313
"pyrate_limiter",
14-
"hishel"
1514
]
1615
dependencies = [
1716
"aiofiles>=24.1.0",
1817
"filelock>=3.18.0",
19-
"hishel>=0.1.3",
18+
"httpx>=0.28.1",
2019
"pyrate-limiter>=3.9.0",
2120
]
2221
classifiers = [
@@ -31,7 +30,8 @@ classifiers = [
3130

3231
[dependency-groups]
3332
dev = [
34-
"hishel[s3]>=0.1.3",
33+
"edgartools>=4.34.1; platform_python_implementation == 'CPython'",
34+
"h2>=4.3.0",
3535
"pre-commit-uv>=4.1.4",
3636
"pylint>=3.3.8",
3737
"pytest>=8.4.1",

0 commit comments

Comments
 (0)