@@ -40,6 +40,7 @@ def __init__(self, settings: BaseSettings) -> None:
4040 self ._async_thread : AsyncThread | None = None
4141
4242 def open_spider (self , spider : Spider ) -> None :
43+ """Open the cache storage for a spider."""
4344 logger .debug ('Using Apify key value cache storage' , extra = {'spider' : spider })
4445 self .spider = spider
4546 self ._fingerprinter = spider .crawler .request_fingerprinter
@@ -57,16 +58,19 @@ async def open_kv() -> KeyValueStore:
5758 logger .debug (f"Opening cache storage's { kv_name !r} key value store" )
5859 self ._kv = self ._async_thread .run_coro (open_kv ())
5960
60- def close_spider (self , spider : Spider , current_time : int | None = None ) -> None :
61- assert self ._async_thread is not None , 'Async thread not initialized'
61+ def close_spider (self , _ : Spider , current_time : int | None = None ) -> None :
62+ """Close the cache storage for a spider."""
63+ if self ._async_thread is None :
64+ raise ValueError ('Async thread not initialized' )
6265
6366 logger .info (f'Cleaning up cache items (max { self .expiration_max_items } )' )
64- if 0 < self .expiration_secs :
67+ if self .expiration_secs > 0 :
6568 if current_time is None :
6669 current_time = int (time ())
6770
6871 async def expire_kv () -> None :
69- assert self ._kv is not None , 'Key value store not initialized'
72+ if self ._kv is None :
73+ raise ValueError ('Key value store not initialized' )
7074 i = 0
7175 async for item in self ._kv .iterate_keys ():
7276 value = await self ._kv .get_value (item .key )
@@ -97,10 +101,14 @@ async def expire_kv() -> None:
97101 finally :
98102 logger .debug ('Cache storage closed' )
99103
100- def retrieve_response (self , spider : Spider , request : Request , current_time : int | None = None ) -> Response | None :
101- assert self ._async_thread is not None , 'Async thread not initialized'
102- assert self ._kv is not None , 'Key value store not initialized'
103- assert self ._fingerprinter is not None , 'Request fingerprinter not initialized'
104+ def retrieve_response (self , _ : Spider , request : Request , current_time : int | None = None ) -> Response | None :
105+ """Retrieve a response from the cache storage."""
106+ if self ._async_thread is None :
107+ raise ValueError ('Async thread not initialized' )
108+ if self ._kv is None :
109+ raise ValueError ('Key value store not initialized' )
110+ if self ._fingerprinter is None :
111+ raise ValueError ('Request fingerprinter not initialized' )
104112
105113 key = self ._fingerprinter .fingerprint (request ).hex ()
106114 value = self ._async_thread .run_coro (self ._kv .get_value (key ))
@@ -125,10 +133,14 @@ def retrieve_response(self, spider: Spider, request: Request, current_time: int
125133 logger .debug ('Cache hit' , extra = {'request' : request })
126134 return respcls (url = url , headers = headers , status = status , body = body )
127135
128- def store_response (self , spider : Spider , request : Request , response : Response ) -> None :
129- assert self ._async_thread is not None , 'Async thread not initialized'
130- assert self ._kv is not None , 'Key value store not initialized'
131- assert self ._fingerprinter is not None , 'Request fingerprinter not initialized'
136+ def store_response (self , _ : Spider , request : Request , response : Response ) -> None :
137+ """Store a response in the cache storage."""
138+ if self ._async_thread is None :
139+ raise ValueError ('Async thread not initialized' )
140+ if self ._kv is None :
141+ raise ValueError ('Key value store not initialized' )
142+ if self ._fingerprinter is None :
143+ raise ValueError ('Request fingerprinter not initialized' )
132144
133145 key = self ._fingerprinter .fingerprint (request ).hex ()
134146 data = {
@@ -143,20 +155,21 @@ def store_response(self, spider: Spider, request: Request, response: Response) -
143155
144156def to_gzip (data : dict , mtime : int | None = None ) -> bytes :
145157 """Dump a dictionary to a gzip-compressed byte stream."""
146- with io .BytesIO () as byte_stream :
147- with gzip .GzipFile (fileobj = byte_stream , mode = 'wb' , mtime = mtime ) as gzip_file :
148- pickle .dump (data , gzip_file , protocol = 4 )
149- return byte_stream .getvalue ()
158+ with io .BytesIO () as byte_stream , gzip .GzipFile (fileobj = byte_stream , mode = 'wb' , mtime = mtime ) as gzip_file :
159+ pickle .dump (data , gzip_file , protocol = 4 )
160+ return byte_stream .getvalue ()
150161
151162
152163def from_gzip (gzip_bytes : bytes ) -> dict :
153164 """Load a dictionary from a gzip-compressed byte stream."""
154165 with io .BytesIO (gzip_bytes ) as byte_stream , gzip .GzipFile (fileobj = byte_stream , mode = 'rb' ) as gzip_file :
155- return pickle .load (gzip_file )
166+ data : dict = pickle .load (gzip_file )
167+ return data
156168
157169
158170def read_gzip_time (gzip_bytes : bytes ) -> int :
159171 """Read the modification time from a gzip-compressed byte stream without decompressing the data."""
160172 header = gzip_bytes [:10 ]
161173 header_components = struct .unpack ('<HBBI2B' , header )
162- return header_components [3 ]
174+ mtime : int = header_components [3 ]
175+ return mtime
0 commit comments