Skip to content

Commit e36508d

Browse files
committed
style: format code
1 parent 381c044 commit e36508d

File tree

2 files changed

+42
-48
lines changed

2 files changed

+42
-48
lines changed

src/apify/scrapy/cache.py

Lines changed: 36 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -33,25 +33,23 @@ class ApifyCacheStorage:
3333

3434
def __init__(self, settings: BaseSettings) -> None:
3535
self.expiration_max_items = 100
36-
self.expiration_secs: int = settings.getint("HTTPCACHE_EXPIRATION_SECS")
36+
self.expiration_secs: int = settings.getint('HTTPCACHE_EXPIRATION_SECS')
3737
self.spider: Spider | None = None
3838
self._kv: KeyValueStore | None = None
3939
self._fingerprinter: RequestFingerprinterProtocol | None = None
4040
self._async_thread: AsyncThread | None = None
4141

4242
def open_spider(self, spider: Spider) -> None:
43-
logger.debug("Using Apify key value cache storage", extra={"spider": spider})
43+
logger.debug('Using Apify key value cache storage', extra={'spider': spider})
4444
self.spider = spider
4545
self._fingerprinter = spider.crawler.request_fingerprinter
46-
kv_name = f"httpcache-{spider.name}"
46+
kv_name = f'httpcache-{spider.name}'
4747

4848
async def open_kv() -> KeyValueStore:
4949
config = Configuration.get_global_configuration()
5050
if config.is_at_home:
5151
storage_client = ApifyStorageClient.from_config(config)
52-
return await KeyValueStore.open(
53-
name=kv_name, storage_client=storage_client
54-
)
52+
return await KeyValueStore.open(name=kv_name, storage_client=storage_client)
5553
return await KeyValueStore.open(name=kv_name)
5654

5755
logger.debug("Starting background thread for cache storage's event loop")
@@ -60,88 +58,84 @@ async def open_kv() -> KeyValueStore:
6058
self._kv = self._async_thread.run_coro(open_kv())
6159

6260
def close_spider(self, spider: Spider, current_time: int | None = None) -> None:
63-
assert self._async_thread is not None, "Async thread not initialized"
61+
assert self._async_thread is not None, 'Async thread not initialized'
6462

65-
logger.info(f"Cleaning up cache items (max {self.expiration_max_items})")
63+
logger.info(f'Cleaning up cache items (max {self.expiration_max_items})')
6664
if 0 < self.expiration_secs:
6765
if current_time is None:
6866
current_time = int(time())
6967

7068
async def expire_kv() -> None:
71-
assert self._kv is not None, "Key value store not initialized"
69+
assert self._kv is not None, 'Key value store not initialized'
7270
i = 0
7371
async for item in self._kv.iterate_keys():
7472
value = await self._kv.get_value(item.key)
7573
try:
7674
gzip_time = read_gzip_time(value)
7775
except Exception as e:
78-
logger.warning(f"Malformed cache item {item.key}: {e}")
76+
logger.warning(f'Malformed cache item {item.key}: {e}')
7977
await self._kv.set_value(item.key, None)
8078
else:
8179
if self.expiration_secs < current_time - gzip_time:
82-
logger.debug(f"Expired cache item {item.key}")
80+
logger.debug(f'Expired cache item {item.key}')
8381
await self._kv.set_value(item.key, None)
8482
else:
85-
logger.debug(f"Valid cache item {item.key}")
83+
logger.debug(f'Valid cache item {item.key}')
8684
if i == self.expiration_max_items:
8785
break
8886
i += 1
8987

9088
self._async_thread.run_coro(expire_kv())
9189

92-
logger.debug("Closing cache storage")
90+
logger.debug('Closing cache storage')
9391
try:
9492
self._async_thread.close()
9593
except KeyboardInterrupt:
96-
logger.warning("Shutdown interrupted by KeyboardInterrupt!")
94+
logger.warning('Shutdown interrupted by KeyboardInterrupt!')
9795
except Exception:
98-
logger.exception("Exception occurred while shutting down cache storage")
96+
logger.exception('Exception occurred while shutting down cache storage')
9997
finally:
100-
logger.debug("Cache storage closed")
98+
logger.debug('Cache storage closed')
10199

102-
def retrieve_response(
103-
self, spider: Spider, request: Request, current_time: int | None = None
104-
) -> Response | None:
105-
assert self._async_thread is not None, "Async thread not initialized"
106-
assert self._kv is not None, "Key value store not initialized"
107-
assert self._fingerprinter is not None, "Request fingerprinter not initialized"
100+
def retrieve_response(self, spider: Spider, request: Request, current_time: int | None = None) -> Response | None:
101+
assert self._async_thread is not None, 'Async thread not initialized'
102+
assert self._kv is not None, 'Key value store not initialized'
103+
assert self._fingerprinter is not None, 'Request fingerprinter not initialized'
108104

109105
key = self._fingerprinter.fingerprint(request).hex()
110106
value = self._async_thread.run_coro(self._kv.get_value(key))
111107

112108
if value is None:
113-
logger.debug("Cache miss", extra={"request": request})
109+
logger.debug('Cache miss', extra={'request': request})
114110
return None
115111

116112
if current_time is None:
117113
current_time = int(time())
118114
if 0 < self.expiration_secs < current_time - read_gzip_time(value):
119-
logger.debug("Cache expired", extra={"request": request})
115+
logger.debug('Cache expired', extra={'request': request})
120116
return None
121117

122118
data = from_gzip(value)
123-
url = data["url"]
124-
status = data["status"]
125-
headers = Headers(data["headers"])
126-
body = data["body"]
119+
url = data['url']
120+
status = data['status']
121+
headers = Headers(data['headers'])
122+
body = data['body']
127123
respcls = responsetypes.from_args(headers=headers, url=url, body=body)
128124

129-
logger.debug("Cache hit", extra={"request": request})
125+
logger.debug('Cache hit', extra={'request': request})
130126
return respcls(url=url, headers=headers, status=status, body=body)
131127

132-
def store_response(
133-
self, spider: Spider, request: Request, response: Response
134-
) -> None:
135-
assert self._async_thread is not None, "Async thread not initialized"
136-
assert self._kv is not None, "Key value store not initialized"
137-
assert self._fingerprinter is not None, "Request fingerprinter not initialized"
128+
def store_response(self, spider: Spider, request: Request, response: Response) -> None:
129+
assert self._async_thread is not None, 'Async thread not initialized'
130+
assert self._kv is not None, 'Key value store not initialized'
131+
assert self._fingerprinter is not None, 'Request fingerprinter not initialized'
138132

139133
key = self._fingerprinter.fingerprint(request).hex()
140134
data = {
141-
"status": response.status,
142-
"url": response.url,
143-
"headers": dict(response.headers),
144-
"body": response.body,
135+
'status': response.status,
136+
'url': response.url,
137+
'headers': dict(response.headers),
138+
'body': response.body,
145139
}
146140
value = to_gzip(data)
147141
self._async_thread.run_coro(self._kv.set_value(key, value))
@@ -150,19 +144,19 @@ def store_response(
150144
def to_gzip(data: dict, mtime: int | None = None) -> bytes:
151145
"""Dump a dictionary to a gzip-compressed byte stream."""
152146
with io.BytesIO() as byte_stream:
153-
with gzip.GzipFile(fileobj=byte_stream, mode="wb", mtime=mtime) as gzip_file:
147+
with gzip.GzipFile(fileobj=byte_stream, mode='wb', mtime=mtime) as gzip_file:
154148
pickle.dump(data, gzip_file, protocol=4)
155149
return byte_stream.getvalue()
156150

157151

158152
def from_gzip(gzip_bytes: bytes) -> dict:
159153
"""Load a dictionary from a gzip-compressed byte stream."""
160-
with io.BytesIO(gzip_bytes) as byte_stream, gzip.GzipFile(fileobj=byte_stream, mode="rb") as gzip_file:
154+
with io.BytesIO(gzip_bytes) as byte_stream, gzip.GzipFile(fileobj=byte_stream, mode='rb') as gzip_file:
161155
return pickle.load(gzip_file)
162156

163157

164158
def read_gzip_time(gzip_bytes: bytes) -> int:
165159
"""Read the modification time from a gzip-compressed byte stream without decompressing the data."""
166160
header = gzip_bytes[:10]
167-
header_components = struct.unpack("<HBBI2B", header)
161+
header_components = struct.unpack('<HBBI2B', header)
168162
return header_components[3]

tests/unit/scrapy/test_cache.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,25 +3,25 @@
33
from apify.scrapy.cache import from_gzip, read_gzip_time, to_gzip
44

55
FIXTURE_BYTES = (
6-
b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xffk`\x99*\xcc\x00\x01\xb5SzX\xf2\x12s"
7-
b"S\xa7\xf4\xb0:\xe6d&\xa7N)\xd6\x03\x00\x1c\xe8U\x9c\x1e\x00\x00\x00"
6+
b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xffk`\x99*\xcc\x00\x01\xb5SzX\xf2\x12s'
7+
b'S\xa7\xf4\xb0:\xe6d&\xa7N)\xd6\x03\x00\x1c\xe8U\x9c\x1e\x00\x00\x00'
88
)
99

1010

1111
def test_gzip() -> None:
12-
assert from_gzip(to_gzip({"name": "Alice"})) == {"name": "Alice"}
12+
assert from_gzip(to_gzip({'name': 'Alice'})) == {'name': 'Alice'}
1313

1414

1515
def test_to_gzip() -> None:
16-
data_bytes = to_gzip({"name": "Alice"}, mtime=0)
16+
data_bytes = to_gzip({'name': 'Alice'}, mtime=0)
1717

1818
assert data_bytes == FIXTURE_BYTES
1919

2020

2121
def test_from_gzip() -> None:
2222
data_dict = from_gzip(FIXTURE_BYTES)
2323

24-
assert data_dict == {"name": "Alice"}
24+
assert data_dict == {'name': 'Alice'}
2525

2626

2727
def test_read_gzip_time() -> None:
@@ -30,6 +30,6 @@ def test_read_gzip_time() -> None:
3030

3131
def test_read_gzip_time_non_zero() -> None:
3232
current_time = int(time())
33-
data_bytes = to_gzip({"name": "Alice"}, mtime=current_time)
33+
data_bytes = to_gzip({'name': 'Alice'}, mtime=current_time)
3434

3535
assert read_gzip_time(data_bytes) == current_time

0 commit comments

Comments
 (0)