Skip to content

Commit 22af7ac

Browse files
committed
fix: truncate too long kvs names
1 parent 4ff56d7 commit 22af7ac

File tree

2 files changed

+26
-3
lines changed

2 files changed

+26
-3
lines changed

src/apify/scrapy/extensions/_httpcache.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,33 @@ def read_gzip_time(gzip_bytes: bytes) -> int:
180180
return mtime
181181

182182

183-
def get_kvs_name(spider_name: str) -> str:
184-
"""Get the key value store name for a spider."""
183+
def get_kvs_name(spider_name: str, max_length: int = 60) -> str:
    """Get the key value store name for a spider.

    The key value store name is derived from the spider name by replacing all special characters
    with hyphens, collapsing runs of hyphens and trimming leading and trailing hyphens.
    The resulting slug is prefixed with 'httpcache-' and truncated to the maximum length.
    Any hyphen left dangling at the end by the truncation is removed as well, so the returned
    name never ends with a hyphen.

    The documentation
    [about storages](https://docs.apify.com/platform/storage/usage#named-and-unnamed-storages)
    mentions that names can be up to 63 characters long, so the default max length is set to 60.

    Such naming isn't unique per spider, but should be sufficiently unique for most use cases.
    The name of the key value store should indicate to which spider it belongs, e.g. in
    the listing in the Apify Console.

    Args:
        spider_name: Value of the Spider instance's name attribute.
        max_length: Maximum length of the key value store name. Must be greater than
            the length of the 'httpcache-' prefix so that at least one character
            of the slug fits into the name.

    Returns:
        Key value store name.

    Raises:
        ValueError: If the spider name contains only special characters, or if max_length
            leaves no room for any part of the slug.
    """
    prefix = 'httpcache-'
    if max_length <= len(prefix):
        # A shorter limit would yield a bare (or partial) prefix shared by every spider.
        raise ValueError(f'Unsupported max length: {max_length!r} (must be greater than {len(prefix)})')

    slug = re.sub(r'[^a-zA-Z0-9-]', '-', spider_name)
    slug = re.sub(r'-+', '-', slug)
    slug = slug.strip('-')
    if not slug:
        raise ValueError(f'Unsupported spider name: {spider_name!r} (slug: {slug!r})')

    # Truncation may cut right after an interior hyphen; strip it so the name stays valid.
    return f'{prefix}{slug}'[:max_length].rstrip('-')

tests/unit/scrapy/extensions/test_httpcache.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def test_read_gzip_time_non_zero() -> None:
5151
('test@spider', 'httpcache-test-spider'),
5252
(' test spider ', 'httpcache-test-spider'),
5353
('testspider.com', 'httpcache-testspider-com'),
54+
('t' * 100, 'httpcache-tttttttttttttttttttttttttttttttttttttttttttttttttt'),
5455
],
5556
)
5657
def test_get_kvs_name(spider_name: str, expected: str) -> None:

0 commit comments

Comments
 (0)