Skip to content

Commit f70e6a7

Browse files
committed
Report exceeded max encoded size in MB if applied
1 parent bdf6897 commit f70e6a7

File tree

5 files changed

+47
-6
lines changed

5 files changed

+47
-6
lines changed

scrapinghub/hubstorage/batchuploader.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import requests
1313
from collections import deque
1414
from threading import Thread, Event
15-
from .utils import xauth, iterqueue
15+
from .utils import xauth, iterqueue, sizeof_fmt
1616
from .serialization import jsonencode
1717

1818
logger = logging.getLogger('hubstorage.batchuploader')
@@ -227,8 +227,8 @@ def write(self, item):
227227
if len(data) > self.maxitemsize:
228228
truncated_data = data[:self.ERRMSG_DATA_TRUNCATION_LEN] + "..."
229229
raise ValueTooLarge(
230-
'Value exceeds max encoded size of {} bytes: {!r}'
231-
.format(self.maxitemsize, truncated_data))
230+
'Value exceeds max encoded size of {}: {!r}'
231+
.format(sizeof_fmt(self.maxitemsize), truncated_data))
232232

233233
self.itemsq.put(data)
234234
if self.itemsq.full():

scrapinghub/hubstorage/utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,21 @@ def apipoll(endpoint, *args, **kwargs):
131131
if result is not None or (time.time() - start) > max_poll:
132132
return result
133133

134+
135+
def sizeof_fmt(num):
    """Little helper to get size in human readable form.

    ``num`` is a size in bytes. It is scaled by powers of 1024 through the
    units B, KiB and MiB; anything larger is reported in GiB.

    Size is rounded to a closest integer value (for simplicity). The unit is
    chosen from the *rounded* magnitude, so a size just below a unit
    boundary rolls over to the next unit instead of being rendered as
    e.g. '1024 KiB'.

    >>> sizeof_fmt(100)
    '100 B'
    >>> sizeof_fmt(1024)
    '1 KiB'
    >>> sizeof_fmt(1024*1024 - 1)
    '1 MiB'
    >>> sizeof_fmt(1024*1024 + 100)
    '1 MiB'
    """
    for unit in ['B', 'KiB', 'MiB']:
        # Compare the rounded magnitude: "%.0f" prints 1023.999 as "1024",
        # which would otherwise produce "1024 KiB" rather than "1 MiB".
        if round(abs(num)) < 1024:
            return "%.0f %s" % (num, unit)
        num /= 1024.0
    # Anything that did not fit in MiB is reported in GiB, however large.
    return "%.0f %s" % (num, 'GiB')
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
eJydVQl320QQTtrSltKTq4W2YEyhJo1ky7dDOdI0BOc+nGQbEGEtbbyyZUmj1Sax+/Qe/Lv+LGbXoYQcrwX5+T3tzOzs7PfNN/rjQgpjOXJ9bGzMCxIWUyfxwkDAuA0XcilczJGr6IqZiNDK4FIK7+TIFTRxRl0WC7icwpXR/pkQEwSJ0RpEDK7a8C65i1YaRb7nUJU1f2h0RRj4XsAEXKPk2mhTwPSZ8J4N17Wtx1hkUN/bZ3CDkvtomXYcJoShTohD35j2/fDAWIm9jhfATRtukXEMmoDblFzGlw0W77MY7tjwvt69zeLekMlOvmA2zGpmdZBwLKZkVkwLPqDkDoYQQ23yHGZs4aVUNR/a8BG5iC6X7cPHlFzC1+c0YXDXhnvkIa5akk1mSlZmngaZYsGqZazqVNmasgqZuaUWfELJzWOgLLKgk3D41Ib75ALaSw14QKUuVyQ0kQIepvDZCNo+3pV2GHyuI1cWIKNPd0KXwRcLr6RetUN3ANkUvjzKEXtBBx6Rx7h4me2xQXYqky2OnryVtxq1RnYyk6Uy4egJpO+n8JWQ+ryYgWQiga9TeJzjSGgup3nYFCw2pjtYPnxjwwS5h7ZIg2cIJ6YRnshlO28hqgV4QsmtI6aixJgNsFpV0aQNBrmBjs7QiyYzLtvzFYjmKfbzNhROsm9RvXUaiw5jb6ibCIo2lEgRzc+o8JzMTv9tfy+qUB41yKhIqNhQ1RRP5CegdiZfdRsamoViHaao5FnyQAMsIg+7X2HMhZEgeMaRJYVvdUoZe/CUlJRQkiSayudFEsZIqrnfN4+BZzphP98N25D/m6tICg7f6TL7DO/twvea79WVjRb8IGUKP+Y4ynBaM/XsDOnN2PCc3D4hPSU8mD2F+k82zJ1E/ec3a65pwzz/t9oWbFh8o9qWzlPbsg0rr9W2ekxtazasv4XaNs5kr2XDps5aqldgC+lDjW3n+GtdEa7kxJG0F/wR+XNcM4tqZIrYWAYBkqRUIxzOXOkzd7etdUXdvhcox7k6i1ig2n838foqmVWuV+r1Qr1WqBXKyh172NCJ2lxU+RMaJ6ezH1VwOkm9YKFbBl4ilP0fXWfZYJ635xxvxZtvbg6b1rLXFM1gveLMNKvNXkS2ZuYbpgpyt9dUELSLzepStznQf69+uDicrSwPe97emukah9u8Tg66Tnl9vT3o+3vhTLXld6o7XRFYpF01llYr5R73GwUN0nmSQN+uwF7fPQjj3iiiJ/uhhikOu9iJCobRk8KOkBwH0S+6vX/NcZw8Nn9COQ6S37hJOc6JXW5RjmPg9/87BijlqP02r1GOAnf016MIrta3ao2XKTD+lNT+u3o1k8qyx4/JtiMl05N2/6jd+cK4NP8Ch5o5Rw==

tests/hubstorage/test_batchuploader.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,22 +41,31 @@ def test_writer_maxitemsize(hsclient, hsproject):
4141
with pytest.raises(ValueTooLarge) as excinfo1:
4242
writer.write({'b': 'x' * max_size})
4343
excinfo1.match(
44-
r'Value exceeds max encoded size of 1048576 bytes:'
44+
r'Value exceeds max encoded size of 1 MiB:'
4545
' \'{"b": "x+\\.\\.\\.\'')
4646

4747
with pytest.raises(ValueTooLarge) as excinfo2:
4848
writer.write({'b'*max_size: 'x'})
4949
excinfo2.match(
50-
r'Value exceeds max encoded size of 1048576 bytes:'
50+
r'Value exceeds max encoded size of 1 MiB:'
5151
' \'{"b+\\.\\.\\.\'')
5252

5353
with pytest.raises(ValueTooLarge) as excinfo3:
5454
writer.write({'b'*(max_size//2): 'x'*(max_size//2)})
5555
excinfo3.match(
56-
r'Value exceeds max encoded size of 1048576 bytes:'
56+
r'Value exceeds max encoded size of 1 MiB:'
5757
' \'{"b+\\.\\.\\.\'')
5858

5959

60+
def test_writer_maxitemsize_custom(hsclient, hsproject):
    """A writer with a custom maxitemsize reports that limit in KiB."""
    custom_limit = 512*1024
    _job, writer = _job_and_writer(hsclient, hsproject,
                                   maxitemsize=custom_limit)
    # The encoded item is larger than the limit, so write() must refuse it.
    oversized_item = {'b': 'x' * writer.maxitemsize}
    expected_message = (
        r'Value exceeds max encoded size of 512 KiB:'
        ' \'{"b": "x+\\.\\.\\.\'')
    with pytest.raises(ValueTooLarge) as excinfo:
        writer.write(oversized_item)
    excinfo.match(expected_message)
67+
68+
6069
def test_writer_contentencoding(hsclient, hsproject):
6170
for ce in ('identity', 'gzip'):
6271
job, writer = _job_and_writer(hsclient, hsproject,

tests/hubstorage/test_utils.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
"""
2+
Test utils module.
3+
"""
4+
5+
from scrapinghub.hubstorage.utils import sizeof_fmt
6+
7+
8+
def test_sizeof_fmt():
    """Check sizeof_fmt output for sizes falling into each supported unit."""
    expectations = (
        (1000, '1000 B'),
        (1024, '1 KiB'),
        (1024 * 1024, '1 MiB'),
        (1024 * 1024 + 100, '1 MiB'),
        (1024 * 1024 * 1024, '1 GiB'),
    )
    for size, rendered in expectations:
        assert sizeof_fmt(size) == rendered

0 commit comments

Comments
 (0)