Skip to content

Commit cc0453d

Browse files
committed
Convert proxy logic to subclasses system
1 parent 8764f74 commit cc0453d

File tree

8 files changed

+139
-123
lines changed

8 files changed

+139
-123
lines changed

scrapinghub/client/activity.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import absolute_import
22

3-
from .proxy import _Proxy
3+
from .proxy import _Proxy, format_iter_filters
44
from .utils import parse_job_key
55

66

@@ -46,8 +46,10 @@ class Activity(_Proxy):
4646
"""
4747
def __init__(self, *args, **kwargs):
4848
super(Activity, self).__init__(*args, **kwargs)
49-
self._proxy_methods([('iter', 'list')])
50-
self._wrap_iter_methods(['iter'])
49+
50+
def iter(self, **params):
    """Iterate through the project activity.

    The keyword arguments are passed through ``format_iter_filters``
    and the result is forwarded to the ``list`` method of the wrapped
    origin object.

    :param params: (optional) query params and filters for the request.
    :return: whatever the origin ``list`` call returns.
    """
    return self._origin.list(**format_iter_filters(params))
5153

5254
def add(self, values, **kwargs):
5355
"""Add new event to the project activity.

scrapinghub/client/collections.py

Lines changed: 52 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55

66
from ..hubstorage.collectionsrt import Collection as _Collection
77

8-
from .proxy import _Proxy, proxy_methods, wrap_kwargs, format_iter_filters
8+
from .proxy import _Proxy, _DownloadableProxyMixin, format_iter_filters
99
from .utils import update_kwargs
1010

1111

12-
class Collections(_Proxy):
12+
class Collections(_Proxy, _DownloadableProxyMixin):
1313
"""Access to project collections.
1414
1515
Not a public constructor: use :class:`~scrapinghub.client.projects.Project`
@@ -144,42 +144,6 @@ class Collection(object):
144144
def __init__(self, client, collections, type_, name):
145145
self._client = client
146146
self._origin = _Collection(type_, name, collections._origin)
147-
proxy_methods(self._origin, self, [
148-
'create_writer', 'count',
149-
('iter', 'iter_values'),
150-
('iter_raw_json', 'iter_json'),
151-
])
152-
# simplified version of _Proxy._wrap_iter_methods logic
153-
# to provide better support for filter param in iter methods
154-
for method in ['iter', 'iter_raw_json']:
155-
wrapped = wrap_kwargs(getattr(self, method), format_iter_filters)
156-
setattr(self, method, wrapped)
157-
158-
def list(self, key=None, prefix=None, prefixcount=None, startts=None,
159-
endts=None, requests_params=None, **params):
160-
"""Convenient shortcut to list iter results.
161-
162-
Please note that :meth:`list` method can use a lot of memory and for a
163-
large amount of logs it's recommended to iterate through it
164-
via :meth:`iter` method (all params and available filters are same for
165-
both methods).
166-
167-
:param key: a string key or a list of keys to filter with.
168-
:param prefix: a string prefix to filter items.
169-
:param prefixcount: maximum number of values to return per prefix.
170-
:param startts: UNIX timestamp at which to begin results.
171-
:param endts: UNIX timestamp at which to end results.
172-
:param requests_params: (optional) a dict with optional requests params.
173-
:param \*\*params: (optional) additional query params for the request.
174-
:return: a list of items where each item is represented with a dict.
175-
:rtype: :class:`list[dict]`
176-
"""
177-
# FIXME there should be similar docstrings for iter/iter_raw_json
178-
# but as we proxy them as-is, it's not in place, should be improved
179-
update_kwargs(params, key=key, prefix=prefix, prefixcount=prefixcount,
180-
startts=startts, endts=endts,
181-
requests_params=requests_params)
182-
return list(self.iter(requests_params=None, **params))
183147

184148
def get(self, key, **params):
185149
"""Get item from collection by key.
@@ -215,6 +179,28 @@ def delete(self, keys):
215179
"object providing string keys")
216180
self._origin.delete(keys)
217181

182+
def count(self, *args, **kwargs):
    """Delegate to ``count`` of the origin collections resource.

    The collection type and name of the wrapped origin are prepended to
    the positional arguments; everything else is forwarded unchanged.

    :return: whatever the origin ``count`` call returns.
    """
    origin = self._origin
    return origin._collections.count(
        origin.coltype, origin.colname, *args, **kwargs)
185+
186+
def iter(self, key=None, prefix=None, prefixcount=None, startts=None,
         endts=None, requests_params=None, **params):
    """Iterate through the collection via the origin ``iter_values``.

    :param key: a string key or a list of keys to filter with.
    :param prefix: a string prefix to filter items.
    :param prefixcount: maximum number of values to return per prefix.
    :param startts: UNIX timestamp at which to begin results.
    :param endts: UNIX timestamp at which to end results.
    :param requests_params: (optional) a dict with optional requests params.
    :param params: (optional) additional query params for the request.
    :return: whatever the origin ``iter_values`` call returns.
    """
    named = dict(key=key, prefix=prefix, prefixcount=prefixcount,
                 startts=startts, endts=endts,
                 requests_params=requests_params)
    update_kwargs(params, **named)
    query = format_iter_filters(params)
    origin = self._origin
    return origin._collections.iter_values(
        origin.coltype, origin.colname, **query)
194+
195+
def iter_raw_json(self, key=None, prefix=None, prefixcount=None,
                  startts=None, endts=None, requests_params=None, **params):
    """Iterate through the collection via the origin ``iter_json``.

    Accepts the same filters as :meth:`iter`; presumably yields the raw
    JSON representation of each item (confirm against hubstorage).

    :param key: a string key or a list of keys to filter with.
    :param prefix: a string prefix to filter items.
    :param prefixcount: maximum number of values to return per prefix.
    :param startts: UNIX timestamp at which to begin results.
    :param endts: UNIX timestamp at which to end results.
    :param requests_params: (optional) a dict with optional requests params.
    :param params: (optional) additional query params for the request.
    :return: whatever the origin ``iter_json`` call returns.
    """
    named = dict(key=key, prefix=prefix, prefixcount=prefixcount,
                 startts=startts, endts=endts,
                 requests_params=requests_params)
    update_kwargs(params, **named)
    query = format_iter_filters(params)
    origin = self._origin
    return origin._collections.iter_json(
        origin.coltype, origin.colname, **query)
203+
218204
def iter_raw_msgpack(self, key=None, prefix=None, prefixcount=None,
219205
startts=None, endts=None, requests_params=None,
220206
**params):
@@ -234,5 +220,33 @@ def iter_raw_msgpack(self, key=None, prefix=None, prefixcount=None,
234220
update_kwargs(params, key=key, prefix=prefix, prefixcount=prefixcount,
235221
startts=startts, endts=endts,
236222
requests_params=requests_params)
223+
params = format_iter_filters(params)
237224
return self._origin._collections.iter_msgpack(
238225
self._origin.coltype, self._origin.colname, **params)
226+
227+
def list(self, key=None, prefix=None, prefixcount=None, startts=None,
         endts=None, requests_params=None, **params):
    """Return the results of :meth:`iter` collected into a list.

    The whole result set is held in memory at once, so for large
    collections prefer looping over :meth:`iter` directly; both methods
    take the same params and filters.

    :param key: a string key or a list of keys to filter with.
    :param prefix: a string prefix to filter items.
    :param prefixcount: maximum number of values to return per prefix.
    :param startts: UNIX timestamp at which to begin results.
    :param endts: UNIX timestamp at which to end results.
    :param requests_params: (optional) a dict with optional requests params.
    :param params: (optional) additional query params for the request.
    :return: a list of items where each item is represented with a dict.
    :rtype: :class:`list[dict]`
    """
    update_kwargs(params, key=key, prefix=prefix, prefixcount=prefixcount,
                  startts=startts, endts=endts)
    # ``requests_params`` is handed over explicitly rather than merged
    # into ``params`` above.
    entries = self.iter(requests_params=requests_params, **params)
    return list(entries)
249+
250+
def create_writer(self, **kwargs):
    """Delegate to ``create_writer`` of the origin collections resource.

    The collection type and name of the wrapped origin are passed as
    positional arguments; keyword arguments are forwarded unchanged.

    :return: whatever the origin ``create_writer`` call returns.
    """
    origin = self._origin
    return origin._collections.create_writer(
        origin.coltype, origin.colname, **kwargs)

scrapinghub/client/frontiers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ class Frontiers(_Proxy):
8585
"""
8686
def __init__(self, *args, **kwargs):
8787
super(Frontiers, self).__init__(*args, **kwargs)
88-
self._proxy_methods(['close', 'flush'])
8988

9089
def get(self, name):
9190
"""Get a frontier by name.
@@ -121,6 +120,12 @@ def newcount(self):
121120
"""
122121
return sum(self._origin.newcount.values())
123122

123+
def flush(self):
    """Call ``flush()`` on the wrapped origin object; returns nothing."""
    origin = self._origin
    origin.flush()
125+
126+
def close(self):
    """Call ``close()`` on the wrapped origin object; returns nothing."""
    origin = self._origin
    origin.close()
128+
124129

125130
class Frontier(object):
126131
"""Representation of a frontier object.

scrapinghub/client/items.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from __future__ import absolute_import
22

3-
from .proxy import _Proxy
3+
from .proxy import _ItemsResourceProxy, _DownloadableProxyMixin
44

55

6-
class Items(_Proxy):
6+
class Items(_ItemsResourceProxy, _DownloadableProxyMixin):
77
"""Representation of collection of job items.
88
99
Not a public constructor: use :class:`~scrapinghub.client.jobs.Job` instance

scrapinghub/client/logs.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from __future__ import absolute_import
2+
23
import json
4+
import logging
35

4-
from .proxy import _Proxy
6+
from .proxy import _ItemsResourceProxy, _DownloadableProxyMixin
57
from .utils import LogLevel
68

79

8-
class Logs(_Proxy):
10+
class Logs(_ItemsResourceProxy, _DownloadableProxyMixin):
911
"""Representation of collection of job logs.
1012
1113
Not a public constructor: use :class:`~scrapinghub.client.jobs.Job` instance
@@ -47,11 +49,24 @@ class Logs(_Proxy):
4749
'time': 1486375511188,
4850
}]
4951
"""
52+
def log(self, message, level=logging.INFO, ts=None, **other):
    """Forward a log entry to ``log`` of the wrapped origin object.

    :param message: the message to log.
    :param level: (optional) log level, ``logging.INFO`` by default.
    :param ts: (optional) forwarded as ``ts`` (presumably the entry
        timestamp -- confirm against hubstorage).
    :param other: (optional) extra keyword arguments forwarded unchanged.
    """
    forward = self._origin.log
    forward(message, level=level, ts=ts, **other)
54+
55+
def debug(self, message, **other):
    """Forward ``message`` to ``debug`` of the wrapped origin object.

    :param message: the message to log.
    :param other: (optional) extra keyword arguments forwarded unchanged.
    """
    forward = self._origin.debug
    forward(message, **other)
57+
58+
def info(self, message, **other):
    """Forward ``message`` to ``info`` of the wrapped origin object.

    :param message: the message to log.
    :param other: (optional) extra keyword arguments forwarded unchanged.
    """
    forward = self._origin.info
    forward(message, **other)
60+
61+
def warn(self, message, **other):
    """Forward ``message`` to ``warn`` of the wrapped origin object.

    :param message: the message to log.
    :param other: (optional) extra keyword arguments forwarded unchanged.
    """
    forward = self._origin.warn
    forward(message, **other)

# ``warning`` is a second name for the very same function as ``warn``.
warning = warn
64+
65+
def error(self, message, **other):
    """Forward ``message`` to ``error`` of the wrapped origin object.

    :param message: the message to log.
    :param other: (optional) extra keyword arguments forwarded unchanged.
    """
    forward = self._origin.error
    forward(message, **other)
5067

51-
def __init__(self, *args, **kwargs):
52-
super(Logs, self).__init__(*args, **kwargs)
53-
self._proxy_methods(['log', 'debug', 'info', 'warning', 'warn',
54-
'error', 'batch_write_start'])
68+
def batch_write_start(self):
    """Return the result of ``batch_write_start()`` on the wrapped origin."""
    origin = self._origin
    return origin.batch_write_start()
5570

5671
def _modify_iter_params(self, params):
5772
"""Modify iter() filters on-the-fly.

scrapinghub/client/proxy.py

Lines changed: 46 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,6 @@
33
import six
44
import json
55

6-
from ..hubstorage.resourcetype import DownloadableResource
7-
from ..hubstorage.resourcetype import ItemsResourceType
8-
from ..hubstorage.collectionsrt import Collections
9-
106
from .exceptions import wrap_value_too_large
117

128

@@ -30,35 +26,18 @@ def __init__(self, cls, client, key):
3026
self._client = client
3127
self._origin = cls(client._hsclient, key)
3228

33-
if issubclass(cls, ItemsResourceType):
34-
self._proxy_methods(['get', 'write', 'flush', 'close',
35-
'stats', ('iter', 'list')])
36-
# redefine write method to wrap hubstorage.ValueTooLarge error
37-
origin_method = getattr(self, 'write')
38-
setattr(self, 'write', wrap_value_too_large(origin_method))
39-
40-
# DType iter_values() has more priority than IType list()
41-
# plus Collections interface doesn't need the iter methods
42-
if issubclass(cls, DownloadableResource) and cls is not Collections:
43-
methods = [('iter', 'iter_values'),
44-
('iter_raw_msgpack', 'iter_msgpack'),
45-
('iter_raw_json', 'iter_json')]
46-
self._proxy_methods(methods)
47-
self._wrap_iter_methods([method[0] for method in methods])
48-
49-
def _proxy_methods(self, methods):
50-
"""A little helper for cleaner interface."""
51-
proxy_methods(self._origin, self, methods)
52-
53-
def _wrap_iter_methods(self, methods):
54-
"""Modify kwargs for all passed self.iter* methods."""
55-
for method in methods:
56-
wrapped = wrap_kwargs(getattr(self, method),
57-
self._modify_iter_params)
58-
setattr(self, method, wrapped)
29+
def list(self, *args, **kwargs):
    """Collect everything produced by :meth:`iter` into a list.

    The whole result is materialized in memory at once, so for a large
    amount of elements prefer looping over :meth:`iter` directly; both
    methods accept the same params and filters.
    """
    # A comprehension avoids depending on the ``list`` name, which this
    # method itself shadows inside the class body.
    return [element for element in self.iter(*args, **kwargs)]
5938

6039
def _modify_iter_params(self, params):
61-
"""A helper to modify iter() params on-the-fly.
40+
"""A helper to modify iter*() params on-the-fly.
6241
6342
The method is internal and should be redefined in subclasses.
6443
@@ -68,15 +47,43 @@ def _modify_iter_params(self, params):
6847
"""
6948
return format_iter_filters(params)
7049

71-
def list(self, *args, **kwargs):
72-
"""Convenient shortcut to list iter results.
7350

74-
Please note that :meth:`list` method can use a lot of memory and for a
75-
large amount of elements it's recommended to iterate through it via
76-
:meth:`iter` method (all params and available filters are same for both
77-
methods).
78-
"""
79-
return list(self.iter(*args, **kwargs))
51+
class _ItemsResourceProxy(_Proxy):
    """Proxy exposing get/write/iter/flush/stats/close of the origin.

    Every method delegates to the same-purpose method of the wrapped
    ``self._origin`` object.
    """

    def get(self, _key, **params):
        """Return ``get(_key, **params)`` of the wrapped origin."""
        origin = self._origin
        return origin.get(_key, **params)

    # The decorator wraps the hubstorage "value too large" error.
    @wrap_value_too_large
    def write(self, item):
        """Return ``write(item)`` of the wrapped origin."""
        origin = self._origin
        return origin.write(item)

    def iter(self, _key=None, **params):
        """Return the origin ``list`` result for the adjusted filters.

        The keyword arguments go through ``self._modify_iter_params``
        before being forwarded.
        """
        adjusted = self._modify_iter_params(params)
        return self._origin.list(_key, **adjusted)

    def flush(self):
        """Call ``flush()`` on the wrapped origin; returns nothing."""
        origin = self._origin
        origin.flush()

    def stats(self):
        """Return ``stats()`` of the wrapped origin."""
        origin = self._origin
        return origin.stats()

    def close(self, block=True):
        """Call ``close(block)`` on the wrapped origin; returns nothing.

        :param block: forwarded positionally to the origin ``close``.
        """
        origin = self._origin
        origin.close(block)
72+
73+
74+
class _DownloadableProxyMixin(object):
    """Mixin adding ``iter``, ``iter_raw_json`` and ``iter_raw_msgpack``.

    Each method runs the keyword filters through
    ``self._modify_iter_params`` and then forwards the call to the
    matching ``iter_*`` method of ``self._origin``; the host class has
    to provide both attributes.
    """

    def iter(self, _path=None, requests_params=None, **apiparams):
        """Return the origin ``iter_values`` result for the adjusted filters."""
        adjusted = self._modify_iter_params(apiparams)
        fetch = self._origin.iter_values
        return fetch(_path, requests_params, **adjusted)

    def iter_raw_json(self, _path=None, requests_params=None, **apiparams):
        """Return the origin ``iter_json`` result for the adjusted filters."""
        adjusted = self._modify_iter_params(apiparams)
        fetch = self._origin.iter_json
        return fetch(_path, requests_params, **adjusted)

    def iter_raw_msgpack(self, _path=None, requests_params=None, **apiparams):
        """Return the origin ``iter_msgpack`` result for the adjusted filters."""
        adjusted = self._modify_iter_params(apiparams)
        fetch = self._origin.iter_msgpack
        return fetch(_path, requests_params, **adjusted)
8087

8188

8289
class _MappingProxy(_Proxy):
@@ -130,25 +137,6 @@ def iter(self):
130137
return six.iteritems(next(self._origin.apiget()))
131138

132139

133-
def proxy_methods(origin, successor, methods):
134-
"""A helper to proxy methods from origin to successor.
135-
136-
Accepts a list with strings and tuples:
137-
138-
- each string defines:
139-
a successor method name to proxy 1:1 with origin method
140-
- each tuple should consist of 2 strings:
141-
a successor method name and an origin method name
142-
"""
143-
for method in methods:
144-
if isinstance(method, tuple):
145-
successor_name, origin_name = method
146-
else:
147-
successor_name, origin_name = method, method
148-
if not hasattr(successor, successor_name):
149-
setattr(successor, successor_name, getattr(origin, origin_name))
150-
151-
152140
def format_iter_filters(params):
153141
"""Format iter() filter param on-the-fly.
154142
@@ -168,11 +156,3 @@ def format_iter_filters(params):
168156
if filter_data:
169157
params['filter'] = filter_data
170158
return params
171-
172-
173-
def wrap_kwargs(fn, kwargs_fn):
174-
"""Tiny wrapper to prepare modified version of function kwargs"""
175-
def wrapped(*args, **kwargs):
176-
kwargs = kwargs_fn(kwargs)
177-
return fn(*args, **kwargs)
178-
return wrapped

scrapinghub/client/requests.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from __future__ import absolute_import
22

3-
from .proxy import _Proxy
3+
from .proxy import _ItemsResourceProxy, _DownloadableProxyMixin
44

55

6-
class Requests(_Proxy):
6+
class Requests(_ItemsResourceProxy, _DownloadableProxyMixin):
77
"""Representation of collection of job requests.
88
99
Not a public constructor: use :class:`~scrapinghub.client.jobs.Job` instance
@@ -41,6 +41,6 @@ class Requests(_Proxy):
4141
'url': 'https://example.com'
4242
}]
4343
"""
44-
def __init__(self, *args, **kwargs):
45-
super(Requests, self).__init__(*args, **kwargs)
46-
self._proxy_methods(['add'])
44+
def add(self, url, status, method, rs, parent, duration, ts, fp=None):
    """Add a request entry by delegating to ``add`` of the wrapped origin.

    ``url``, ``status``, ``method``, ``rs``, ``parent``, ``duration`` and
    ``ts`` are forwarded positionally, in that order.

    :param url: the request URL.
    :param status: forwarded unchanged (presumably the response status).
    :param method: forwarded unchanged (presumably the HTTP method).
    :param rs: forwarded unchanged.
    :param parent: forwarded unchanged.
    :param duration: forwarded unchanged.
    :param ts: forwarded unchanged (presumably a timestamp).
    :param fp: (optional) forwarded as the ``fp`` keyword argument.
    :return: whatever the origin ``add`` call returns.
    """
    # Forward the caller's ``fp``: it used to be hard-coded to ``None``,
    # which silently discarded any value the caller supplied.
    return self._origin.add(
        url, status, method, rs, parent, duration, ts, fp=fp)

scrapinghub/client/samples.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from __future__ import absolute_import
22

3-
from .proxy import _Proxy
3+
from .proxy import _ItemsResourceProxy
44

55

6-
class Samples(_Proxy):
6+
class Samples(_ItemsResourceProxy):
77
"""Representation of collection of job samples.
88
99
Not a public constructor: use :class:`~scrapinghub.client.jobs.Job` instance

0 commit comments

Comments
 (0)