Skip to content

Commit 7137456

Browse files
authored
Merge pull request #54 from scrapinghub/sc1467-1-ide
Improve client to use it via IDE
2 parents 59f4883 + 5656825 commit 7137456

File tree

15 files changed

+642
-312
lines changed

15 files changed

+642
-312
lines changed

README.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ Jobs instance is described well in ``Jobs`` section below.
8888

8989
For example, to schedule a spider run (it returns a job object)::
9090

91-
>>> project.jobs.schedule('spider1', spider_args={'arg1':'val1'})
91+
>>> project.jobs.schedule('spider1', job_args={'arg1':'val1'})
9292
<scrapinghub.client.Job at 0x106ee12e8>
9393

9494
Project instance also has the following fields:
@@ -151,7 +151,7 @@ Like project instance, spider instance has ``jobs`` field to work with the spide
151151

152152
To schedule a spider run::
153153

154-
>>> spider.jobs.schedule(spider_args={'arg1:'val1'})
154+
>>> spider.jobs.schedule(job_args={'arg1':'val1'})
155155
<scrapinghub.client.Job at 0x106ee12e8>
156156

157157
Note that you don't need to specify spider name explicitly.

scrapinghub/client/__init__.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,44 +51,43 @@ def __init__(self, auth=None, dash_endpoint=None, **kwargs):
5151
url=dash_endpoint)
5252
self._hsclient = HubstorageClient(auth=(login, password), **kwargs)
5353

54-
def get_project(self, projectid):
54+
def get_project(self, project_id):
5555
"""Get :class:`Project` instance with a given project id.
5656
5757
The method is a shortcut for client.projects.get().
5858
59-
:param projectid: integer or string numeric project id.
59+
:param project_id: integer or string numeric project id.
6060
:return: :class:`Project` object.
61-
:rtype: scrapinghub.client.Project.
61+
:rtype: scrapinghub.client.projects.Project
6262
6363
Usage::
6464
6565
>>> project = client.get_project(123)
6666
>>> project
67-
<scrapinghub.client.Project at 0x106cdd6a0>
67+
<scrapinghub.client.projects.Project at 0x106cdd6a0>
6868
"""
69-
return self.projects.get(parse_project_id(projectid))
69+
return self.projects.get(parse_project_id(project_id))
7070

71-
def get_job(self, jobkey):
72-
"""Get Job with a given jobkey.
71+
def get_job(self, job_key):
72+
"""Get Job with a given job key.
7373
74-
:param jobkey: job key string in format 'project/spider/job',
74+
:param job_key: job key string in format 'project_id/spider_id/job_id',
7575
where all the components are integers.
7676
:return: :class:`Job` object.
77-
:rtype: scrapinghub.client.Job.
77+
:rtype: scrapinghub.client.jobs.Job
7878
7979
Usage::
8080
8181
>>> job = client.get_job('123/1/1')
8282
>>> job
83-
<scrapinghub.client.Job at 0x10afe2eb1>
83+
<scrapinghub.client.jobs.Job at 0x10afe2eb1>
8484
"""
85-
projectid = parse_job_key(jobkey).projectid
86-
return self.projects.get(projectid).jobs.get(jobkey)
85+
project_id = parse_job_key(job_key).project_id
86+
return self.projects.get(project_id).jobs.get(job_key)
8787

8888
def close(self, timeout=None):
8989
"""Close client instance.
9090
91-
:param timeout: (optional) float timeout secs to stop everything
92-
gracefully.
91+
:param timeout: (optional) float timeout secs to stop gracefully.
9392
"""
9493
self._hsclient.close(timeout=timeout)

scrapinghub/client/activity.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,17 @@ def __init__(self, *args, **kwargs):
4949
self._wrap_iter_methods(['iter'])
5050

5151
def add(self, values, **kwargs):
52+
"""Add new event to the project activity.
53+
54+
:param values: a single event or a list of events, where event is
55+
represented with a dictionary of ('event', 'job', 'user') keys.
56+
"""
5257
if not isinstance(values, list):
5358
values = list(values)
5459
for activity in values:
5560
if not isinstance(activity, dict):
5661
raise ValueError("Please pass events as dictionaries")
57-
jobkey = activity.get('job')
58-
if jobkey and parse_job_key(jobkey).projectid != self.key:
62+
job_key = activity.get('job')
63+
if job_key and parse_job_key(job_key).project_id != self.key:
5964
raise ValueError('Please use same project id')
6065
self._origin.post(values, **kwargs)

scrapinghub/client/collections.py

Lines changed: 118 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@
55

66
from ..hubstorage.collectionsrt import Collection as _Collection
77

8-
from .utils import _Proxy
9-
from .utils import format_iter_filters
10-
from .utils import proxy_methods
11-
from .utils import wrap_kwargs
8+
from .utils import (
9+
_Proxy, format_iter_filters, proxy_methods, wrap_kwargs, update_kwargs,
10+
)
1211

1312

1413
class Collections(_Proxy):
@@ -25,29 +24,75 @@ class Collections(_Proxy):
2524
>>> foo_store = collections.get_store('foo_store')
2625
"""
2726

28-
def get(self, coltype, colname):
29-
"""Base method to get a collection with a given type and name."""
30-
self._origin._validate_collection(coltype, colname)
31-
return Collection(self._client, self, coltype, colname)
27+
def get(self, type_, name):
28+
"""Base method to get a collection with a given type and name.
3229
33-
def get_store(self, colname):
34-
return self.get('s', colname)
30+
:param type_: a collection type string.
31+
:param name: a collection name string.
32+
:return: :class:`Collection` object.
33+
:rtype: Collection
34+
"""
35+
self._origin._validate_collection(type_, name)
36+
return Collection(self._client, self, type_, name)
37+
38+
def get_store(self, name):
39+
"""Method to get a store collection by name.
40+
41+
:param name: a collection name string.
42+
:return: :class:`Collection` object.
43+
:rtype: Collection
44+
"""
45+
return self.get('s', name)
46+
47+
def get_cached_store(self, name):
48+
"""Method to get a cached-store collection by name.
49+
50+
The collection type means that items expire after a month.
51+
52+
:param name: a collection name string.
53+
:return: :class:`Collection` object.
54+
:rtype: Collection
55+
"""
56+
return self.get('cs', name)
57+
58+
def get_versioned_store(self, name):
59+
"""Method to get a versioned-store collection by name.
60+
61+
The collection type retains up to 3 copies of each item.
62+
63+
:param name: a collection name string.
64+
:return: :class:`Collection` object.
65+
:rtype: Collection
66+
"""
67+
return self.get('vs', name)
3568

36-
def get_cached_store(self, colname):
37-
return self.get('cs', colname)
69+
def get_versioned_cached_store(self, name):
70+
"""Method to get a versioned-cached-store collection by name.
3871
39-
def get_versioned_store(self, colname):
40-
return self.get('vs', colname)
72+
Multiple copies are retained, and each one expires after a month.
4173
42-
def get_versioned_cached_store(self, colname):
43-
return self.get('vcs', colname)
74+
:param name: a collection name string.
75+
:return: :class:`Collection` object.
76+
:rtype: Collection
77+
"""
78+
return self.get('vcs', name)
4479

4580
def iter(self):
46-
"""Iterate through collections of a project."""
81+
"""Iterate through collections of a project.
82+
83+
:return: an iterator over collections list where each collection is
84+
represented by a dictionary with ('name','type') fields.
85+
:rtype: collections.Iterable[dict]
86+
"""
4787
return self._origin.apiget('list')
4888

4989
def list(self):
50-
"""List collections of a project."""
90+
"""List collections of a project.
91+
92+
:return: a list of collections where each collection is
93+
represented by a dictionary with ('name','type') fields.
94+
:rtype: list[dict]
95+
"""
5196
return list(self.iter())
5297

5398

@@ -56,7 +101,7 @@ class Collection(object):
56101
57102
Not a public constructor: use :class:`Collections` instance to get a
58103
:class:`Collection` instance. See :meth:`Collections.get_store` and
59-
similar methods. # noqa
104+
similar methods.
60105
61106
Usage:
62107
@@ -84,8 +129,7 @@ class Collection(object):
84129
85130
>>> for elem in foo_store.iter(count=1):
86131
>>> ... print(elem)
87-
[{'_key': '002d050ee3ff6192dcbecc4e4b4457d7',
88-
'value': '1447221694537'}]
132+
[{'_key': '002d050ee3ff6192dcbecc4e4b4457d7', 'value': '1447221694537'}]
89133
90134
- filter by multiple keys, only values for keys that exist will be returned::
91135
@@ -97,9 +141,9 @@ class Collection(object):
97141
>>> foo_store.delete('002d050ee3ff6192dcbecc4e4b4457d7')
98142
"""
99143

100-
def __init__(self, client, collections, coltype, colname):
144+
def __init__(self, client, collections, type_, name):
101145
self._client = client
102-
self._origin = _Collection(coltype, colname, collections._origin)
146+
self._origin = _Collection(type_, name, collections._origin)
103147
proxy_methods(self._origin, self, [
104148
'create_writer', 'count',
105149
('iter', 'iter_values'),
@@ -111,35 +155,58 @@ def __init__(self, client, collections, coltype, colname):
111155
wrapped = wrap_kwargs(getattr(self, method), format_iter_filters)
112156
setattr(self, method, wrapped)
113157

114-
def list(self, *args, **kwargs):
158+
def list(self, key=None, prefix=None, prefixcount=None, startts=None,
159+
endts=None, requests_params=None, **params):
115160
"""Convenient shortcut to list iter results.
116161
117162
Please note that list() method can use a lot of memory and for a large
118163
amount of elements it's recommended to iterate through it via iter()
119164
method (all params and available filters are same for both methods).
165+
166+
:param key: a string key or a list of keys to filter with.
167+
:param prefix: a string prefix to filter items.
168+
:param prefixcount: maximum number of values to return per prefix.
169+
:param startts: UNIX timestamp at which to begin results.
170+
:param endts: UNIX timestamp at which to end results.
171+
:param requests_params: (optional) a dict with optional requests params.
172+
:param \*\*params: (optional) additional query params for the request.
173+
:return: a list of items where each item is represented with a dict.
174+
:rtype: list[dict]
175+
176+
# FIXME there should be similar docstrings for iter/iter_raw_json
177+
# but as we proxy them as-is, it's not in place, should be improved
120178
"""
121-
return list(self.iter(*args, **kwargs))
179+
update_kwargs(params, key=key, prefix=prefix, prefixcount=prefixcount,
180+
startts=startts, endts=endts,
181+
requests_params=requests_params)
182+
return list(self.iter(requests_params=None, **params))
122183

123-
def get(self, key, *args, **kwargs):
184+
def get(self, key, **params):
124185
"""Get item from collection by key.
125186
126-
:param key: string item key
127-
:return: an item dictionary if exists
187+
:param key: string item key.
188+
:param \*\*params: (optional) additional query params for the request.
189+
:return: an item dictionary if exists.
190+
:rtype: dict
128191
"""
129192
if key is None:
130193
raise ValueError("key cannot be None")
131-
return self._origin.get(key, *args, **kwargs)
194+
return self._origin.get(key, **params)
132195

133-
def set(self, *args, **kwargs):
196+
def set(self, value):
134197
"""Set item to collection by key.
135198
199+
:param value: a dict representing a collection item.
200+
136201
The method returns None (original method returns an empty generator).
137202
"""
138-
self._origin.set(*args, **kwargs)
203+
self._origin.set(value)
139204

140205
def delete(self, keys):
141206
"""Delete item(s) from collection by key(s).
142207
208+
:param keys: a single key or a list of keys.
209+
143210
The method returns None (original method returns an empty generator).
144211
"""
145212
if (not isinstance(keys, string_types) and
@@ -148,7 +215,24 @@ def delete(self, keys):
148215
"object providing string keys")
149216
self._origin.delete(keys)
150217

151-
def iter_raw_msgpack(self, requests_params=None, **apiparams):
218+
def iter_raw_msgpack(self, key=None, prefix=None, prefixcount=None,
219+
startts=None, endts=None, requests_params=None,
220+
**params):
221+
"""A method to iterate through raw msgpack-ed items.
222+
Can be convenient if data is needed in same msgpack format.
223+
224+
:param key: a string key or a list of keys to filter with.
225+
:param prefix: a string prefix to filter items.
226+
:param prefixcount: maximum number of values to return per prefix.
227+
:param startts: UNIX timestamp at which to begin results.
228+
:param endts: UNIX timestamp at which to end results.
229+
:param requests_params: (optional) a dict with optional requests params.
230+
:param \*\*params: (optional) additional query params for the request.
231+
:return: an iterator over items list packed with msgpack.
232+
:rtype: collections.Iterable[bytes]
233+
"""
234+
update_kwargs(params, key=key, prefix=prefix, prefixcount=prefixcount,
235+
startts=startts, endts=endts,
236+
requests_params=requests_params)
152237
return self._origin._collections.iter_msgpack(
153-
self._origin.coltype, self._origin.colname,
154-
requests_params=requests_params, **apiparams)
238+
self._origin.coltype, self._origin.colname, **params)

0 commit comments

Comments
 (0)