Skip to content

Commit 666378f

Browse files
committed
Merge remote-tracking branch 'origin/sc1467-1' into sc1467-1
2 parents 001f8d8 + 2c657f9 commit 666378f

23 files changed

+1574
-1500
lines changed

scrapinghub/client.py

Lines changed: 0 additions & 1478 deletions
This file was deleted.

scrapinghub/client/__init__.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from scrapinghub import Connection as _Connection
2+
from scrapinghub import HubstorageClient as _HubstorageClient
3+
4+
from .projects import Projects
5+
from .exceptions import wrap_http_errors
6+
7+
from .utils import parse_auth
8+
from .utils import parse_project_id, parse_job_key
9+
10+
11+
# Names exported by ``from scrapinghub.client import *``: only the client class.
__all__ = ['ScrapinghubClient']
12+
13+
14+
class Connection(_Connection):
    """Legacy ``Connection`` whose ``_request`` runs under ``wrap_http_errors``.

    The only thing this subclass adds is the decorator on ``_request``; the
    request itself is delegated unchanged to the parent implementation.
    """

    @wrap_http_errors
    def _request(self, *args, **kwargs):
        """Forward all arguments to the parent ``_request`` and return its result."""
        parent = super(Connection, self)
        return parent._request(*args, **kwargs)
19+
20+
21+
class HubstorageClient(_HubstorageClient):
    """Hubstorage client whose ``request`` runs under ``wrap_http_errors``.

    The only thing this subclass adds is the decorator on ``request``; the
    request itself is delegated unchanged to the parent implementation.
    """

    @wrap_http_errors
    def request(self, *args, **kwargs):
        """Forward all arguments to the parent ``request`` and return its result."""
        parent = super(HubstorageClient, self)
        return parent.request(*args, **kwargs)
26+
27+
28+
class ScrapinghubClient(object):
    r"""Main class to work with Scrapinghub API.

    :param auth: Scrapinghub APIKEY or other SH auth credentials.
    :param dash_endpoint: (optional) Scrapinghub Dash panel url.
    :param \*\*kwargs: (optional) Additional arguments for
        :class:`scrapinghub.hubstorage.HubstorageClient` constructor.

    :ivar projects: projects collection, :class:`Projects` instance.

    Usage::

        >>> from scrapinghub import ScrapinghubClient
        >>> client = ScrapinghubClient('APIKEY')
        >>> client
        <scrapinghub.client.ScrapinghubClient at 0x1047af2e8>
    """
    # The docstring above is a raw string on purpose: ``\*`` is not a valid
    # escape sequence and triggers a warning in a regular string literal.

    def __init__(self, auth=None, dash_endpoint=None, **kwargs):
        """Create the projects collection and both underlying API clients.

        :param auth: credentials, split into login/password by ``parse_auth``.
        :param dash_endpoint: (optional) url passed to the ``Connection``.
        :param \\*\\*kwargs: passed as-is to the ``HubstorageClient`` constructor.
        """
        self.projects = Projects(self)
        login, password = parse_auth(auth)
        self._connection = Connection(apikey=login,
                                      password=password,
                                      url=dash_endpoint)
        self._hsclient = HubstorageClient(auth=(login, password), **kwargs)

    def get_project(self, projectid):
        """Get :class:`Project` instance with a given project id.

        The method is a shortcut for client.projects.get().

        :param projectid: integer or string numeric project id.
        :return: :class:`Project` object.
        :rtype: scrapinghub.client.Project.

        Usage::

            >>> project = client.get_project(123)
            >>> project
            <scrapinghub.client.Project at 0x106cdd6a0>
        """
        return self.projects.get(parse_project_id(projectid))

    def get_job(self, jobkey):
        """Get Job with a given jobkey.

        :param jobkey: job key string in format 'project/spider/job',
            where all the components are integers.
        :return: :class:`Job` object.
        :rtype: scrapinghub.client.Job.

        Usage::

            >>> job = client.get_job('123/1/1')
            >>> job
            <scrapinghub.client.Job at 0x10afe2eb1>
        """
        # The project id is taken from the job key itself, so the job is
        # always looked up through the project it belongs to.
        projectid = parse_job_key(jobkey).projectid
        return self.projects.get(projectid).jobs.get(jobkey)

    def close(self, timeout=None):
        """Close client instance.

        Only the Hubstorage client is closed here.

        :param timeout: (optional) float timeout secs to stop everything
            gracefully.
        """
        self._hsclient.close(timeout=timeout)

scrapinghub/client/activity.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from __future__ import absolute_import
2+
3+
from .utils import _Proxy
4+
from .utils import parse_job_key
5+
6+
7+
class Activity(_Proxy):
    """Representation of collection of job activity events.

    Not a public constructor: use :class:`Project` instance to get a
    :class:`Activity` instance. See :attr:`Project.activity` attribute.

    Please note that list() method can use a lot of memory and for a large
    amount of activities it's recommended to iterate through it via iter()
    method (all params and available filters are same for both methods).

    Usage:

    - get all activity from a project::

        >>> project.activity.iter()
        <generator object jldecode at 0x1049ee990>

    - get only last 2 events from a project::

        >>> project.activity.list(count=2)
        [{'event': 'job:completed', 'job': '123/2/3', 'user': 'jobrunner'},
         {'event': 'job:started', 'job': '123/2/3', 'user': 'john'}]

    - post a new event::

        >>> event = {'event': 'job:completed',
        ...          'job': '123/2/4',
        ...          'user': 'jobrunner'}
        >>> project.activity.add(event)

    - post multiple events at once::

        >>> events = [
        ...     {'event': 'job:completed', 'job': '123/2/5', 'user': 'jobrunner'},
        ...     {'event': 'job:cancelled', 'job': '123/2/6', 'user': 'john'},
        ... ]
        >>> project.activity.add(events)

    """
    def __init__(self, *args, **kwargs):
        super(Activity, self).__init__(*args, **kwargs)
        self._proxy_methods([('iter', 'list')])
        self._wrap_iter_methods(['iter'])

    def add(self, values, **kwargs):
        """Post one or several activity events.

        :param values: a single event dictionary, or a list/iterable of
            event dictionaries.
        :param \\*\\*kwargs: passed as-is to the underlying ``post`` call.
        :raises ValueError: if an event is not a dictionary, or if an event
            has a ``job`` key whose project id differs from ``self.key``.
        """
        if isinstance(values, dict):
            # A single event: list(dict) would yield its keys (strings) and
            # fail the dictionary check below, so wrap it instead.
            values = [values]
        elif not isinstance(values, list):
            # Materialize any other iterable: it is walked for validation
            # here and then posted as a whole.
            values = list(values)
        for activity in values:
            if not isinstance(activity, dict):
                raise ValueError("Please pass events as dictionaries")
            jobkey = activity.get('job')
            if jobkey and parse_job_key(jobkey).projectid != self.key:
                raise ValueError('Please use same project id')
        self._origin.post(values, **kwargs)

scrapinghub/client/collections.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
from __future__ import absolute_import
2+
import collections
3+
4+
from six import string_types
5+
6+
from ..hubstorage.collectionsrt import Collection as _Collection
7+
8+
from .utils import _Proxy
9+
from .utils import format_iter_filters
10+
from .utils import proxy_methods
11+
from .utils import wrap_kwargs
12+
13+
14+
class Collections(_Proxy):
    """Entry point to the collections of a project.

    Not a public constructor: use :class:`Project` instance to get a
    :class:`Collections` instance. See :attr:`Project.collections` attribute.

    Usage::

        >>> collections = project.collections
        >>> collections.list()
        [{'name': 'Pages', 'type': 's'}]
        >>> foo_store = collections.get_store('foo_store')
    """

    def get(self, coltype, colname):
        """Validate type and name, then build the matching :class:`Collection`."""
        origin = self._origin
        origin._validate_collection(coltype, colname)
        collection = Collection(self._client, self, coltype, colname)
        return collection

    def get_store(self, colname):
        """Shortcut for ``get('s', colname)``."""
        return self.get('s', colname)

    def get_cached_store(self, colname):
        """Shortcut for ``get('cs', colname)``."""
        return self.get('cs', colname)

    def get_versioned_store(self, colname):
        """Shortcut for ``get('vs', colname)``."""
        return self.get('vs', colname)

    def get_versioned_cached_store(self, colname):
        """Shortcut for ``get('vcs', colname)``."""
        return self.get('vcs', colname)

    def iter(self):
        """Return the result of the underlying ``apiget('list')`` call."""
        found = self._origin.apiget('list')
        return found

    def list(self):
        """Return everything produced by :meth:`iter` as a list."""
        found = self.iter()
        return list(found)
52+
53+
54+
class Collection(object):
    """Representation of a project collection object.

    Not a public constructor: use :class:`Collections` instance to get a
    :class:`Collection` instance. See :meth:`Collections.get_store` and
    similar methods.

    Usage:

    - add a new item to collection::

        >>> foo_store.set({'_key': '002d050ee3ff6192dcbecc4e4b4457d7',
        ...                'value': '1447221694537'})

    - count items in collection::

        >>> foo_store.count()
        1

    - get an item from collection::

        >>> foo_store.get('002d050ee3ff6192dcbecc4e4b4457d7')
        {'value': '1447221694537'}

    - get all items from collection::

        >>> foo_store.iter()
        <generator object jldecode at 0x1049eef10>

    - iterate over _key & value pair::

        >>> for elem in foo_store.iter(count=1):
        ...     print(elem)
        {'_key': '002d050ee3ff6192dcbecc4e4b4457d7',
         'value': '1447221694537'}

    - filter by multiple keys, only values for keys that exist will be returned::

        >>> foo_store.list(key=['002d050ee3ff6192dcbecc4e4b4457d7', 'blah'])
        [{'_key': '002d050ee3ff6192dcbecc4e4b4457d7', 'value': '1447221694537'}]

    - delete an item by key::

        >>> foo_store.delete('002d050ee3ff6192dcbecc4e4b4457d7')
    """

    def __init__(self, client, collections, coltype, colname):
        self._client = client
        self._origin = _Collection(coltype, colname, collections._origin)
        proxy_methods(self._origin, self, [
            'create_writer', 'count',
            ('iter', 'iter_values'),
            ('iter_raw_json', 'iter_json'),
        ])
        # simplified version of _Proxy._wrap_iter_methods logic
        # to provide better support for filter param in iter methods
        for method in ['iter', 'iter_raw_json']:
            wrapped = wrap_kwargs(getattr(self, method), format_iter_filters)
            setattr(self, method, wrapped)

    def list(self, *args, **kwargs):
        """Convenient shortcut to list iter results.

        Please note that list() method can use a lot of memory and for a large
        amount of elements it's recommended to iterate through it via iter()
        method (all params and available filters are same for both methods).
        """
        return list(self.iter(*args, **kwargs))

    def get(self, key, *args, **kwargs):
        """Get item from collection by key.

        :param key: string item key
        :return: an item dictionary if exists
        :raises ValueError: if ``key`` is None.
        """
        if key is None:
            raise ValueError("key cannot be None")
        return self._origin.get(key, *args, **kwargs)

    def set(self, *args, **kwargs):
        """Set item to collection by key.

        The method returns None (original method returns an empty generator).
        """
        self._origin.set(*args, **kwargs)

    def delete(self, keys):
        """Delete item(s) from collection by key(s).

        The method returns None (original method returns an empty generator).

        :param keys: a string key or an iterable of string keys.
        :raises ValueError: if ``keys`` is neither a string nor an iterable.
        """
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc`` on Python 3 and in ``collections`` on Python 2.
        try:
            from collections.abc import Iterable
        except ImportError:  # Python 2
            from collections import Iterable
        # The explicit string check is kept because on Python 2 ``str`` has
        # no ``__iter__`` and is therefore not an instance of ``Iterable``.
        if not (isinstance(keys, Iterable) or
                isinstance(keys, string_types)):
            raise ValueError("You should provide string key or iterable "
                             "object providing string keys")
        self._origin.delete(keys)

    def iter_raw_msgpack(self, requests_params=None, **apiparams):
        """Call ``iter_msgpack`` on the parent collections object.

        The collection type and name are taken from the wrapped collection;
        ``requests_params`` and ``apiparams`` are passed through unchanged.
        """
        return self._origin._collections.iter_msgpack(
            self._origin.coltype, self._origin.colname,
            requests_params=requests_params, **apiparams)
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import
23
from functools import wraps
34

45
from requests import HTTPError
56

6-
from .legacy import APIError
7-
from .hubstorage import ValueTooLarge as _ValueTooLarge
7+
from ..legacy import APIError
8+
from ..hubstorage import ValueTooLarge as _ValueTooLarge
89

910

1011
def _get_http_error_msg(exc):

0 commit comments

Comments
 (0)