 # -*- coding: utf-8 -*-
+import demjson
 from scrapy.utils.misc import load_object
 from scrapy.utils.serialize import ScrapyJSONEncoder
 from twisted.internet.defer import Deferred
 from twisted.python.failure import Failure
-from twisted.web import server, resource
-from twisted.web.error import UnsupportedMethod, Error
-import demjson
+from twisted.web import resource, server
+from twisted.web.error import Error, UnsupportedMethod

 from . import log
 from .conf import settings
+from .utils import extract_scrapy_request_args


 class ServiceResource(resource.Resource, object):
@@ -110,24 +111,14 @@ def render_GET(self, request, **kwargs):
         At the moment kwargs for scrapy request are not supported in GET.
         They are supported in POST handler.
         """
-        request_data = dict(
+        api_params = dict(
             (name.decode('utf-8'), value[0].decode('utf-8'))
             for name, value in request.args.items()
         )
-
-        spider_data = {
-            'url': self.get_required_argument(request_data, 'url'),
-            # TODO get optional Request arguments here
-            # distinguish between proper Request args and
-            # api parameters
-        }
-        try:
-            callback = request_data['callback']
-        except KeyError:
-            pass
-        else:
-            spider_data['callback'] = callback
-        return self.prepare_crawl(request_data, spider_data, **kwargs)
+        scrapy_request_args = extract_scrapy_request_args(api_params,
+                                                          raise_error=False)
+        self.validate_options(scrapy_request_args, api_params)
+        return self.prepare_crawl(api_params, scrapy_request_args, **kwargs)

     def render_POST(self, request, **kwargs):
         """
@@ -147,66 +138,85 @@ def render_POST(self, request, **kwargs):
147138 """
148139 request_body = request .content .getvalue ()
149140 try :
150- request_data = demjson .decode (request_body )
141+ api_params = demjson .decode (request_body )
151142 except ValueError as e :
152143 message = "Invalid JSON in POST body. {}"
153144 message .format (e .pretty_description ())
154145 raise Error ('400' , message = message )
155146
156- log .msg ("{}" .format (request_data ))
157- spider_data = self .get_required_argument (request_data , "request" )
158- error_msg = "Missing required key 'url' in 'request' object"
159- self .get_required_argument (spider_data , "url" , error_msg = error_msg )
147+ log .msg ("{}" .format (api_params ))
148+ if api_params .get ("start_requests" ):
149+ # start requests passed so 'request' argument is optional
150+ _request = api_params .get ("request" , {})
151+ else :
152+ # no start_requests, 'request' is required
153+ _request = self .get_required_argument (api_params , "request" )
154+ try :
155+ scrapy_request_args = extract_scrapy_request_args (
156+ _request , raise_error = True
157+ )
158+ except ValueError as e :
159+ raise Error (400 , e .message )
160+
161+ self .validate_options (scrapy_request_args , api_params )
162+ return self .prepare_crawl (api_params , scrapy_request_args , ** kwargs )
160163
161- return self .prepare_crawl (request_data , spider_data , ** kwargs )
164+ def validate_options (self , scrapy_request_args , api_params ):
165+ url = scrapy_request_args .get ("url" )
166+ start_requests = api_params .get ("start_requests" )
167+ if not url and not start_requests :
168+ raise Error (400 ,
169+ "'url' is required if start_requests are disabled" )
162170
163- def get_required_argument (self , request_data , name , error_msg = None ):
171+ def get_required_argument (self , api_params , name , error_msg = None ):
164172 """Get required API key from dict-like object.
165173
166- :param dict request_data :
174+ :param dict api_params :
167175 dictionary with names and values of parameters supplied to API.
168176 :param str name:
169- required key that must be found in request_data
177+ required key that must be found in api_params
170178 :return: value of required param
171179 :raises Error: Bad Request response
172180
173181 """
174182 if error_msg is None :
175183 error_msg = 'Missing required parameter: {}' .format (repr (name ))
176184 try :
177- value = request_data [name ]
185+ value = api_params [name ]
178186 except KeyError :
179187 raise Error ('400' , message = error_msg )
180188 if not value :
181189 raise Error ('400' , message = error_msg )
182190 return value
183191
184- def prepare_crawl (self , request_data , spider_data , * args , ** kwargs ):
192+ def prepare_crawl (self , api_params , scrapy_request_args , * args , ** kwargs ):
185193 """Schedule given spider with CrawlManager.
186194
187- :param dict request_data :
195+ :param dict api_params :
188196 arguments needed to find spider and set proper api parameters
189197 for crawl (max_requests for example)
190198
191- :param dict spider_data :
199+ :param dict scrapy_request_args :
192200 should contain positional and keyword arguments for Scrapy
193201 Request object that will be created
194202 """
195- spider_name = self .get_required_argument (request_data , 'spider_name' )
203+ spider_name = self .get_required_argument (api_params , 'spider_name' )
204+ start_requests = api_params .get ("start_requests" , False )
196205 try :
197- max_requests = request_data ['max_requests' ]
206+ max_requests = api_params ['max_requests' ]
198207 except (KeyError , IndexError ):
199208 max_requests = None
200209 dfd = self .run_crawl (
201- spider_name , spider_data , max_requests , * args , ** kwargs )
210+ spider_name , scrapy_request_args , max_requests ,
211+ start_requests = start_requests , * args , ** kwargs )
202212 dfd .addCallback (
203- self .prepare_response , request_data = request_data , * args , ** kwargs )
213+ self .prepare_response , request_data = api_params , * args , ** kwargs )
204214 return dfd
205215
206- def run_crawl (self , spider_name , spider_data ,
207- max_requests = None , * args , ** kwargs ):
216+ def run_crawl (self , spider_name , scrapy_request_args ,
217+ max_requests = None , start_requests = False , * args , ** kwargs ):
208218 crawl_manager_cls = load_object (settings .CRAWL_MANAGER )
209- manager = crawl_manager_cls (spider_name , spider_data , max_requests )
219+ manager = crawl_manager_cls (spider_name , scrapy_request_args , max_requests , start_requests = start_requests )
210220 dfd = manager .crawl (* args , ** kwargs )
211221 return dfd
212222
@@ -223,4 +233,3 @@ def prepare_response(self, result, *args, **kwargs):
         if errors:
             response["errors"] = errors
         return response
-
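
The extract_scrapy_request_args helper imported from .utils is not shown in this diff. Below is a minimal sketch of what such a helper might do, assuming it filters the supplied dict down to valid scrapy.Request constructor arguments and optionally rejects unknown keys; it is an illustration, not the actual implementation.

import inspect

from scrapy import Request


def extract_scrapy_request_args(dictionary, raise_error=False):
    # Illustrative sketch only: keep the keys of dictionary that are valid
    # scrapy.Request.__init__ arguments and drop (or reject) everything else.
    valid_args = set(inspect.signature(Request.__init__).parameters) - {"self"}
    result = dictionary.copy()
    for key in dictionary:
        if key not in valid_args:
            result.pop(key)
            if raise_error:
                raise ValueError(
                    "{!r} is not a valid argument for scrapy.Request".format(key))
    return result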
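
For context, a usage sketch of the two entry points after this change; the host, port, and crawl.json endpoint below are assumptions about a typical scrapyrt deployment and are not part of this diff.

import requests

# GET: scrapy.Request arguments such as url are passed as query parameters
# alongside API parameters such as spider_name.
requests.get(
    "http://localhost:9080/crawl.json",  # assumed host/port/endpoint
    params={"spider_name": "example", "url": "http://example.com/page"},
)

# POST with start_requests enabled: the 'request' object becomes optional
# and the spider's own start requests seed the crawl.
requests.post(
    "http://localhost:9080/crawl.json",
    json={"spider_name": "example", "start_requests": True},
)

# POST without start_requests: 'request' (with at least 'url') is required,
# otherwise validate_options raises a 400 Error.
requests.post(
    "http://localhost:9080/crawl.json",
    json={"spider_name": "example", "request": {"url": "http://example.com/page"}},
)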