1 | 1 | # -*- coding: utf-8 -*- |
| 2 | +import json |
| 3 | +from urllib.parse import unquote |
| 4 | + |
2 | 5 | import demjson |
3 | 6 | from scrapy.utils.misc import load_object |
4 | 7 | from scrapy.utils.serialize import ScrapyJSONEncoder |
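For context, a quick sketch of the decode path the two new imports support, assuming `crawl_args` arrives as a URL-encoded JSON string (the sample value below is made up):

```python
import json
from urllib.parse import unquote

# Hypothetical query-string value: '{"category": "books"}' after URL encoding.
raw = "%7B%22category%22%3A%20%22books%22%7D"
print(json.loads(unquote(raw)))  # {'category': 'books'}
```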
@@ -134,6 +137,7 @@ def render_GET(self, request, **kwargs): |
134 | 137 | scrapy_request_args = extract_scrapy_request_args(api_params, |
135 | 138 | raise_error=False) |
136 | 139 | self.validate_options(scrapy_request_args, api_params) |
| 140 | + |
137 | 141 | return self.prepare_crawl(api_params, scrapy_request_args, **kwargs) |
138 | 142 |
139 | 143 | def render_POST(self, request, **kwargs): |
@@ -171,7 +175,7 @@ def render_POST(self, request, **kwargs): |
171 | 175 | _request = self.get_required_argument(api_params, "request") |
172 | 176 | try: |
173 | 177 | scrapy_request_args = extract_scrapy_request_args( |
174 | | - _request, raise_error=False |
| 178 | + _request, raise_error=True |
175 | 179 | ) |
176 | 180 | except ValueError as e: |
177 | 181 | raise Error('400', str(e)) |
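For reference, a minimal stand-in showing what flipping `raise_error` to `True` implies here, assuming the helper validates keys against `scrapy.Request`'s constructor; this is an illustrative sketch, not scrapyrt's `extract_scrapy_request_args`:

```python
import inspect

from scrapy import Request


def filter_request_args(params, raise_error=False):
    # Keep only keys accepted by scrapy.Request.__init__; with raise_error=True
    # an unknown key raises instead of being silently dropped.
    valid = set(inspect.signature(Request.__init__).parameters)
    result = {}
    for key, value in params.items():
        if key in valid:
            result[key] = value
        elif raise_error:
            raise ValueError(f"{key!r} is not a valid scrapy.Request argument")
    return result


try:
    filter_request_args({"url": "http://example.com", "urll": "oops"},
                        raise_error=True)
except ValueError as exc:
    print(exc)  # the typo now surfaces as a 400 in render_POST instead of being ignored
```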
@@ -224,17 +228,33 @@ def prepare_crawl(self, api_params, scrapy_request_args, *args, **kwargs): |
224 | 228 | max_requests = api_params['max_requests'] |
225 | 229 | except (KeyError, IndexError): |
226 | 230 | max_requests = None |
| 231 | + |
| 232 | + crawl_args = api_params.get("crawl_args") |
| 233 | + if isinstance(crawl_args, str): |
| 234 | + try: |
| 235 | + crawl_args = json.loads(unquote(crawl_args)) |
| 236 | + except Exception as e: |
 | 237 | + msg = "crawl_args must be valid URL-encoded JSON;"
 | 238 | + msg += " the string could not be decoded as JSON:"
 | 239 | + msg += f" {e}"
| 240 | + raise Error('400', message=msg) |
| 241 | + |
227 | 242 | dfd = self.run_crawl( |
228 | 243 | spider_name, scrapy_request_args, max_requests, |
229 | | - start_requests=start_requests, *args, **kwargs) |
| 244 | + start_requests=start_requests, |
| 245 | + crawl_args=crawl_args, |
| 246 | + *args, |
| 247 | + **kwargs) |
230 | 248 | dfd.addCallback( |
231 | 249 | self.prepare_response, request_data=api_params, *args, **kwargs) |
232 | 250 | return dfd |
233 | 251 |
234 | 252 | def run_crawl(self, spider_name, scrapy_request_args, |
235 | | - max_requests=None, start_requests=False, *args, **kwargs): |
| 253 | + max_requests=None, crawl_args=None, start_requests=False, *args, **kwargs): |
236 | 254 | crawl_manager_cls = load_object(settings.CRAWL_MANAGER) |
237 | 255 | manager = crawl_manager_cls(spider_name, scrapy_request_args, max_requests, start_requests=start_requests) |
| 256 | + if crawl_args: |
| 257 | + kwargs.update(crawl_args) |
238 | 258 | dfd = manager.crawl(*args, **kwargs) |
239 | 259 | return dfd |
240 | 260 |
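Putting the pieces together, a hedged end-to-end sketch: the client URL-encodes a JSON object as `crawl_args`, `prepare_crawl` decodes it, and `run_crawl` merges it into the kwargs handed to the crawl manager. The endpoint, port, and spider name below are assumptions based on scrapyrt's defaults, not values taken from this diff:

```python
import json
from urllib.parse import quote, unquote

# Client side: hypothetical extra spider arguments to forward to the crawl.
crawl_args = {"category": "books", "max_price": 20}
encoded = quote(json.dumps(crawl_args))
url = (
    "http://localhost:9080/crawl.json"
    "?spider_name=example&url=http%3A%2F%2Fexample.com"
    f"&crawl_args={encoded}"
)

# Server side, mirroring prepare_crawl/run_crawl above: unquote, json.loads,
# then merge into the kwargs forwarded to the crawl manager.
decoded = json.loads(unquote(encoded))
kwargs = {}
if decoded:
    kwargs.update(decoded)
assert kwargs == {"category": "books", "max_price": 20}
print(url)
```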