11# -*- coding: utf-8 -*-
22import json
33import os
4+ from urllib .parse import quote
45
56import pytest
67import re
@@ -412,3 +413,113 @@ def test_bytes_in_item(self, server, method):
412413 'callback' : 'return_bytes' })
413414 assert res .status_code == 200
414415 assert res .json ()["items" ] == [{'name' : 'Some bytes here' }]
416+
def test_crawl_with_argument_get(self, server):
    """GET ``crawl.json`` with a URL-quoted JSON ``crawl_args`` value.

    A ``postcode`` argument is sent together with the ``return_argument``
    callback; the response must report success, drop nothing, and contain
    exactly one item whose ``name`` equals the postcode that was sent.
    """
    endpoint = server.url("crawl.json")
    postcode = "43-300"
    # For GET the arguments travel as percent-encoded JSON.
    encoded_args = quote(json.dumps({"postcode": postcode}))
    request_params = {
        "url": server.target_site.url("page1.html"),
        "crawl_args": encoded_args,
        "callback": 'return_argument',
    }
    response = perform_get(endpoint, {"spider_name": "test"}, request_params)

    wanted_items = [{'name': postcode}]
    payload = response.json()
    assert payload["status"] == "ok"
    assert payload["items_dropped"] == []
    assert payload['items']
    assert len(payload['items']) == len(wanted_items)
    assert payload["items"] == wanted_items
436+
def test_crawl_with_argument_post(self, server):
    """POST ``crawl.json`` with ``crawl_args`` given as a plain dict.

    The ``postcode`` argument is placed next to ``spider_name`` in the
    first payload passed to ``perform_post``. The response must be a
    200 with status ``ok``, no errors, no dropped items, and exactly one
    item whose ``name`` equals the postcode that was sent.
    """
    endpoint = server.url("crawl.json")
    postcode = "43-300"
    api_params = {
        "spider_name": "test",
        # Unlike the GET variant, the arguments are not JSON/URL encoded here.
        "crawl_args": {"postcode": postcode},
    }
    request_params = {
        "url": server.target_site.url("page1.html"),
        "callback": 'return_argument',
    }
    response = perform_post(endpoint, api_params, request_params)

    wanted_items = [{'name': postcode}]
    payload = response.json()
    assert response.status_code == 200
    assert payload["status"] == "ok"
    assert not payload.get("errors")
    assert payload["items_dropped"] == []
    assert payload['items']
    assert len(payload['items']) == len(wanted_items)
    assert payload["items"] == wanted_items
459+
def test_crawl_with_argument_invalid_json(self, server):
    """GET ``crawl.json`` with a ``crawl_args`` value that is not JSON.

    The request must be rejected with HTTP 400, an ``error`` status, no
    ``items`` key, and a message saying the value must be valid
    URL-encoded JSON.
    """
    endpoint = server.url("crawl.json")
    # An unterminated string literal: quoting succeeds, JSON decoding cannot.
    broken_args = quote('"this is not valid json')
    request_params = {
        "url": server.target_site.url("page1.html"),
        "crawl_args": broken_args,
        "callback": 'return_argument',
    }
    response = perform_get(endpoint, {"spider_name": "test"}, request_params)

    assert response.status_code == 400
    payload = response.json()
    assert payload["status"] == "error"
    assert payload.get('items') is None
    assert payload['code'] == 400
    expected_fragment = ' must be valid url encoded JSON'
    assert re.search(expected_fragment, payload['message'])
475+
def test_crawl_with_argument_invalid_name(self, server):
    """Reject a crawl argument named after a spider method.

    ``parse`` is sent as a crawl argument, first via GET and then via
    POST. Both responses must be HTTP 400 with an ``error`` status, no
    ``items`` key, and a message stating that a crawl argument cannot
    override a spider method.
    """

    def assert_rejected(response):
        # Shared expectations for the GET and POST responses.
        payload = response.json()
        assert response.status_code == 400
        assert payload["status"] == "error"
        assert payload.get('items') is None
        assert payload['code'] == 400

        expected_message = 'Crawl argument cannot override spider method'
        assert re.search(expected_message, payload['message'])

    endpoint = server.url("crawl.json")
    encoded_args = quote(json.dumps({"parse": "string"}))

    get_response = perform_get(endpoint, {"spider_name": "test"}, {
        "url": server.target_site.url("page1.html"),
        "crawl_args": encoded_args,
    })
    assert_rejected(get_response)

    # NOTE(review): the POST reuses the same URL-quoted JSON string for
    # ``crawl_args`` instead of a dict -- confirm this is intended.
    post_response = perform_post(endpoint, {
        "spider_name": "test",
        "crawl_args": encoded_args,
    }, {
        "url": server.target_site.url("page1.html"),
        "callback": 'return_argument',
    })
    assert_rejected(post_response)
505+
def test_crawl_with_argument_attribute_collision(self, server):
    """Accept a crawl argument that collides with a spider attribute.

    When an argument passed via the API has the same name as a spider
    attribute, the argument from the request overrides the attribute
    instead of being rejected. This test only verifies that the crawl
    succeeds (status ``ok``, HTTP 200) and yields exactly one item.
    """

    def assert_succeeded(response):
        payload = response.json()
        assert payload["status"] == "ok"
        assert response.status_code == 200
        assert payload['items']
        assert len(payload['items']) == 1

    endpoint = server.url("crawl.json")
    encoded_args = quote(json.dumps({"some_attribute": "string"}))
    request_params = {
        "url": server.target_site.url("page1.html"),
        "crawl_args": encoded_args,
    }
    response = perform_get(endpoint, {"spider_name": "test"}, request_params)

    assert_succeeded(response)
0 commit comments