@@ -136,16 +136,16 @@ with hopefully helpful error message.
136136Examples
137137~~~~~~~~
138138
139- To run sample `dmoz spider `_ from `Scrapy educational dirbot project `_
140- parsing page about Ada programming language ::
139+ To run sample `toscrape-css spider`_ from `Scrapy educational quotesbot project`_
140+ parsing page about famous quotes::
141141
142- curl "http://localhost:9080/crawl.json?spider_name=dmoz &url=http://www.dmoz.org/Computers/Programming/Languages/Ada /"
142+ curl "http://localhost:9080/crawl.json?spider_name=toscrape-css&url=http://quotes.toscrape.com/"
143143
144144
145145To run same spider only allowing one request and parsing url
146146with callback ``parse_foo``::
147147
148- curl "http://localhost:9080/crawl.json?spider_name=dmoz &url=http://www.dmoz.org/Computers/Programming/Languages/Ada /&callback=parse_foo&max_requests=1"
148+ curl "http://localhost:9080/crawl.json?spider_name=toscrape-css&url=http://quotes.toscrape.com/&callback=parse_foo&max_requests=1"
149149
150150POST
151151----
@@ -222,16 +222,16 @@ hopefully helpful error message.
222222Examples
223223~~~~~~~~
224224
225- To schedule spider dmoz with sample url using POST handler::
225+ To schedule spider toscrape-css with sample url using POST handler::
226226
227227 curl localhost:9080/crawl.json \
228- -d '{"request":{"url":"http://www.dmoz.org/Computers/Programming/Languages/Awk/ "}, "spider_name": "dmoz "}'
228+ -d '{"request":{"url":"http://quotes.toscrape.com/"}, "spider_name": "toscrape-css"}'
229229
230230
231231to schedule same spider with some meta that will be passed to spider request::
232232
233233 curl localhost:9080/crawl.json \
234- -d '{"request":{"url":"http://www.dmoz.org/Computers/Programming/Languages/Awk/ ", "meta": {"alfa":"omega"}}, "spider_name": "dmoz "}'
234+ -d '{"request":{"url":"http://quotes.toscrape.com/", "meta": {"alfa":"omega"}}, "spider_name": "toscrape-css"}'
235235
236236Response
237237--------
@@ -265,34 +265,34 @@ errors (optional)
265265
266266Example::
267267
268- $ curl "http://localhost:9080/crawl.json?spider_name=dmoz &url=http://www.dmoz.org/Computers/Programming/Languages/Ada /"
268+ $ curl "http://localhost:9080/crawl.json?spider_name=toscrape-css&url=http://quotes.toscrape.com/"
269269 {
270270 "status": "ok"
271- "spider_name": "dmoz ",
271+ "spider_name": "toscrape-css",
272272 "stats": {
273- "start_time": "2014 -12-29 16:04:15 ",
274- "finish_time": "2014 -12-29 16:04:16 ",
273+ "start_time": "2019-12-06 13:01:31",
274+ "finish_time": "2019-12-06 13:01:35",
275275 "finish_reason": "finished",
276- "downloader/response_status_count/200": 1 ,
277- "downloader/response_count": 1 ,
278- "downloader/response_bytes": 8494 ,
279- "downloader/request_method_count/GET": 1 ,
280- "downloader/request_count": 1 ,
281- "downloader/request_bytes": 247 ,
282- "item_scraped_count": 16 ,
283- "log_count/DEBUG": 17 ,
284- "log_count/INFO": 4 ,
285- "response_received_count": 1 ,
286- "scheduler/dequeued": 1 ,
287- "scheduler/dequeued/memory": 1 ,
288- "scheduler/enqueued": 1 ,
289- "scheduler/enqueued/memory": 1
276+ "downloader/response_status_count/200": 10,
277+ "downloader/response_count": 11,
278+ "downloader/response_bytes": 24812,
279+ "downloader/request_method_count/GET": 11,
280+ "downloader/request_count": 11,
281+ "downloader/request_bytes": 2870,
282+ "item_scraped_count": 100,
283+ "log_count/DEBUG": 111,
284+ "log_count/INFO": 9,
285+ "response_received_count": 11,
286+ "scheduler/dequeued": 10,
287+ "scheduler/dequeued/memory": 10,
288+ "scheduler/enqueued": 10,
289+ "scheduler/enqueued/memory": 10
290290 },
291291 "items": [
292292 {
293- "description ": ...,
294- "name ": ...,
295- "url ": ...
293+ "text": ...,
294+ "author": ...,
295+ "tags": ...
296296 },
297297 ...
298298 ],
@@ -315,7 +315,7 @@ message
315315
316316Example::
317317
318- $ curl "http://localhost:9080/crawl.json?spider_name=foo&url=http://www.dmoz.org/Computers/Programming/Languages/Ada /"
318+ $ curl "http://localhost:9080/crawl.json?spider_name=foo&url=http://quotes.toscrape.com/"
319319 {
320320 "status": "error"
321321 "code": 404,
@@ -456,22 +456,22 @@ in response, for example::
456456
457457 {
458458 "status": "ok"
459- "spider_name": "dmoz ",
459+ "spider_name": "toscrape-css",
460460 "stats": {
461- "start_time": "2014 -12-29 17:26:11",
461+ "start_time": "2019-12-06 13:11:30",
462462 "spider_exceptions/Exception": 1,
463- "finish_time": "2014 -12-29 17:26:11 ",
463+ "finish_time": "2019-12-06 13:11:31",
464464 "finish_reason": "finished",
465465 "downloader/response_status_count/200": 1,
466- "downloader/response_count": 1 ,
467- "downloader/response_bytes": 8494 ,
468- "downloader/request_method_count/GET": 1 ,
469- "downloader/request_count": 1 ,
470- "downloader/request_bytes": 247 ,
471- "log_count/DEBUG": 1 ,
466+ "downloader/response_count": 2,
467+ "downloader/response_bytes": 2701,
468+ "downloader/request_method_count/GET": 2,
469+ "downloader/request_count": 2,
470+ "downloader/request_bytes": 446,
471+ "log_count/DEBUG": 2,
472472 "log_count/ERROR": 1,
473- "log_count/INFO": 4 ,
474- "response_received_count": 1 ,
473+ "log_count/INFO": 9,
474+ "response_received_count": 2,
475475 "scheduler/dequeued": 1,
476476 "scheduler/dequeued/memory": 1,
477477 "scheduler/enqueued": 1,
@@ -559,8 +559,8 @@ approach described in `Python Logging HOWTO`_ or redirect stdout to a file using
559559`bash redirection syntax `_, `supervisord logging `_ etc.
560560
561561
562- .. _ dmoz spider : https://github.com/scrapy/dirbot /blob/master/dirbot /spiders/dmoz .py
563- .. _Scrapy educational dirbot project : https://github.com/scrapy/dirbot
562+ .. _toscrape-css spider: https://github.com/scrapy/quotesbot/blob/master/quotesbot/spiders/toscrape-css.py
563+ .. _Scrapy educational quotesbot project: https://github.com/scrapy/quotesbot
564564.. _Scrapy Request: http://doc.scrapy.org/en/latest/topics/request-response.html#scrapy.http.Request
565565.. _Scrapy Crawler: http://doc.scrapy.org/en/latest/topics/api.html#scrapy.crawler.Crawler
566566.. _parse: http://doc.scrapy.org/en/latest/topics/spiders.html#scrapy.spider.Spider.parse
0 commit comments