@@ -96,6 +96,12 @@ def __init__(
96
96
self .json_transform = []
97
97
self ._extract_values = extract_values
98
98
99
+ self ._json_types = (
100
+ "application/json" ,
101
+ "application/javascript" ,
102
+ "application/geo+json" ,
103
+ )
104
+
99
105
# This may be removed. Using for testing
100
106
self .requests = None
101
107
@@ -392,12 +398,12 @@ def get_io_data(self, feed_key):
392
398
io_client = self ._get_io_client ()
393
399
394
400
while True :
395
- try :
396
- return io_client .receive_all_data (feed_key )
397
- except RuntimeError as exception :
398
- print ("An error occured, retrying! 1 -" , exception )
399
- continue
400
- break
401
+ # try:
402
+ return io_client .receive_all_data (feed_key )
403
+ # except RuntimeError as exception:
404
+ # print("An error occured, retrying! 1 -", exception)
405
+ # continue
406
+ # break
401
407
402
408
def fetch (self , url , * , headers = None , timeout = 10 ):
403
409
"""Fetch data from the specified url and return a response object
@@ -427,47 +433,30 @@ def fetch(self, url, *, headers=None, timeout=10):
427
433
428
434
return response
429
435
430
- def fetch_data (
431
- self ,
432
- url ,
433
- * ,
434
- headers = None ,
435
- json_path = None ,
436
- regexp_path = None ,
437
- timeout = 10 ,
438
- ):
439
- """Fetch data from the specified url and perfom any parsing
440
-
441
- :param str url: The URL to fetch from.
442
- :param list headers: Extra headers to include in the request.
443
- :param json_path: The path to drill down into the JSON data.
444
- :param regexp_path: The path formatted as a regular expression to drill down
445
- into the JSON data.
446
- :param int timeout: The timeout period in seconds.
436
def _detect_content_type(self, headers):
    """
    Classify a response from its ``content-type`` header

    :param dict headers: The response headers, looked up under the
                         lower-case key ``content-type``.

    Returns ``CONTENT_IMAGE`` when the header contains ``image/``,
    ``CONTENT_JSON`` when it contains any entry of ``self._json_types``,
    and ``CONTENT_TEXT`` otherwise (including when the header is absent).

    """
    if "content-type" not in headers:
        return CONTENT_TEXT

    declared = headers["content-type"]
    # Images are checked first, so they win over any JSON-type match.
    if "image/" in declared:
        return CONTENT_IMAGE
    if any(known in declared for known in self._json_types):
        return CONTENT_JSON
    return CONTENT_TEXT
447
444
445
+ def check_response (self , response ):
448
446
"""
449
- json_out = None
450
- values = []
451
- content_type = CONTENT_TEXT
447
+ Check the response object status code, change the lights, and return content type
452
448
453
- response = self .fetch (url , headers = headers , timeout = timeout )
449
+ :param response: The response object from a network call
450
+
451
+ """
452
+ headers = self ._get_headers (response )
454
453
455
- headers = {}
456
- for title , content in response .headers .items ():
457
- headers [title .lower ()] = content
458
- gc .collect ()
459
454
if self ._debug :
460
455
print ("Headers:" , headers )
461
456
if response .status_code == 200 :
462
457
print ("Reply is OK!" )
463
458
self .neo_status (STATUS_DATA_RECEIVED ) # green = got data
464
- if "content-type" in headers :
465
- if "image/" in headers ["content-type" ]:
466
- content_type = CONTENT_IMAGE
467
- elif "application/json" in headers ["content-type" ]:
468
- content_type = CONTENT_JSON
469
- elif "application/javascript" in headers ["content-type" ]:
470
- content_type = CONTENT_JSON
459
+ content_type = self ._detect_content_type (headers )
471
460
else :
472
461
if self ._debug :
473
462
if "content-length" in headers :
@@ -481,11 +470,56 @@ def fetch_data(
481
470
)
482
471
)
483
472
484
- if content_type == CONTENT_JSON and json_path is not None :
485
- if isinstance (json_path , (list , tuple )) and (
486
- not json_path or not isinstance (json_path [0 ], (list , tuple ))
487
- ):
488
- json_path = (json_path ,)
473
+ return content_type
474
+
475
+ @staticmethod
476
+ def _get_headers (response ):
477
+ headers = {}
478
+ for title , content in response .headers .items ():
479
+ headers [title .lower ()] = content
480
+ gc .collect ()
481
+ return headers
482
+
483
def fetch_data(
    self,
    url,
    *,
    headers=None,
    json_path=None,
    regexp_path=None,
    timeout=10,
):
    """Fetch data from the specified url and perform any parsing

    The request itself is delegated to ``fetch``; the response is then
    handed to ``_parse_data`` and its result is returned unchanged.

    :param str url: The URL to fetch from.
    :param headers: Extra headers to include in the request.
    :param json_path: The path to drill down into the JSON data.
    :param regexp_path: The path formatted as a regular expression to search
                        the text data.
    :param int timeout: The timeout period in seconds.

    """
    return self._parse_data(
        self.fetch(url, headers=headers, timeout=timeout),
        json_path=json_path,
        regexp_path=regexp_path,
    )
504
+
505
+ def _parse_data (
506
+ self ,
507
+ response ,
508
+ * ,
509
+ json_path = None ,
510
+ regexp_path = None ,
511
+ ):
512
+
513
+ json_out = None
514
+ content_type = self .check_response (response )
515
+
516
+ if content_type == CONTENT_JSON :
517
+ if json_path is not None :
518
+ # Drill down to the json path and set json_out as that node
519
+ if isinstance (json_path , (list , tuple )) and (
520
+ not json_path or not isinstance (json_path [0 ], (list , tuple ))
521
+ ):
522
+ json_path = (json_path ,)
489
523
try :
490
524
gc .collect ()
491
525
json_out = response .json ()
@@ -498,43 +532,71 @@ def fetch_data(
498
532
except MemoryError :
499
533
supervisor .reload ()
500
534
535
+ if content_type == CONTENT_JSON :
536
+ values = self .process_json (json_out , json_path )
537
+ elif content_type == CONTENT_TEXT :
538
+ values = self .process_text (response .text , regexp_path )
539
+
540
+ # Clean up
541
+ json_out = None
542
+ response = None
543
+ if self ._extract_values and len (values ) == 1 :
544
+ values = values [0 ]
545
+
546
+ gc .collect ()
547
+
548
+ return values
549
+
550
+ @staticmethod
551
+ def process_text (text , regexp_path ):
552
+ """
553
+ Process text content
554
+
555
+ :param str text: The entire text content
556
+ :param regexp_path: The path formatted as a regular expression to search
557
+ the text data.
558
+
559
+ """
560
+ values = []
501
561
if regexp_path :
502
562
import re # pylint: disable=import-outside-toplevel
503
563
564
+ for regexp in regexp_path :
565
+ values .append (re .search (regexp , text ).group (1 ))
566
+ else :
567
+ values = text
568
+ return values
569
+
570
def process_json(self, json_data, json_path):
    """
    Process JSON content

    Runs every callable in ``self.json_transform`` on ``json_data`` and then
    either extracts the requested paths or serialises the whole document.

    :param dict json_data: The JSON data as a dict
    :param json_path: The path to drill down into the JSON data.

    Returns a list with one value per entry of ``json_path``; when there is
    no data or no path, returns the data dumped as a JSON string instead.

    """
    # Optional post-processing hooks; these MAY change or add elements.
    for position, transform in enumerate(self.json_transform):
        try:
            transform(json_data)
        except Exception as error:
            print("Exception from json_transform: ", position, error)
            raise

    if json_data is None or not json_path:
        # No path given, so return JSON as string for compatibility
        import json  # pylint: disable=import-outside-toplevel

        return json.dumps(json_data)

    # Extract the desired text/values from the JSON, one per path.
    extracted = []
    for path in json_path:
        try:
            extracted.append(self.json_traverse(json_data, path))
        except KeyError:
            # Show the document that lacked the key before re-raising.
            print(json_data)
            raise
    return extracted
0 commit comments