@@ -13,14 +13,14 @@ class Command(BaseCommand):
1313
1414 @monitor (monitor_slug = SentryMonitor .INGEST_ICRC )
1515 def handle (self , * args , ** kwargs ):
16- logger .info ("Strating ICRC data ingest" )
16+ logger .info ("Starting ICRC data ingest" )
1717 HEADERS = {
1818 "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36" , # noqa
1919 }
20- response = requests . get (
21- url = "https://www.icrc.org/en/where-we-work" ,
22- headers = HEADERS ,
23- )
20+ icrc_url = "https://www.icrc.org"
21+ icrc_where_we_work_url = "https://www.icrc.org/en/where-we-work"
22+ response = requests . get ( url = icrc_where_we_work_url , headers = HEADERS )
23+
2424 if response .status_code != 200 :
2525 text_to_log = "Error querying ICRC feed at https://www.icrc.org/en/where-we-work"
2626 logger .error (text_to_log )
@@ -36,32 +36,36 @@ def handle(self, *args, **kwargs):
3636 response .raise_for_status ()
3737 soup = BeautifulSoup (response .content , "html.parser" )
3838
39- # Get the countries information from the "Where we work" page
40- regions_list = soup .find ("div" , {"id" : "blockRegionalList" }).find_all ("ul" , {"class" : "list" })
39+ # Get countries information from "Where we work" page
40+ regions_list = soup .find ("div" , {"class" : "js-select-country-list" }).find ("ul" ).find_all ("ul" )
41+
4142 country_list = []
4243 for region in regions_list :
43- for country in region .find_all ("li" , {"class" : "item" }):
44- # Get key information
44+ for country in region .find_all ("li" ):
4545 name = country .text .strip ()
46- url = country .find ("a" )["href" ] if country .find ("a" ) else None
47- presence = True if url else False
48- key_operation = True if "keyOperations" in country ["class" ] else False
49- # Get the description from the country page
46+ href = country .find ("a" )["href" ] if country .find ("a" ) else None
47+ country_url = icrc_url + href if href else None
48+ presence = bool (country_url )
5049 description = None
51- if url :
50+ key_operation = False
51+
52+ if country_url :
5253 try :
53- country_page = requests .get (url = url , headers = { "User-Agent" : "" } )
54+ country_page = requests .get (url = country_url , headers = HEADERS )
5455 country_page .raise_for_status ()
5556 country_soup = BeautifulSoup (country_page .content , "html.parser" )
56- description = country_soup .find ("div" , {"class" : "block-introduction" }).find_all ()[2 ].text .strip ()
57+ description_tag = country_soup .find ("div" , class_ = "description" ).find ("div" , class_ = "ck-text" )
58+ key_operation = bool (description_tag )
59+ description = description_tag .text .strip () if description_tag else None
5760 except Exception :
5861 pass
59- # Append all the information to the list
62+
63+ # Append to list
6064 country_list .append (
6165 {
6266 "Country" : name ,
6367 "ICRC presence" : presence ,
64- "URL" : url ,
68+ "URL" : country_url ,
6569 "Key operation" : key_operation ,
6670 "Description" : description ,
6771 }
@@ -72,15 +76,14 @@ def handle(self, *args, **kwargs):
7276 country = Country .objects .filter (name__exact = data ["Country" ]).first ()
7377 if country :
7478 country_icrc_presence , _ = CountryICRCPresence .objects .get_or_create (country = country )
75-
7679 country_icrc_presence .icrc_presence = data ["ICRC presence" ]
7780 country_icrc_presence .url = data ["URL" ]
7881 country_icrc_presence .key_operation = data ["Key operation" ]
7982 country_icrc_presence .description = data ["Description" ]
8083 country_icrc_presence .save ()
8184 added += 1
8285
83- text_to_log = "%s ICRC added" % added
86+ text_to_log = f" { added } ICRC added"
8487 logger .info (text_to_log )
8588 body = {"name" : "ingest_icrc" , "message" : text_to_log , "num_result" : added , "status" : CronJobStatus .SUCCESSFUL }
8689 CronJob .sync_cron (body )
0 commit comments