Skip to content

Commit 2b3f32f

Browse files
Merge pull request #2306 from IFRCGo/fix/icrc-issue
ICRC: Fix operations country scraping logic
2 parents 892cf33 + cd2fb52 commit 2b3f32f

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

api/management/commands/ingest_icrc.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39,29 +39,39 @@ def handle(self, *args, **kwargs):
3939
# Get countries information from "Where we work" page
4040
regions_list = soup.find("div", {"class": "js-select-country-list"}).find("ul").find_all("ul")
4141

42-
country_list = []
42+
# Holds the list of countries that are part of the key operations
43+
key_operations_country_list = soup.find("div", {"class": "key-operations-content"}).find_all("div", class_="title")
44+
45+
# NOTE: Mapping these countries, as their names do not match the names in the database
46+
country_name_mapping = {"Syria": "Syrian Arab Republic", "Israel and the occupied territories": "Israel"}
47+
48+
country_operations_list = [
49+
country.text.strip() for key_operation in key_operations_country_list for country in key_operation.find_all("a")
50+
]
51+
52+
countries = []
4353
for region in regions_list:
4454
for country in region.find_all("li"):
4555
name = country.text.strip()
4656
href = country.find("a")["href"] if country.find("a") else None
4757
country_url = icrc_url + href if href else None
4858
presence = bool(country_url)
4959
description = None
50-
key_operation = False
60+
# Check if country is part of the key operations
61+
key_operation = name in country_operations_list
5162

5263
if country_url:
5364
try:
5465
country_page = requests.get(url=country_url, headers=HEADERS)
5566
country_page.raise_for_status()
5667
country_soup = BeautifulSoup(country_page.content, "html.parser")
5768
description_tag = country_soup.find("div", class_="description").find("div", class_="ck-text")
58-
key_operation = bool(description_tag)
5969
description = description_tag.text.strip() if description_tag else None
6070
except Exception:
6171
pass
6272

6373
# Append to list
64-
country_list.append(
74+
countries.append(
6575
{
6676
"Country": name,
6777
"ICRC presence": presence,
@@ -72,7 +82,11 @@ def handle(self, *args, **kwargs):
7282
)
7383

7484
added = 0
75-
for data in country_list:
85+
created_ns_presence_pk = []
86+
for data in countries:
87+
# NOTE: mapping the country name
88+
data["Country"] = country_name_mapping.get(data["Country"], data["Country"])
89+
7690
country = Country.objects.filter(name__exact=data["Country"]).first()
7791
if country:
7892
country_icrc_presence, _ = CountryICRCPresence.objects.get_or_create(country=country)
@@ -81,7 +95,10 @@ def handle(self, *args, **kwargs):
8195
country_icrc_presence.key_operation = data["Key operation"]
8296
country_icrc_presence.description = data["Description"]
8397
country_icrc_presence.save()
98+
created_ns_presence_pk.append(country_icrc_presence.pk)
8499
added += 1
100+
# NOTE: Delete the CountryICRCPresence that are not in the source
101+
CountryICRCPresence.objects.exclude(id__in=created_ns_presence_pk).delete()
85102

86103
text_to_log = f"{added} ICRC added"
87104
logger.info(text_to_log)

0 commit comments

Comments
 (0)