Skip to content

Commit 4ce07b9

Browse files
Merge pull request #2321 from IFRCGo/fix/icrc-countries-issue
ICRC: Fix grouping countries scraping logic
2 parents 2a3cb5d + 2a6da51 commit 4ce07b9

File tree

1 file changed

+39
-1
lines changed

1 file changed

+39
-1
lines changed

api/management/commands/ingest_icrc.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,23 @@ def handle(self, *args, **kwargs):
4343
key_operations_country_list = soup.find("div", {"class": "key-operations-content"}).find_all("div", class_="title")
4444

4545
# NOTE: Mapping these country names, as they do not match the names in the database
46-
country_name_mapping = {"Syria": "Syrian Arab Republic", "Israel and the occupied territories": "Israel"}
46+
country_name_mapping = {
47+
"Syria": "Syrian Arab Republic",
48+
"Israel and the occupied territories": "Israel",
49+
"Republic of Moldova": "Moldova, Republic of",
50+
"United Arab Emirates (UAE) delegation": "United Arab Emirates",
51+
"The Republic of South Sudan": "South Sudan",
52+
"United States of America": "United States",
53+
"Russia": "Russian Federation",
54+
"Iran": "Iran, Islamic Republic of",
55+
"Democratic Republic of the Congo": "Democratic Republic of Congo",
56+
}
57+
58+
# NOTE: Group country names
59+
countries_group = {
60+
"Indonesia and Timor-Leste",
61+
"Gulf Cooperation Council (GCC) Countries",
62+
}
4763

4864
country_operations_list = [
4965
country.text.strip() for key_operation in key_operations_country_list for country in key_operation.find_all("a")
@@ -67,6 +83,25 @@ def handle(self, *args, **kwargs):
6783
country_soup = BeautifulSoup(country_page.content, "html.parser")
6884
description_tag = country_soup.find("div", class_="description").find("div", class_="ck-text")
6985
description = description_tag.text.strip() if description_tag else None
86+
87+
# NOTE: Getting the countries that are part of the countries_group
88+
if name in countries_group:
89+
country_contact_soup = (
90+
country_soup.find(id="contact-country")
91+
.find("div", class_="icrc-container")
92+
.find_all("div", class_="contact-row")
93+
)
94+
for contact in country_contact_soup:
95+
country_name = contact.find("div", class_="title").text.strip()
96+
countries.append(
97+
{
98+
"Country": country_name,
99+
"ICRC presence": presence,
100+
"URL": country_url,
101+
"Key operation": key_operation,
102+
"Description": description,
103+
}
104+
)
70105
except Exception:
71106
pass
72107

@@ -81,6 +116,9 @@ def handle(self, *args, **kwargs):
81116
}
82117
)
83118

119+
# Remove the countries_group countries from the countries list
120+
countries = [country for country in countries if country["Country"] not in countries_group]
121+
84122
added = 0
85123
created_ns_presence_pk = []
86124
for data in countries:

0 commit comments

Comments
 (0)