Skip to content

Commit 763858f

Browse files
Merge pull request #1671 from IFRCGo/fix/auto-internal-plan-load
Country plan import: Only download pdf files
2 parents 23845f2 + 745dcdf commit 763858f

File tree

2 files changed

+76
-26
lines changed

2 files changed

+76
-26
lines changed

country_plan/management/commands/ingest_country_plan_file.py

Lines changed: 46 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import logging
22
import requests
3-
from typing import Union
3+
from typing import Union, Optional, Tuple
44
from datetime import datetime
55
from django.utils.timezone import make_aware
66
from django.core.management.base import BaseCommand
@@ -20,38 +20,57 @@
2020
INTERNAL_SOURCE = 'https://go-api.ifrc.org/Api/FedNetAppeals?AppealsTypeId=1844&Hidden=false'
2121

2222

23+
def parse_date(text: Optional[str]) -> Optional[datetime]:
    """
    Turn an Appeal API timestamp string into a django datetime.

    Parameters
    ----------
    text : str, optional
        Timestamp eg: 2022-11-29T11:24:00

    Returns
    -------
    datetime or None
        The parsed value passed through ``make_aware``, or None when
        ``text`` is None or empty.
    """
    if not text:
        return None
    # NOTE: Format is assumed by looking at the data from Appeal API
    naive = datetime.strptime(text, '%Y-%m-%dT%H:%M:%S')
    return make_aware(naive)
36+
37+
38+
def get_meta_from_url(url, timeout: float = 30) -> Tuple[Optional[str], str]:
    """
    Fetch url headers and return content-type and filename

    Only a HEAD request is sent, so the document body is not downloaded.

    Parameters
    ----------
    url : str
        Location of the document to inspect.
    timeout : float
        Seconds to wait for the server. Without it a stalled server would
        block the caller indefinitely.

    Returns
    -------
    tuple
        ``(content_type, filename)``.
        ``content_type`` is the media type from the Content-Type header,
        lower-cased and without parameters (``application/pdf; charset=x``
        becomes ``application/pdf``), or None when the header is missing.
        ``filename`` is taken from the Content-Disposition header and falls
        back to ``document.pdf`` when none is provided.
    """
    # Local imports keep this function self-contained.
    import os
    from email.message import Message

    default_filename = 'document.pdf'

    def _get_filename_from_headers(resp):
        # Eg: Content-Disposition: 'attachment;filename=UP_Botswana_2023.pdf'
        disposition = resp.headers.get('Content-Disposition')
        if not disposition:
            return default_filename
        # Let the stdlib header parser deal with quoting, parameter order
        # and RFC 2231 encoded values instead of splitting by hand.
        message = Message()
        message['Content-Disposition'] = disposition
        filename = message.get_filename()
        if not filename:
            return default_filename
        # The header comes from a remote server: keep only the last path
        # component so it cannot point outside the upload directory.
        filename = os.path.basename(filename.replace('\\', '/')).strip()
        return filename or default_filename

    # HEAD does not follow redirects unless asked to; follow them so the
    # headers describe the document itself and not the redirect response.
    resp = requests.head(url, allow_redirects=True, timeout=timeout)
    content_type = resp.headers.get('Content-Type')
    if content_type:
        # Media types are case-insensitive and may carry parameters.
        content_type = content_type.split(';', 1)[0].strip().lower()
    return content_type, _get_filename_from_headers(resp)
52+
53+
2354
class Command(BaseCommand):
2455
@staticmethod
25-
def parse_date(text: str) -> Union[datetime, None]:
56+
def load_file_to_country_plan(country_plan: CountryPlan, url: str, filename: str, field_name: str):
2657
"""
27-
Convert Appeal API datetime into django datetime
28-
Parameters
29-
----------
30-
text : str
31-
Datetime eg: 2022-11-29T11:24:00
58+
Fetch file using url and save to country_plan
3259
"""
33-
if text:
34-
return make_aware(
35-
# NOTE: Format is assumed by looking at the data from Appeal API
36-
datetime.strptime(text, '%Y-%m-%dT%H:%M:%S')
37-
)
38-
39-
@staticmethod
40-
def load_file_to_country_plan(country_plan: CountryPlan, url: str, filename: str, field_name: str):
4160
with DownloadFileManager(url, suffix='.pdf') as f:
4261
getattr(country_plan, field_name).save(
4362
filename,
4463
File(f),
4564
)
4665

47-
def load_for_country(self, country_data, file_field, field_inserted_date_field):
66+
def load_for_country(self, country_data: dict, file_field: str, field_inserted_date_field: str):
4867
country_iso2 = country_data.get('LocationCountryCode')
4968
country_name = country_data.get('LocationCountryName')
50-
public_plan_url = country_data.get('BaseDirectory') or '' + country_data.get('BaseFileName') or ''
51-
inserted_date = self.parse_date(country_data.get('Inserted'))
69+
plan_url = country_data['BaseDirectory'] + country_data['BaseFileName']
70+
inserted_date = parse_date(country_data.get('Inserted'))
5271
if (
5372
(country_iso2 is None and country_name is None) or
54-
public_plan_url is None or
73+
plan_url is None or
5574
inserted_date is None
5675
):
5776
return
@@ -72,14 +91,18 @@ def load_for_country(self, country_data, file_field, field_inserted_date_field):
7291
if existing_inserted_date and existing_inserted_date >= inserted_date:
7392
# No need to do anything here
7493
return
75-
self.stdout.write(f'- Saving data for country:: {country_plan.country.name}')
76-
public_plan_url = country_data['BaseDirectory'] + country_data['BaseFileName']
94+
content_type, content_name = get_meta_from_url(plan_url)
95+
self.stdout.write(f'- Checking plan file for country:: {country_plan.country.name}')
96+
if content_type != 'application/pdf':
97+
# Only looking for PDFs
98+
return
99+
self.stdout.write(' - Saving data')
77100
setattr(country_plan, field_inserted_date_field, inserted_date)
78101
self.load_file_to_country_plan(
79102
country_plan,
80-
public_plan_url,
103+
plan_url,
81104
# NOTE: File provided are PDF,
82-
f"{file_field.replace('_', '-').replace('file', '')}-{country_data['BaseFileName']}.pdf",
105+
f"{file_field.replace('_', '-').replace('-file', '')}-{content_name}",
83106
file_field,
84107
)
85108
country_plan.is_publish = True
@@ -90,7 +113,6 @@ def load_for_country(self, country_data, file_field, field_inserted_date_field):
90113
'is_publish',
91114
)
92115
)
93-
return True
94116

95117
def load(self, url: str, file_field: str, field_inserted_date_field: str):
96118
auth = (settings.APPEALS_USER, settings.APPEALS_PASS)

country_plan/tests/test_commands.py

Lines changed: 30 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,14 @@
4545
'Inserted': '2022-11-29T11:24:00',
4646
'LocationCountryCode': 'GR',
4747
'LocationCountryName': 'Greece'
48-
}
48+
},
49+
{
50+
'BaseDirectory': FILE_BASE_DIRECTORY,
51+
'BaseFileName': 'NOOP',
52+
'Inserted': '2022-11-29T11:24:00',
53+
'LocationCountryCode': 'XY',
54+
'LocationCountryName': 'Myanmar'
55+
},
4956
]
5057

5158
INTERNAL_APPEAL_COUNTRY_PLAN_MOCK_RESPONSE = [
@@ -85,13 +92,18 @@ def raise_for_status(self):
8592
def iter_content(self, **_):
8693
return self.stream
8794

88-
def __init__(self, json=None, stream=None):
95+
def __init__(self, json=None, stream=None, headers=None):
96+
self._headers = headers
8997
self._json = json
9098
self.stream = stream
9199

92100
def json(self):
93101
return self._json
94102

103+
@property
104+
def headers(self):
105+
return self._headers
106+
95107
def __enter__(self):
96108
return MockResponse.FileStream(self.stream)
97109

@@ -108,6 +120,18 @@ def mock_request(url, *_, **kwargs):
108120
if url.startswith(FILE_BASE_DIRECTORY):
109121
return MockResponse(stream=[b''])
110122

123+
def mock_request_head(url, *_, **kwargs):
    """
    Stand-in for ``requests.head`` that only provides response headers.

    By default the response describes a PDF attachment named
    ``Sample_document_2023.pdf``. Two url suffixes change that:
    ``NOOP`` reports a non-PDF content type, and ``000004`` reports an
    empty Content-Disposition header.
    """
    headers = {
        'Content-Type': 'application/pdf',
        'Content-Disposition': 'attachment;filename=Sample_document_2023.pdf',
    }
    if url.endswith('NOOP'):
        # Any non-PDF media type works here; use a real one.
        headers['Content-Type'] = 'text/html'
    elif url.endswith('000004'):
        # No filename supplied by the server.
        headers['Content-Disposition'] = ''
    return MockResponse(headers=headers)
133+
134+
@mock.patch('country_plan.management.commands.ingest_country_plan_file.requests.head', side_effect=mock_request_head)
111135
@mock.patch('country_plan.management.commands.ingest_country_plan_file.requests.get', side_effect=mock_request)
112136
@mock.patch('main.utils.requests.get', side_effect=mock_request)
113137
def test_country_plan_ingest(self, *_):
@@ -139,3 +163,7 @@ def test_country_plan_ingest(self, *_):
139163
assert CountryPlan.objects.filter(is_publish=True).count() == 5
140164
assert CountryPlan.objects.exclude(public_plan_file='').count() == 4
141165
assert CountryPlan.objects.exclude(internal_plan_file='').count() == 2
166+
# First downloaded document. Others will have Sample_document_2023{random-chars}.pdf
167+
assert CountryPlan.objects.filter(country__iso='SY').first().public_plan_file.name.endswith('Sample_document_2023.pdf')
168+
# Without attachment filename
169+
assert CountryPlan.objects.filter(country__iso='GR').first().public_plan_file.name.endswith('document.pdf')

0 commit comments

Comments
 (0)