Skip to content

Commit 58a399e

Browse files
authored
Improve import code: fix problems in value field and avoid empty metadata values (#47)
1 parent 767e8b8 commit 58a399e

File tree

5 files changed

+30
-13
lines changed

5 files changed

+30
-13
lines changed

vbos/datasets/management/commands/import_datasets.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,14 @@ def handle(self, *args, **options):
2424
]
2525

2626
for row in reader:
27-
type = REVERSE_TYPE_MAPPING[row["Type"]]
27+
type = (
28+
REVERSE_TYPE_MAPPING[row["Type"].strip()]
29+
if row["Type"]
30+
else "baseline"
31+
)
2832
if (
29-
row["Indicator"],
30-
row["Cluster"],
33+
row["Indicator"].strip(),
34+
row["Cluster"].strip(),
3135
type,
3236
) not in datasets:
3337
TabularDataset.objects.create(
@@ -41,8 +45,8 @@ def handle(self, *args, **options):
4145
)
4246
datasets.append(
4347
(
44-
row["Indicator"],
45-
row["Cluster"],
48+
row["Indicator"].strip(),
49+
row["Cluster"].strip(),
4650
type,
4751
)
4852
)

vbos/datasets/management/commands/import_tabular_data.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ def handle(self, *args, **options):
2727

2828
created_count += 1
2929
except Exception as e:
30-
print(e)
30+
print(f"{e}: {row}")
3131
error_count += 1
3232

3333
self.stdout.write(f"{created_count} tabular items created from {filename}.")

vbos/datasets/test/test_management_commands.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,16 @@ def test_import(self):
4444
"55 tabular items created",
4545
self.out.getvalue(),
4646
)
47+
self.assertEqual(
48+
len(
49+
[
50+
i[1]
51+
for i in TabularItem.objects.first().metadata.items()
52+
if i[1] == ""
53+
]
54+
),
55+
0,
56+
)
4757

4858
# Clean redundant entries
4959
call_command("clean_tabular_data", stdout=self.out)

vbos/datasets/test/test_tabular_views.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
from datetime import date
2+
3+
from django.urls import reverse
24
from rest_framework import status
35
from rest_framework.test import APITestCase
4-
from django.urls import reverse
56

6-
from ..models import AreaCouncil, Cluster, Province, TabularDataset, TabularItem
77
from ...users.test.factories import UserFactory
8+
from ..models import AreaCouncil, Cluster, Province, TabularDataset, TabularItem
89

910

1011
class TestTabularDatasetListDetailViews(APITestCase):
@@ -79,7 +80,7 @@ def setUp(self):
7980
date=date(2025, 1, 1),
8081
province=Province.objects.get(name="TORBA"),
8182
attribute="Population",
82-
value=13874,
83+
value=float("13874"),
8384
)
8485
TabularItem.objects.create(
8586
dataset=self.dataset_1,

vbos/datasets/utils.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ def __init__(self, row: Dict):
5252
except ValueError:
5353
self.date = None
5454
self.remove_keys()
55+
self.clean_metadata()
5556

5657
def get_property(self, keys: List[str]) -> str:
5758
value = ""
@@ -66,6 +67,7 @@ def get_property(self, keys: List[str]) -> str:
6667
def remove_keys(self):
6768
keys = [
6869
"Unit",
70+
"National",
6971
"Source",
7072
"Year Collected",
7173
"Frequency Collection",
@@ -119,9 +121,9 @@ def group_by_dataset(data):
119121

120122
def get_dataset(row):
121123
return TabularDataset.objects.get(
122-
name=row["Indicator"],
123-
cluster=Cluster.objects.get_or_create(name=row["Cluster"])[0],
124-
type=REVERSE_TYPE_MAPPING[row["Type"]],
124+
name=row["Indicator"].strip(),
125+
cluster=Cluster.objects.get_or_create(name=row["Cluster"].strip())[0],
126+
type=REVERSE_TYPE_MAPPING[row["Type"]] if row["Type"] else "baseline",
125127
)
126128

127129

@@ -130,7 +132,7 @@ def create_tabular_item(csv_row: CSVRow, dataset: TabularDataset):
130132
dataset=dataset,
131133
metadata=csv_row.metadata,
132134
attribute=csv_row.attribute.strip(),
133-
value=csv_row.value,
135+
value=float(csv_row.value.replace(",", "")),
134136
date=csv_row.date,
135137
province=Province.objects.filter(name__iexact=csv_row.province).first(),
136138
area_council=AreaCouncil.objects.filter(

0 commit comments

Comments
 (0)