diff --git a/vbos/datasets/management/commands/import_datasets.py b/vbos/datasets/management/commands/import_datasets.py index 70b3275..c92259d 100644 --- a/vbos/datasets/management/commands/import_datasets.py +++ b/vbos/datasets/management/commands/import_datasets.py @@ -24,10 +24,14 @@ def handle(self, *args, **options): ] for row in reader: - type = REVERSE_TYPE_MAPPING[row["Type"]] + type = ( + REVERSE_TYPE_MAPPING[row["Type"].strip()] + if row["Type"] + else "baseline" + ) if ( - row["Indicator"], - row["Cluster"], + row["Indicator"].strip(), + row["Cluster"].strip(), type, ) not in datasets: TabularDataset.objects.create( @@ -41,8 +45,8 @@ def handle(self, *args, **options): ) datasets.append( ( - row["Indicator"], - row["Cluster"], + row["Indicator"].strip(), + row["Cluster"].strip(), type, ) ) diff --git a/vbos/datasets/management/commands/import_tabular_data.py b/vbos/datasets/management/commands/import_tabular_data.py index 9397956..57b7baf 100644 --- a/vbos/datasets/management/commands/import_tabular_data.py +++ b/vbos/datasets/management/commands/import_tabular_data.py @@ -27,7 +27,7 @@ def handle(self, *args, **options): created_count += 1 except Exception as e: - print(e) + print(f"{e}: {row}") error_count += 1 self.stdout.write(f"{created_count} tabular items created from {filename}.") diff --git a/vbos/datasets/test/test_management_commands.py b/vbos/datasets/test/test_management_commands.py index 1b487da..43033ab 100644 --- a/vbos/datasets/test/test_management_commands.py +++ b/vbos/datasets/test/test_management_commands.py @@ -44,6 +44,16 @@ def test_import(self): "55 tabular items created", self.out.getvalue(), ) + self.assertEqual( + len( + [ + i[1] + for i in TabularItem.objects.first().metadata.items() + if i[1] == "" + ] + ), + 0, + ) # Clean redundant entries call_command("clean_tabular_data", stdout=self.out) diff --git a/vbos/datasets/test/test_tabular_views.py b/vbos/datasets/test/test_tabular_views.py index 087ee37..a0785d0 100644 --- a/vbos/datasets/test/test_tabular_views.py +++ b/vbos/datasets/test/test_tabular_views.py @@ -1,10 +1,11 @@ from datetime import date + +from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase -from django.urls import reverse -from ..models import AreaCouncil, Cluster, Province, TabularDataset, TabularItem from ...users.test.factories import UserFactory +from ..models import AreaCouncil, Cluster, Province, TabularDataset, TabularItem class TestTabularDatasetListDetailViews(APITestCase): @@ -79,7 +80,7 @@ def setUp(self): date=date(2025, 1, 1), province=Province.objects.get(name="TORBA"), attribute="Population", - value=13874, + value=float("13874"), ) TabularItem.objects.create( dataset=self.dataset_1, diff --git a/vbos/datasets/utils.py b/vbos/datasets/utils.py index 0aa11cb..effbb8f 100644 --- a/vbos/datasets/utils.py +++ b/vbos/datasets/utils.py @@ -52,6 +52,7 @@ def __init__(self, row: Dict): except ValueError: self.date = None self.remove_keys() + self.clean_metadata() def get_property(self, keys: List[str]) -> str: value = "" @@ -66,6 +67,7 @@ def get_property(self, keys: List[str]) -> str: def remove_keys(self): keys = [ "Unit", + "National", "Source", "Year Collected", "Frequency Collection", @@ -119,9 +121,9 @@ def group_by_dataset(data): def get_dataset(row): return TabularDataset.objects.get( - name=row["Indicator"], - cluster=Cluster.objects.get_or_create(name=row["Cluster"])[0], - type=REVERSE_TYPE_MAPPING[row["Type"]], + name=row["Indicator"].strip(), + cluster=Cluster.objects.get_or_create(name=row["Cluster"].strip())[0], + type=REVERSE_TYPE_MAPPING[row["Type"]] if row["Type"] else "baseline", ) @@ -130,7 +132,7 @@ def create_tabular_item(csv_row: CSVRow, dataset: TabularDataset): dataset=dataset, metadata=csv_row.metadata, attribute=csv_row.attribute.strip(), - value=csv_row.value, + value=float(csv_row.value.replace(",", "")), date=csv_row.date, province=Province.objects.filter(name__iexact=csv_row.province).first(), area_council=AreaCouncil.objects.filter(