Skip to content

Commit b87e4e6

Browse files
committed
Adding intake asset plugin.
1 parent 33192c5 commit b87e4e6

File tree

4 files changed

+24
-14
lines changed

4 files changed

+24
-14
lines changed

extraction_methods/plugins/intake_esm_assets.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
# Python imports
5252
import logging
5353
from datetime import datetime
54+
from pathlib import Path
5455

5556
# Thirdparty imports
5657
import intake
@@ -87,15 +88,17 @@ def open_catalog(self):
8788
return catalog
8889

8990
def update_search_kwargs(self, body:dict):
90-
for search_kwarg_key, search_kwarg_value in search_kwargs.items():
91-
if self.search_kwarg_value[0] == self.exists_key:
92-
self.search_kwargs[search_kwarg_key] = body[self.search_kwarg_value[1:]]
91+
for search_kwarg_key, search_kwarg_value in self.search_kwargs.items():
92+
if search_kwarg_value[0] == self.exists_key:
93+
self.search_kwargs[search_kwarg_key] = body[search_kwarg_value[1:]]
9394

9495
def run(self, body: dict, **kwargs) -> dict:
9596

9697
self.update_search_kwargs(body)
9798
catalog = self.open_catalog()
9899

100+
assets = body.get("assets", {})
101+
99102
for _, row in catalog.df.iterrows():
100103
href = getattr(row, self.object_attr)
101104
asset = {
@@ -106,7 +109,7 @@ def run(self, body: dict, **kwargs) -> dict:
106109
for extraction_method in self.extraction_methods:
107110
asset = extraction_method.run(asset)
108111

109-
assets[Path(path).name] = asset
112+
assets[Path(href).name] = asset
110113

111114
body["assets"] = assets
112115

extraction_methods/plugins/json_file.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414

1515
# Python imports
1616
from collections import defaultdict
17+
import os
1718
import json
1819
import logging
1920
from typing import Optional
2021

2122
from extraction_methods.core.extraction_method import ExtractionMethod
22-
from extraction_methods.core.types import SpatialExtent, TemporalExtent
2323

2424
LOGGER = logging.getLogger(__name__)
2525

@@ -51,27 +51,33 @@ class JsonFileExtract(ExtractionMethod):
5151
"""
5252

5353
def get_facet_values(self) -> list:
54-
output = defaultdict(list)
54+
output = defaultdict(set)
5555

5656
for filepath in os.listdir(self.dirpath):
5757

58-
with open(filepath, "r") as file:
58+
with open(os.path.join(self.dirpath, filepath), "r") as file:
5959
item = json.load(file)
6060

61-
item_properties = item["body"]["properties"]
61+
item_properties = item["properties"]
6262

6363
for facet in self.terms:
6464
if facet in item_properties:
65-
output[facet].extend(item_properties[facet])
65+
if isinstance(item_properties[facet], list):
66+
output[facet].update(item_properties[facet])
67+
else:
68+
output[facet].add(item_properties[facet])
69+
70+
for facet in self.terms:
71+
output[facet] = list(output[facet])
6672

6773
return output
6874

6975
@staticmethod
70-
def get_spatial_extent(item_list: list) -> Optional[SpatialExtent]:
76+
def get_spatial_extent(item_list: list) -> dict:
7177
...
7278

7379
@staticmethod
74-
def get_temporal_extent(item_list: list) -> Optional[TemporalExtent]:
80+
def get_temporal_extent(item_list: list) -> dict:
7581
start_datetime = []
7682
end_datetime = []
7783
datetime = []
@@ -91,7 +97,7 @@ def get_extent(self, file_id: str) -> dict:
9197
file_data = json.load(file)
9298

9399
for item in file_data:
94-
if item["body"]["collection_id"] == file_id:
100+
if item["collection_id"] == file_id:
95101
item_list.append(item)
96102

97103
# spatial_extent = self.get_spatial_extent(item_list)
@@ -100,7 +106,7 @@ def get_extent(self, file_id: str) -> dict:
100106
def run(self, body: dict, **kwargs) -> dict:
101107
output = self.get_facet_values()
102108

103-
if values:
109+
if output:
104110
body |= output
105111

106112
# No need to include extents since the example scanner has none.

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ elasticsearch==7.13.1
1414
fsspec==2021.10.0
1515
gcsfs==2021.10.0
1616
idna==2.10
17+
intake-esm==2024.2.6
1718
imagesize==1.2.0
1819
Jinja2==3.0.1
1920
jmespath==0.10.0

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171
"lambda = extraction_methods.plugins.lambda:LambdaExtract",
7272
"netcdf = extraction_methods.plugins.netcdf:NetCDFfExtract",
7373
"asset_aggregator = extraction_methods.plugins.asset_aggregator:AssetAggregatorExtract",
74-
"intake_asset = extraction_methods.plugins.intake_esm_assets:IntakeESMAssetExtract",
74+
"intake_assets = extraction_methods.plugins.intake_esm_assets:IntakeESMAssetExtract",
7575
],
7676
"extraction_methods.header.backends": [
7777
"ncml = extraction_methods.plugins.header.backends.ncml:NcMLBackend",

0 commit comments

Comments
 (0)