Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion common/config/attributes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path
from typing import Annotated, Any

import yaml
import xarray as xr
from pydantic import BaseModel, Field

Expand All @@ -25,6 +25,25 @@ class NcAttributes(BaseModel):
Field(description="Variable-specific attributes", default_factory=dict),
]


# Optional path to a YAML file holding extra attributes. The default of None
# means there is nothing for add_attributes_from_yaml to load, so the
# annotation has to admit None as well as str.
additional_attributes_path: Annotated[
    str | None,
    Field(
        description="Path to the file that defines additional attributes for source"
    ),
] = None

def add_attributes_from_yaml(self):
    """Merge attributes from the optional YAML file into this configuration.

    Does nothing when ``additional_attributes_path`` is None. Otherwise the
    file is parsed with ``yaml.safe_load`` and its ``global_attributes`` and
    ``variable_attributes`` mappings, when present, are merged into
    ``self.global_attributes`` and ``self.variables``.

    On a key collision the value already set on this object wins. The merge
    is shallow: a variable already present in ``self.variables`` replaces the
    YAML entry for that variable as a whole.
    """
    if self.additional_attributes_path is None:
        return

    # Build a real Path instance first: calling the unbound Path.open() on a
    # plain str only works by accident on some Python versions.
    yaml_path = Path(self.additional_attributes_path)
    with yaml_path.open("r", encoding="utf-8") as file:
        # An empty document loads as None; treat it as "no attributes".
        data = yaml.safe_load(file) or {}

    # "key:" with no value also loads as None, hence the "or {}" guards.
    if "global_attributes" in data:
        self.global_attributes = (
            (data["global_attributes"] or {}) | self.global_attributes
        )
    if "variable_attributes" in data:
        self.variables = (data["variable_attributes"] or {}) | self.variables


def apply_to_dataset(self, ds: xr.Dataset):
"""Apply the configured attributes to an xarray Dataset"""
for var_name, attrs in self.variables.items():
Expand Down
67 changes: 67 additions & 0 deletions common/config/mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,72 @@ class OptionalDepthMappingMixin:
list[DepthGroup] | None,
Field(
description="Depth mappings for variables with multiple depth levels",

),
] = None



class SplitOperator(BaseModel):
    """Describe how one source variable is split into several new variables.

    The source variable is split on ``sep`` and the resulting pieces are
    mapped, by index, to the configured output variables.
    """

    sep: Annotated[str, Field(description="The separator")]

    output_variables: Annotated[
        dict[int, str],
        Field(description="Mapping of index number to output variable."),
    ]

    source_variable: Annotated[
        str,
        Field(description="The source variable to split into multiple columns"),
    ]

class SplitOperations(BaseModel):
    """Container for the list of split operations configured for a dataset."""

    split_operations: Annotated[
        list[SplitOperator],
        Field(description="List of variables to split into multiple variables"),
    ]


class VariableConverterMixIn:
    """Mixin to add column conversion rules to a dataset."""

    # Optional: defaults to None when no conversion is configured, so the
    # annotation admits None explicitly instead of relying on the default
    # bypassing validation.
    variable_converter: Annotated[
        SplitOperations | None,
        Field(description="Split variable converter"),
    ] = None



class ProfileDepthMappings(BaseModel):
    """Variable mappings for one depth level of a profile dataset."""

    # Optional fixed depth; None (the default) means no depth was configured,
    # so the annotation has to admit None as well as float.
    depth: Annotated[
        float | None,
        Field(description="Optional- fixed depth for the mapping."),
    ] = None

    # Required: input variable name -> output variable name.
    mappings: Annotated[
        dict[str, str],
        Field(
            description="Maps input variables to output variables at the current depth ",
        ),
    ]



class OptionalProfileDepthMixin:
    """Mixin to add profile depth mappings configuration to a dataset."""

    # Optional: None (the default) means no profile mappings are configured.
    # Annotated as "| None" to match the default.
    profile_data: Annotated[
        list[ProfileDepthMappings] | None,
        Field(
            description="Mapping for variables with multiple depth levels",
        ),
    ] = None

2 changes: 2 additions & 0 deletions common/readers/pandas_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class PandasCSVReader(BaseModel):
comment: Annotated[str | None, Field(description="CSV line comment character")] = (
None
)
# String to treat as a missing value. read_df forwards it, together with the
# other model fields, to pd.read_csv as the ``na_values`` keyword argument.
na_values: str = "None"
# delim_whitespace
# skiprows
# sep
Expand All @@ -24,4 +25,5 @@ class PandasCSVReader(BaseModel):
def read_df(self, file_path) -> pd.DataFrame:
    """Load the CSV at ``file_path`` into a Pandas DataFrame.

    Every field of this model (as returned by ``model_dump``) is passed to
    ``pd.read_csv`` as a keyword argument.
    """
    return pd.read_csv(file_path, **self.model_dump())
88 changes: 88 additions & 0 deletions pipeline/s3_timeseries/datasets_config/cvow.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"reader": {
"sep": ",",
"comment": "#",
"na_values": "No data"
},
"station": "CVOW Wave Rider 1",
"drop_vars": [
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV DATAWELL DIRECTIONAL SPECTRUM () (last)",
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV PEAK SPECTRAL DENSITY POWER (m2/Hz) (nodatamode_repeat)",
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WATER CURRENT VERTICAL MEAN SPEED (m/s) (nodatamode_repeat)",
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE M01 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE M13 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE M24 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE MM10 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE MM20 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE PEAK PERIOD (s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV LOCATION () (last)",
"CVOW WRB01_DWR4_HQ PRV PEAK SPECTRAL DENSITY POWER (m2/Hz) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WATER CURRENT MEAN DIRECTION (Deg) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WATER CURRENT MEAN SPEED (m/s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WATER TEMPERATURE (Cel) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE M01 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE M13 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE M24 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE MEAN DIRECTION (Deg) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE MM10 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE MM20 PERIOD (s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE PEAK PERIOD (s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE PERIOD (s) (nodatamode_repeat)",
"CVOW WRB01_DWR4_HQ PRV WAVE SIGNIFICANT HEIGHT (m) (nodatamode_repeat)"
],
"s3_source": {
"bucket": "ott-cvow-temp",
"prefix": "/"
},
"attributes": {
"additional_attributes_path": "datasets_config/cvow.yaml"
},
"start_date": "2025-05-02",
"dataset_type": "timeseries",
"file_pattern": {
"day_pattern": "CVOW_{partition_date:%Y-%m-%d}*.csv"
},
"source_time_var": "Time range",
"variable_mappings": [
{
"output": "time",
"source": "Time range"
},
{
"output": "WATER_CURRENT_MEAN_DIRECTION",
"source": "CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WATER CURRENT MEAN DIRECTION (Deg) (nodatamode_repeat)"
},
{
"output": "WATER_CURRENT_MEAN_SPEED",
"source": "CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WATER CURRENT MEAN SPEED (m/s) (nodatamode_repeat)"
},
{
"output": "WATER_TEMPERATURE",
"source": "CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WATER TEMPERATURE (Cel) (nodatamode_repeat)"
},
{
"output": "WAVE_MEAN_DIRECTION",
"source": "CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE MEAN DIRECTION (Deg) (nodatamode_repeat)"
},
{
"output": "WAVE_PERIOD",
"source": "CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE PERIOD (s) (nodatamode_repeat)"
},
{
"output": "WAVE_SIGNIFICANT_HEIGHT",
"source": "CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV WAVE SIGNIFICANT HEIGHT (m) (nodatamode_repeat)"
}
],
"variable_converter": {
"split_operations": [
{
"sep": ":",
"source_variable": "CVOW WRB03_DWR4_OSS PRV_HRD_150 PRV LOCATION () (last)",
"output_variables": {
"0": "latitude",
"2": "longitude"
}
}
]
}
}
84 changes: 84 additions & 0 deletions pipeline/s3_timeseries/datasets_config/cvow.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
global_attributes:
title: CVOW Wave Buoy, Timeseries Data
acknowledgement: This dataset is part of a MetOcean Cyberinfrastructure supported by the Integrated Ocean Observing System (IOOS) to streamline integration of data from offshore operations.
comment: CVOW Wave Rider Buoy deployed by DEME Offshore US
contributor_name: DEME Offshore US
contributor_address: 150 Boush St, Norfolk, Suite 1000, VA 23510
contributor_url: www.deme-group.com
contributor_email: de.beuf.hendrik at deme-group.com
creator_country: USA
creator_name: Coastal Virginia Offshore Wind
creator_address: 500 Orapax Street, Norfolk, VA 23507
creator_state: Virginia
creator_url: https://coastalvawind.com/
creator_email: cvowops@dominionenergy.com
institution: Virginia Electric and Power Company, d/b/a Dominion Energy Virginia
creator_sector: industry
geospatial_lat_max: 36.93842989
geospatial_lat_min: 36.93842989
geospatial_lon_max: -75.4423258
geospatial_lon_min: -75.4423258
id: CVOW_WR1
infoUrl: https://coastalvawind.com/
license: The data may be used and redistributed for free but is not intended for legal use, since it may contain inaccuracies. Neither the data Contributor, Dominion, NOAA, nor the United States Government, nor any of their employees or contractors, makes any warranty, express or implied, including warranties of merchantability and fitness for a particular purpose, or assumes any legal liability for the accuracy, completeness, or usefulness, of this information.
platform_name: CVOW_WR1
processing_level: raw dataset, no qc provided
project: Coastal Virginia Offshore Wind
publisher_country: USA
publisher_email: devops at rpsgroup.com, info at neracoos.org
publisher_institution: MARACOOS, NERACOOS
publisher_name: MARACOOS, NERACOOS
publisher_phone: (401) 789-6224, (603) 319-1785
publisher_type: institution
publisher_url: https://www.maracoos.org, https://www.neracoos.org
summary: This dataset contains observations to support further understanding of metocean conditions off the coast of Virginia. Data is collected from a wave rider buoy and the dataset includes measurements of wave peak height, wave significant height, wave peak direction, and wave significant period.
Instrument: Datawell DWR4 (https://datawell.nl/products/directional-waverider-4/)
cdm_data_type: TimeSeries
featureType: timeSeries
cdm_timeseries_variables: 'station,longitude,latitude'
variable_attributes:
time:
ioos_category: Time
long_name: Time
short_name: time
standard_name: time
station:
ioos_category: Identifier
long_name: Station CVOW Wave Buoy
cf_role: timeseries_id
WATER_CURRENT_MEAN_DIRECTION:
standard_name: sea_water_velocity_to_direction
long_name: Current direction at water surface
ioos_category: currents
units: degree
WATER_CURRENT_MEAN_SPEED:
standard_name: sea_water_speed
long_name: Current speed
ioos_category: currents
units: m s-1
WATER_TEMPERATURE:
ioos_category: Water Property
standard_name: sea_water_temperature
long_name: Sea Water Temperature
units: degree_Celsius
WAVE_MEAN_DIRECTION:
long_name: mean wave direction
standard_name: sea_surface_wave_mean_from_direction
units: degrees
ioos_category: Surface Waves
WAVE_PEAK_PERIOD:
long_name: peak period
standard_name: sea_surface_wave_period_at_variance_spectral_density_maximum
units: seconds
ioos_category: Surface Waves
WAVE_PERIOD:
long_name: mean wave period
standard_name: sea_surface_wave_mean_period
units: seconds
ioos_category: Surface Waves
WAVE_SIGNIFICANT_HEIGHT:
long_name: significant wave height
standard_name: sea_surface_wave_significant_height
units: m
ioos_category: Surface Waves

Loading
Loading