Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 100 additions & 13 deletions speasy/core/codecs/bundled_codecs/hapi_csv/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from speasy.core.codecs import CodecInterface, register_codec
from speasy.core.codecs.codec_interface import Buffer
from speasy.core.cache import CacheCall
from speasy.core.data_containers import VariableAxis
from speasy.products import SpeasyVariable, VariableTimeAxis, DataContainer
from speasy.products.variable import same_time_axis
from .csv_file import HapiCsvFile, HapiCsvParameter
Expand All @@ -17,19 +18,28 @@
from .writer import save_hapi_csv


def _time_dependent_axis_name(ax: VariableAxis) -> str:
    """Return the HAPI CSV parameter name under which a time-varying axis'
    centers are stored (the axis name with a fixed suffix appended)."""
    suffix = "_centers_time_varying"
    return ax.name + suffix

def _get_variable_axes(variable: SpeasyVariable, is_time_dependent: bool) -> List[VariableAxis]:
    """Select the non-time axes of *variable* by time dependence.

    The first axis (the time axis) is always skipped; each remaining axis is
    kept when its ``is_time_dependent`` flag equals the requested value.
    """
    selected = []
    for axis in variable.axes[1:]:
        if axis.is_time_dependent == is_time_dependent:
            selected.append(axis)
    return selected

def _numpy_dtype_to_hapi_type(dtype: np.dtype) -> str:
if np.issubdtype(dtype, np.integer):
return "int"
elif np.issubdtype(dtype, np.floating):
return "double"
else:
raise ValueError(f"Unsupported data type {dtype}")

def _create_meta(variable:SpeasyVariable) -> Dict[str, Any]:
meta = {
"name": variable.name,
"units": variable.unit,
"fill": variable.fill_value,
"description": variable.meta.get("description", "")
}
if np.issubdtype(variable.values.dtype, np.integer):
meta["type"] = "int"
elif np.issubdtype(variable.values.dtype, np.floating):
meta["type"] = "double"
else:
raise ValueError(f"Unsupported data type {variable.values.dtype}")
meta["type"] = _numpy_dtype_to_hapi_type(variable.values.dtype)

labels = variable.columns
if labels is not None and len(labels) > 0:
Expand All @@ -42,6 +52,25 @@ def _create_meta(variable:SpeasyVariable) -> Dict[str, Any]:

if len(variable.values.shape) > 1:
meta["size"] = variable.values.shape[1:]

bins = []
time_independent_axes = _get_variable_axes(variable, is_time_dependent=False)
if time_independent_axes:
bins.extend([
{"name": ax.name, "units": ax.unit, "centers": ax.values.tolist()}
for ax in time_independent_axes
])

time_dependent_axes = _get_variable_axes(variable, is_time_dependent=True)
if time_dependent_axes:
bins.extend([
{"name": ax.name, "units": ax.unit, "centers": _time_dependent_axis_name(ax)}
for ax in time_dependent_axes
])

if bins:
meta["bins"] = bins

return meta


Expand All @@ -51,27 +80,83 @@ def _decode_meta(meta: Dict[str, Any]) -> Dict[str, Any]:
return meta


def _bin_to_axis(json_bin: Dict[str, Any], hap_csv_file: HapiCsvFile) -> VariableAxis:
centers = json_bin.get("centers")
name = json_bin.get("name", "bin_axis")
if centers is None:
raise ValueError("Invalid bin specification: missing 'centers' field")
if isinstance(centers, str):
hapi_parameter = hap_csv_file.get_parameter(centers)
Comment on lines +83 to +89
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parameter name hap_csv_file looks like a typo and is easy to confuse with hapi_csv_file used elsewhere. Renaming it to hapi_csv_file would improve readability and reduce the chance of mistakes when editing these helpers.

Suggested change
def _bin_to_axis(json_bin: Dict[str, Any], hap_csv_file: HapiCsvFile) -> VariableAxis:
centers = json_bin.get("centers")
name = json_bin.get("name", "bin_axis")
if centers is None:
raise ValueError("Invalid bin specification: missing 'centers' field")
if isinstance(centers, str):
hapi_parameter = hap_csv_file.get_parameter(centers)
def _bin_to_axis(json_bin: Dict[str, Any], hapi_csv_file: HapiCsvFile) -> VariableAxis:
centers = json_bin.get("centers")
name = json_bin.get("name", "bin_axis")
if centers is None:
raise ValueError("Invalid bin specification: missing 'centers' field")
if isinstance(centers, str):
hapi_parameter = hapi_csv_file.get_parameter(centers)

Copilot uses AI. Check for mistakes.
_meta = _decode_meta(hapi_parameter.meta)
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_decode_meta mutates the provided dict in-place (it pops units). Passing hapi_parameter.meta directly here will permanently remove units from the parameter metadata, which can affect later logic that expects the original meta. Pass a shallow copy (e.g., dict(hapi_parameter.meta)) into _decode_meta to avoid side effects.

Suggested change
_meta = _decode_meta(hapi_parameter.meta)
_meta = _decode_meta(dict(hapi_parameter.meta))

Copilot uses AI. Check for mistakes.
variable_axis = VariableAxis(values=hapi_parameter.values,
meta=_meta,
is_time_dependent=True,
name=name)
elif isinstance(centers, list):
try:
axis_values = np.array(centers, dtype=float)
except ValueError:
raise ValueError("Invalid bin specification: 'centers' list must contain numeric values")
variable_axis = VariableAxis(values=axis_values,
meta={"name": "centers", "UNITS": json_bin.get("units", None)},
is_time_dependent=False,
name=name)
else:
raise ValueError("Invalid bin specification: 'centers' must be either a string or a list")
return variable_axis


def _bins_to_axes(json_bins: List[Dict[str, Any]], hap_csv_file: HapiCsvFile) -> List[VariableAxis]:
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parameter name hap_csv_file looks like a typo and is easy to confuse with hapi_csv_file used elsewhere. Renaming it to hapi_csv_file would improve readability and reduce the chance of mistakes when editing these helpers.

Copilot uses AI. Check for mistakes.
axes = []
for json_bin in json_bins:
try:
axis = _bin_to_axis(json_bin, hap_csv_file)
axes.append(axis)
except ValueError as e:
log.warning(f"Skipping invalid bin specification: {e}")
return axes
Comment on lines +109 to +117
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Silently skipping invalid bin specifications can lead to a loaded variable having fewer axes than its declared size/bins imply, which is hard to diagnose and can produce inconsistent downstream behavior. Prefer failing fast (raise) or include enough context to reliably detect the error (e.g., include the bin dict and variable name), and only skip when the caller explicitly opts into best-effort parsing.

Copilot uses AI. Check for mistakes.


def _hapi_csv_to_speasy_variables(hapi_csv_file: HapiCsvFile, variables: List[AnyStr]) -> Mapping[str, SpeasyVariable]:
time_axis = VariableTimeAxis(values=hapi_csv_file.time_axis, meta=hapi_csv_file.time_axis_meta)
loaded_vars = {}
for var_name in variables:
parameter = hapi_csv_file.get_parameter(var_name)
if parameter is not None:
loaded_vars[var_name] = SpeasyVariable(axes=[time_axis], values=DataContainer(parameter.values,
meta=_decode_meta(
parameter.meta)))
if parameter is None:
continue
_axes = [time_axis]
if 'bins' in parameter.meta.keys():
_axes.extend(_bins_to_axes(parameter.meta.get("bins", []), hapi_csv_file))
loaded_vars[var_name] = SpeasyVariable(axes=_axes, values=DataContainer(parameter.values,
name=parameter.name,
meta=_decode_meta(
parameter.meta)))
return loaded_vars


def _make_hapi_csv_parameter(variable: SpeasyVariable) -> HapiCsvParameter:
return HapiCsvParameter(values=variable.values.values,
return HapiCsvParameter(values=variable.values,
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes HapiCsvParameter.values from the underlying numeric array to variable.values (which appears to be a container object in this codebase). Downstream, the writer expects .ndim, .shape, and slicing behavior consistent with a NumPy array. To avoid runtime failures, ensure values is the raw ndarray (or np.asarray(...)) that matches the previous behavior.

Suggested change
return HapiCsvParameter(values=variable.values,
# Ensure we pass the underlying numeric array, not the container object,
# so that downstream code sees NumPy-like `.ndim`, `.shape`, and slicing.
return HapiCsvParameter(values=np.asarray(variable.values),

Copilot uses AI. Check for mistakes.
meta=_create_meta(variable))


def _make_hapi_csv_time_axis(time_axis: VariableTimeAxis) -> HapiCsvParameter:
return HapiCsvParameter(values=time_axis.values,
return HapiCsvParameter(values=time_axis,
meta={"name": "Time", "type": "isotime", "units": "UTC", "length": 30, "fill": None})

def _get_hapi_csv_varying_axes(variable: SpeasyVariable) -> List[HapiCsvParameter]:
    """Build one HAPI CSV parameter per time-dependent axis of *variable*.

    Each time-varying axis is serialized as a standalone parameter whose name
    comes from :func:`_time_dependent_axis_name`, so the owning variable's
    "bins" metadata can reference it by name.

    Returns:
        A list of :class:`HapiCsvParameter`, one per time-dependent axis
        (empty when the variable has none).
    """
    result = []
    for ax in _get_variable_axes(variable, is_time_dependent=True):
        # VariableAxis has member 'unit' not 'units'
        # but HAPICSVParameter expects 'units' in meta
        meta = {
            "name": _time_dependent_axis_name(ax),
            "units": ax.unit,
            # Use the full trailing shape rather than only shape[1]: identical
            # for 2-D axis values, and correct for axes with more than one
            # non-time dimension (HAPI 'size' is a list of all dimensions).
            "size": list(ax.values.shape[1:]),
            "type": _numpy_dtype_to_hapi_type(ax.values.dtype),
        }
        result.append(HapiCsvParameter(values=ax.values, meta=meta))
    return result


def _speasy_variables_to_hapi_csv(variables: List[SpeasyVariable]) -> HapiCsvFile:
if not same_time_axis(variables):
Expand All @@ -82,6 +167,8 @@ def _speasy_variables_to_hapi_csv(variables: List[SpeasyVariable]) -> HapiCsvFil
hapi_csv_file.add_parameter(_make_hapi_csv_time_axis(variables[0].time))
for var in variables:
hapi_csv_file.add_parameter(_make_hapi_csv_parameter(var))
for hapi_axis_parameter in _get_hapi_csv_varying_axes(var):
hapi_csv_file.add_parameter(hapi_axis_parameter)
return hapi_csv_file


Expand Down Expand Up @@ -110,7 +197,7 @@ def save_variables(self,
**kwargs
) -> Union[bool, Buffer]:
hapi_csv_file = _speasy_variables_to_hapi_csv(variables)
return save_hapi_csv(hapi_csv_file, file)
return save_hapi_csv(hapi_csv_file, file, **kwargs)

@property
def supported_extensions(self) -> List[str]:
Expand Down
33 changes: 21 additions & 12 deletions speasy/core/codecs/bundled_codecs/hapi_csv/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,46 @@


def _to_csv(hapi_csv_file: HapiCsvFile, dest:io.IOBase, with_headers=True) -> bool:
np_start_date = hapi_csv_file.time_axis[0]
np_stop_date = hapi_csv_file.time_axis[-1]
start_date = np_start_date.astype("datetime64[us]").astype("O")
stop_date = np_stop_date.astype("datetime64[us]").astype("O")
if with_headers:
headers = {
"HAPI": "3.2",
"startDate": start_date.isoformat() + "Z",
"stopDate": stop_date.isoformat() + "Z",
"format": "csv",
"status": {
"code": 1200,
"message": "OK request successful"
},
"parameters": [column.meta for column in hapi_csv_file.parameters]
}
dest.write(f'#{json.dumps(headers)}\n')
dest.write(("#" + json.dumps(headers) + "\n").encode("utf-8"))

data = {}
for param in hapi_csv_file.parameters:
data[param.name] = param.values
vals = param.values
if vals.ndim == 1:
data[param.name] = vals
else:
for i in range(vals.shape[1]):
data[f"{param.name}_{i}"] = vals[:, i]
Comment on lines +30 to +35
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multi-dimensional values are only expanded along shape[1] and sliced with vals[:, i]. For ndim > 2, vals[:, i] remains multi-dimensional and will produce object-like columns or inconsistent CSV output. Consider reshaping values to 2D (time, -1) and generating column names for all flattened components (or iterating across all non-time indices) so ndData variables with >2 dimensions serialize predictably.

Copilot uses AI. Check for mistakes.

df = pds.DataFrame(data)
df.to_csv(dest, index=False, header=False, date_format='%Y-%m-%dT%H:%M:%S.%fZ', float_format='%.3g')
df.to_csv(dest, index=False, header=False, date_format='%Y-%m-%dT%H:%M:%S.%fZ', float_format='%.7g')
return True





def save_hapi_csv(hapi_csv_file: HapiCsvFile, file: Optional[Union[str, io.IOBase]] = None) -> Union[bool, Buffer]:
def save_hapi_csv(hapi_csv_file: HapiCsvFile, file: Optional[Union[str, io.IOBase]] = None, with_headers: bool = True) -> Union[bool, Buffer]:
if type(file) is str:
with open(file, 'wb') as f:
return _to_csv(hapi_csv_file, f)
return _to_csv(hapi_csv_file, f, with_headers=with_headers)
elif hasattr(file, 'write'):
return _to_csv(hapi_csv_file, file)
return _to_csv(hapi_csv_file, file, with_headers=with_headers)
elif file is None:
buff = io.BytesIO()
_to_csv(hapi_csv_file, buff)
_to_csv(hapi_csv_file, buff, with_headers=with_headers)
return buff.getvalue()
raise ValueError("Invalid file type")


85 changes: 85 additions & 0 deletions tests/resources/HAPI_ndData_TimeIndependent_Axis.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#{
# "startDate": "1997-08-30T17:01:00Z",
# "stopDate": "1997-08-30T18:40:00Z",
# "cadence": "PT5M",
# "description": "ACE Electron Proton Alpha Monitor (EPAM) 5-min Level 2 Data<br/> Sampling: 5M<br/> Provider: CDAWeb",
# "resourceID": "spase://CNES/NumericalData/CDPP-AMDA/ACE/EPAM/ace-epam-de",
# "parameters": [
# {
# "name": "Time",
# "type": "isotime",
# "length": 24,
# "units": "UTC",
# "fill": null
# },
# {
# "name": "ace_epam_de_e",
# "type": "double",
# "size": [
# 4
# ],
# "bins": [
# {
# "name": "Energy",
# "units": "MeV",
# "centers": [
# 0.0455,
# 0.078,
# 0.139,
# 0.245
# ],
# "ranges": [
# [
# 0.038,
# 0.053
# ],
# [
# 0.053,
# 0.103
# ],
# [
# 0.103,
# 0.175
# ],
# [
# 0.175,
# 0.315
# ]
# ]
# }
# ],
# "units": "1/(cm² s sr MeV)",
# "fill": "-1e31",
# "description": "DE MeV Electron Flux"
# }
# ],
# "HAPI": "2.0",
# "status": {
# "code": 1200,
# "message": "OK",
# "x_request": "http://localhost:8999/amda/hapi/data?id=ace-epam-de&parameters=ace_epam_de_e&time.min=1997-08-30T17:01:00Z&time.max=1997-09-03T17:01:00.000Z&include=header",
# "x_startDateRequested": "1997-08-30T17:01:00Z",
# "x_stopDateRequested": "1997-09-03T17:01:00.000Z",
# "x_parentDataset": "ace-epam-de"
# },
# "format": "csv"
#}
1997-08-30T17:06:00.000Z,6576.2,639.43,125.4,55.765
1997-08-30T17:12:00.000Z,6114.3,569.29,117.36,47.878
1997-08-30T17:17:00.000Z,6747.6,655.14,125.1,57.296
1997-08-30T17:22:00.000Z,6819,657.43,122.12,52.653
1997-08-30T17:27:00.000Z,6423.8,598,122.12,48.679
1997-08-30T17:33:00.000Z,5733.3,511.86,105.56,46.888
1997-08-30T17:38:00.000Z,6419,632.14,128.17,60.204
1997-08-30T17:43:00.000Z,6342.9,632.86,122.52,51.888
1997-08-30T17:48:00.000Z,6600,652.86,119.64,44.673
1997-08-30T17:53:00.000Z,5795.2,571.86,93.929,46.357
1997-08-30T17:59:00.000Z,6557.1,641.86,128.47,50.724
1997-08-30T18:04:00.000Z,6566.7,642.14,130.26,59.592
1997-08-30T18:09:00.000Z,6581,646.43,122.52,55
1997-08-30T18:14:00.000Z,816.19,159.43,67.46,32.138
1997-08-30T18:19:00.000Z,163.9,113.14,67.46,30.52
1997-08-30T18:25:00.000Z,184,106.67,56.984,24.423
1997-08-30T18:30:00.000Z,171.57,111.64,57.847,24.776
1997-08-30T18:35:00.000Z,265,124.14,58.026,28.48
1997-08-30T18:40:00.000Z,197.24,118.36,69.415,30.367
69 changes: 69 additions & 0 deletions tests/resources/HAPI_ndData_TimeVarying_Axis.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#{
# "startDate": "1970-01-01Z",
# "stopDate": "2016-12-31Z",
# "sampleStartDate": "1970-01-01Z",
# "sampleStopDate": "1970-01-01T00:01:11Z",
# "cadence": "PT1S",
# "unitsSchema": "udunits2",
# "parameters": [
# {
# "name": "Time",
# "type": "isotime",
# "units": "UTC",
# "fill": null,
# "length": 24
# },
# {
# "name": "spectra_time_dependent_bins",
# "type": "double",
# "units": "m",
# "fill": "-1e31",
# "size": [
# 10
# ],
# "description": "A 10-element spectra with time-dependent bins",
# "bins": [
# {
# "name": "frequency",
# "units": "Hz",
# "centers": "frequency_centers_time_varying",
# "ranges": "frequency_ranges_time_varying"
# }
# ]
# },
# {
# "name": "frequency_centers_time_varying",
# "type": "double",
# "units": "Hz",
# "fill": "-1e31",
# "size": [
# 10
# ],
# "description": "Bin centers for spectra frequencies"
# },
# {
# "name": "frequency_ranges_time_varying",
# "type": "double",
# "units": "Hz",
# "fill": "-1e31",
# "size": [
# 10,
# 2
# ],
# "description": "Bin ranges for spectra frequencies"
# }
# ],
# "HAPI": "3.0",
# "status": {
# "code": 1200,
# "message": "OK",
# "x_request": "http://localhost:8999/...",
# "x_startDateRequested": "1970-01-01Z",
# "x_stopDateRequested": "1970-01-01T00:01:11Z",
# "x_parentDataset": "dataset1"
# },
# "format": "csv"
#}

1970-01-01T00:00:00.000Z,0,1,0.5,0.3333333333333333,0.25,0.2,0.16666666666666666,0.14285714285714285,0.125,0.1111111111111111,11,13,15,17,19,21,23,25,27,29,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30
1970-01-01T00:01:08.000Z,0,1,0.5,0.3333333333333333,0.25,0.2,0.16666666666666666,0.14285714285714285,0.125,0.1111111111111111,1,3,5,7,9,11,13,15,17,19
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The header declares three parameters after Time: spectra_time_dependent_bins (10 values), frequency_centers_time_varying (10 values), and frequency_ranges_time_varying (10x2 = 20 values). Row 68 includes the additional 20 range values, but row 69 does not, so the CSV row length is inconsistent with the declared schema. Update row 69 to include the missing 20 values (or remove/adjust the declared ranges parameter) so the fixture can be parsed reliably.

Suggested change
1970-01-01T00:01:08.000Z,0,1,0.5,0.3333333333333333,0.25,0.2,0.16666666666666666,0.14285714285714285,0.125,0.1111111111111111,1,3,5,7,9,11,13,15,17,19
1970-01-01T00:01:08.000Z,0,1,0.5,0.3333333333333333,0.25,0.2,0.16666666666666666,0.14285714285714285,0.125,0.1111111111111111,1,3,5,7,9,11,13,15,17,19,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20

Copilot uses AI. Check for mistakes.
Loading
Loading