Skip to content

Commit a52b8fe

Browse files
Merge pull request #42 from nsidc/21-support-glah06-v34
Add code to support GLAH06 v034
2 parents 03afac1 + 14a3d24 commit a52b8fe

File tree

4 files changed

+383
-2
lines changed

4 files changed

+383
-2
lines changed

src/nsidc/iceflow/data/glah06.py

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
from __future__ import annotations
2+
3+
import datetime as dt
4+
from pathlib import Path
5+
from typing import cast
6+
7+
import h5py
8+
import numpy as np
9+
import numpy.typing as npt
10+
import pandas as pd
11+
12+
from nsidc.iceflow.data.models import GLAH06DataFrame
13+
14+
# Note: the ITRF is given by the NSIDC landing page:
15+
# https://nsidc.org/data/glah06/versions/34#anchor-data-access-tools
16+
# The ITRF does not appear to be present in the source data or the user guide.
17+
GLAH06_ITRF = "ITRF2008"
18+
VERSION = "034"
19+
# All scalar variables with single dimension 'DS_UTCTime_40'
20+
VARIABLES = [
21+
("DS_UTCTime_40", "/Data_40HZ/DS_UTCTime_40"),
22+
("i_rec_ndx", "/Data_40HZ/Time/i_rec_ndx"),
23+
("i_shot_count", "/Data_40HZ/Time/i_shot_count"),
24+
("d_lat", "/Data_40HZ/Geolocation/d_lat"),
25+
("d_lon", "/Data_40HZ/Geolocation/d_lon"),
26+
("d_elev", "/Data_40HZ/Elevation_Surfaces/d_elev"),
27+
("d_refRng", "/Data_40HZ/Elevation_Surfaces/d_refRng"),
28+
("d_dTrop", "/Data_40HZ/Elevation_Corrections/d_dTrop"),
29+
("d_satElevCorr", "/Data_40HZ/Elevation_Corrections/d_satElevCorr"),
30+
("d_GmC", "/Data_40HZ/Elevation_Corrections/d_GmC"),
31+
("d_wTrop", "/Data_40HZ/Elevation_Corrections/d_wTrop"),
32+
("d_beamCoelv", "/Data_40HZ/Elevation_Angles/d_beamCoelv"),
33+
("d_beamAzimuth", "/Data_40HZ/Elevation_Angles/d_beamAzimuth"),
34+
("d_SigBegOff", "/Data_40HZ/Elevation_Offsets/d_SigBegOff"),
35+
("d_TrshRngOff", "/Data_40HZ/Elevation_Offsets/d_TrshRngOff"),
36+
("d_SigEndOff", "/Data_40HZ/Elevation_Offsets/d_SigEndOff"),
37+
("d_cntRngOff", "/Data_40HZ/Elevation_Offsets/d_cntRngOff"),
38+
("d_isRngOff", "/Data_40HZ/Elevation_Offsets/d_isRngOff"),
39+
("d_siRngOff", "/Data_40HZ/Elevation_Offsets/d_siRngOff"),
40+
("d_ldRngOff", "/Data_40HZ/Elevation_Offsets/d_ldRngOff"),
41+
("d_ocRngOff", "/Data_40HZ/Elevation_Offsets/d_ocRngOff"),
42+
("rng_uqf_sigbeg1_flg", "/Data_40HZ/Quality/rng_uqf_sigbeg1_flg"),
43+
("rng_uqf_sigend1_flg", "/Data_40HZ/Quality/rng_uqf_sigend1_flg"),
44+
("rng_uqf_thres1_flg", "/Data_40HZ/Quality/rng_uqf_thres1_flg"),
45+
("rng_uqf_cent1_flg", "/Data_40HZ/Quality/rng_uqf_cent1_flg"),
46+
("rng_uqf_sigbeg2_flg", "/Data_40HZ/Quality/rng_uqf_sigbeg2_flg"),
47+
("rng_uqf_sigend2_flg", "/Data_40HZ/Quality/rng_uqf_sigend2_flg"),
48+
("rng_uqf_thres2_flg", "/Data_40HZ/Quality/rng_uqf_thres2_flg"),
49+
("rng_uqf_cent2_flg", "/Data_40HZ/Quality/rng_uqf_cent2_flg"),
50+
("rng_uqf_is_flg", "/Data_40HZ/Quality/rng_uqf_is_flg"),
51+
("rng_uqf_si_flg", "/Data_40HZ/Quality/rng_uqf_si_flg"),
52+
("rng_uqf_ld_flg", "/Data_40HZ/Quality/rng_uqf_ld_flg"),
53+
("rng_uqf_oc_flg", "/Data_40HZ/Quality/rng_uqf_oc_flg"),
54+
("sat_corr_flg", "/Data_40HZ/Quality/sat_corr_flg"),
55+
("elev_use_flg", "/Data_40HZ/Quality/elev_use_flg"),
56+
("att_pad_use_flg", "/Data_40HZ/Quality/att_pad_use_flg"),
57+
("att_calc_pad_flg", "/Data_40HZ/Quality/att_calc_pad_flg"),
58+
("att_lpa_flg", "/Data_40HZ/Quality/att_lpa_flg"),
59+
("sigma_att_flg", "/Data_40HZ/Quality/sigma_att_flg"),
60+
("i_satNdx", "/Data_40HZ/Quality/i_satNdx"),
61+
("d_pctSAT", "/Data_40HZ/Quality/d_pctSAT"),
62+
("elv_cnt_1_flg", "/Data_40HZ/Elevation_Flags/elv_cnt_1_flg"),
63+
("elv_cnt_2_flg", "/Data_40HZ/Elevation_Flags/elv_cnt_2_flg"),
64+
("elv_peak_1_flg", "/Data_40HZ/Elevation_Flags/elv_peak_1_flg"),
65+
("elv_peak_2_flg", "/Data_40HZ/Elevation_Flags/elv_peak_2_flg"),
66+
("elv_thres_flg", "/Data_40HZ/Elevation_Flags/elv_thres_flg"),
67+
("elv_gauss_flg", "/Data_40HZ/Elevation_Flags/elv_gauss_flg"),
68+
("elv_other_flg", "/Data_40HZ/Elevation_Flags/elv_other_flg"),
69+
("elv_cloud_flg", "/Data_40HZ/Elevation_Flags/elv_cloud_flg"),
70+
("d_TxNrg", "/Data_40HZ/Transmit_Energy/d_TxNrg"),
71+
("d_d2refTrk", "/Data_40HZ/Geophysical/d_d2refTrk"),
72+
("d_DEM_elv", "/Data_40HZ/Geophysical/d_DEM_elv"),
73+
("d_ocElv", "/Data_40HZ/Geophysical/d_ocElv"),
74+
("d_poTide", "/Data_40HZ/Geophysical/d_poTide"),
75+
("d_gdHt", "/Data_40HZ/Geophysical/d_gdHt"),
76+
("d_erElv", "/Data_40HZ/Geophysical/d_erElv"),
77+
("d_eqElv", "/Data_40HZ/Geophysical/d_eqElv"),
78+
("d_ldElv", "/Data_40HZ/Geophysical/d_ldElv"),
79+
("d_deltaEllip", "/Data_40HZ/Geophysical/d_deltaEllip"),
80+
("d_ElevBiasCorr", "/Data_40HZ/Geophysical/d_ElevBiasCorr"),
81+
("i_DEM_hires_src_1", "/Data_40HZ/Geophysical/i_DEM_hires_src_1"),
82+
("d_reflctUC", "/Data_40HZ/Reflectivity/d_reflctUC"),
83+
("d_sDevNsOb1", "/Data_40HZ/Reflectivity/d_sDevNsOb1"),
84+
("d_satNrgCorr", "/Data_40HZ/Reflectivity/d_satNrgCorr"),
85+
("d_RecNrgAll", "/Data_40HZ/Reflectivity/d_RecNrgAll"),
86+
("d_skew2", "/Data_40HZ/Waveform/d_skew2"),
87+
("d_kurt2", "/Data_40HZ/Waveform/d_kurt2"),
88+
("d_maxRecAmp", "/Data_40HZ/Waveform/d_maxRecAmp"),
89+
("d_maxSmAmp", "/Data_40HZ/Waveform/d_maxSmAmp"),
90+
("i_nPeaks1", "/Data_40HZ/Waveform/i_nPeaks1"),
91+
("i_numPk", "/Data_40HZ/Waveform/i_numPk"),
92+
("i_gval_rcv", "/Data_40HZ/Waveform/i_gval_rcv"),
93+
("d_FRir_cldtop", "/Data_40HZ/Atmosphere/d_FRir_cldtop"),
94+
("FRir_qa_flg", "/Data_40HZ/Atmosphere/FRir_qa_flg"),
95+
("d_FRir_intsig", "/Data_40HZ/Atmosphere/d_FRir_intsig"),
96+
]
97+
98+
TIMESTAMP_COLUMN = "utc_datetime"
99+
100+
DATA_COLUMNS = [
101+
("i_rec_ndx", b"%d"),
102+
("i_shot_count", b"%d"),
103+
("d_lat", b"%f"),
104+
("d_lon", b"%f"),
105+
("d_elev", b"%f"),
106+
("d_refRng", b"%f"),
107+
("d_dTrop", b"%f"),
108+
("d_satElevCorr", b"%f"),
109+
("d_GmC", b"%f"),
110+
("d_wTrop", b"%f"),
111+
("d_beamCoelv", b"%f"),
112+
("d_beamAzimuth", b"%f"),
113+
("d_SigBegOff", b"%f"),
114+
("d_TrshRngOff", b"%f"),
115+
("d_SigEndOff", b"%f"),
116+
("d_cntRngOff", b"%f"),
117+
("d_isRngOff", b"%f"),
118+
("d_siRngOff", b"%f"),
119+
("d_ldRngOff", b"%f"),
120+
("d_ocRngOff", b"%f"),
121+
("rng_uqf_sigbeg1_flg", b"%d"),
122+
("rng_uqf_sigend1_flg", b"%d"),
123+
("rng_uqf_thres1_flg", b"%d"),
124+
("rng_uqf_cent1_flg", b"%d"),
125+
("rng_uqf_sigbeg2_flg", b"%d"),
126+
("rng_uqf_sigend2_flg", b"%d"),
127+
("rng_uqf_thres2_flg", b"%d"),
128+
("rng_uqf_cent2_flg", b"%d"),
129+
("rng_uqf_is_flg", b"%d"),
130+
("rng_uqf_si_flg", b"%d"),
131+
("rng_uqf_ld_flg", b"%d"),
132+
("rng_uqf_oc_flg", b"%d"),
133+
("sat_corr_flg", b"%d"),
134+
("elev_use_flg", b"%d"),
135+
("att_pad_use_flg", b"%d"),
136+
("att_calc_pad_flg", b"%d"),
137+
("att_lpa_flg", b"%d"),
138+
("sigma_att_flg", b"%d"),
139+
("i_satNdx", b"%d"),
140+
("d_pctSAT", b"%f"),
141+
("elv_cnt_1_flg", b"%d"),
142+
("elv_cnt_2_flg", b"%d"),
143+
("elv_peak_1_flg", b"%d"),
144+
("elv_peak_2_flg", b"%d"),
145+
("elv_thres_flg", b"%d"),
146+
("elv_gauss_flg", b"%d"),
147+
("elv_other_flg", b"%d"),
148+
("elv_cloud_flg", b"%d"),
149+
("d_TxNrg", b"%f"),
150+
("d_d2refTrk", b"%f"),
151+
("d_DEM_elv", b"%f"),
152+
("d_ocElv", b"%f"),
153+
("d_poTide", b"%f"),
154+
("d_gdHt", b"%f"),
155+
("d_erElv", b"%f"),
156+
("d_eqElv", b"%f"),
157+
("d_ldElv", b"%f"),
158+
("d_deltaEllip", b"%f"),
159+
("d_ElevBiasCorr", b"%f"),
160+
("i_DEM_hires_src_1", b"%d"),
161+
("d_reflctUC", b"%f"),
162+
("d_sDevNsOb1", b"%f"),
163+
("d_satNrgCorr", b"%f"),
164+
("d_RecNrgAll", b"%f"),
165+
("d_skew2", b"%f"),
166+
("d_kurt2", b"%f"),
167+
("d_maxRecAmp", b"%f"),
168+
("d_maxSmAmp", b"%f"),
169+
("i_nPeaks1", b"%d"),
170+
("i_numPk", b"%d"),
171+
("i_gval_rcv", b"%d"),
172+
("d_FRir_cldtop", b"%f"),
173+
("FRir_qa_flg", b"%d"),
174+
("d_FRir_intsig", b"%f"),
175+
]
176+
177+
178+
def _utc_datetime(seconds):
179+
"""Return 'utc_datetime' Series with values calculated from the shot data.
180+
181+
The transmit time of each shot in the 1 second frame is measured as UTC seconds elapsed
182+
since Jan 1 2000 12:00:00 UTC. This time has been derived from the GPS time accounting
183+
for leap seconds.
184+
"""
185+
epoc = dt.datetime(2000, 1, 1, 12, 0, 0)
186+
utc = seconds.apply(lambda s: epoc + dt.timedelta(seconds=s))
187+
188+
return utc
189+
190+
191+
def _mask_invalid(var) -> npt.NDArray[np.bool_]:
192+
assert len(var.shape) == 1, "Expected only 1 dimensional data"
193+
values = var[:]
194+
195+
invalid: npt.NDArray[np.bool_] = np.full(var.shape, False)
196+
197+
# Mask no data values
198+
if "_FillValue" in var.attrs:
199+
invalid |= values == var.attrs["_FillValue"][0]
200+
201+
# Mask out-of-range values
202+
if "valid_min" in var.attrs and "valid_max" in var.attrs:
203+
valid_min = var.attrs["valid_min"][0]
204+
valid_max = var.attrs["valid_max"][0]
205+
invalid |= (values < valid_min) | (values > valid_max)
206+
207+
return invalid
208+
209+
210+
def _glah06_dataframe(filepath):
211+
"""Returns a GLAH06 DataFrame read from an HDF5 file."""
212+
df = pd.DataFrame()
213+
with h5py.File(filepath, "r") as glah06:
214+
invalid_geo = None
215+
for name, path in VARIABLES:
216+
var = glah06[path]
217+
df[name] = var[:]
218+
219+
# Mask row completely if geolocation is missing
220+
if name in ("d_lat", "d_lon", "d_elev"):
221+
if invalid_geo is None:
222+
invalid_geo = np.full(var.shape, False)
223+
224+
invalid_geo |= _mask_invalid(var)
225+
226+
invalid_geo = cast(npt.NDArray[np.bool_], invalid_geo)
227+
df = df[~invalid_geo]
228+
229+
return df
230+
231+
232+
def _glah06_data(filepath):
233+
df = _glah06_dataframe(filepath)
234+
235+
df["utc_datetime"] = _utc_datetime(df["DS_UTCTime_40"]) if not df.empty else None
236+
237+
df = df.drop(columns="DS_UTCTime_40")
238+
239+
return df
240+
241+
242+
def glah06_data(filepath: Path) -> GLAH06DataFrame:
243+
"""Return an GLAH06 file DataFrame, performing all necessary
244+
conversions / augmentation on the data.
245+
"""
246+
df = _glah06_data(filepath)
247+
248+
# Add the ITRF
249+
df["ITRF"] = GLAH06_ITRF
250+
# To be consistent with the other `iceflow` datasets, copy the primary
251+
# lat/lon/elev fields to the standard "latitude", "longitude", "elevation"
252+
# field names.
253+
df["latitude"] = df["d_lat"]
254+
df["longitude"] = df["d_lon"]
255+
df["elevation"] = df["d_elev"]
256+
257+
# We index the data by utc datetime.
258+
df = df.set_index("utc_datetime")
259+
260+
return GLAH06DataFrame(df)

src/nsidc/iceflow/data/models.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,93 @@ class Config:
100100
add_missing_columns = True
101101

102102

103+
class GLAH06Schema(CommonDataColumnsSchema):
104+
# Note: all of these variables are extracted from the "Data_40HZ" group. We
105+
# may want to support accessing data from the "Data_1HZ" group in the
106+
# future.
107+
i_rec_ndx: Series[int] = pa.Field(coerce=True)
108+
i_shot_count: Series[int] = pa.Field(coerce=True)
109+
d_lat: Series[float]
110+
d_lon: Series[float]
111+
d_elev: Series[float]
112+
d_refRng: Series[float]
113+
d_dTrop: Series[float]
114+
d_satElevCorr: Series[float]
115+
d_GmC: Series[float]
116+
d_wTrop: Series[float]
117+
d_beamCoelv: Series[float]
118+
d_beamAzimuth: Series[float]
119+
d_SigBegOff: Series[float]
120+
d_TrshRngOff: Series[float]
121+
d_SigEndOff: Series[float]
122+
d_cntRngOff: Series[float]
123+
d_isRngOff: Series[float]
124+
d_siRngOff: Series[float]
125+
d_ldRngOff: Series[float]
126+
d_ocRngOff: Series[float]
127+
rng_uqf_sigbeg1_flg: Series[int] = pa.Field(coerce=True)
128+
rng_uqf_sigend1_flg: Series[int] = pa.Field(coerce=True)
129+
rng_uqf_thres1_flg: Series[int] = pa.Field(coerce=True)
130+
rng_uqf_cent1_flg: Series[int] = pa.Field(coerce=True)
131+
rng_uqf_sigbeg2_flg: Series[int] = pa.Field(coerce=True)
132+
rng_uqf_sigend2_flg: Series[int] = pa.Field(coerce=True)
133+
rng_uqf_thres2_flg: Series[int] = pa.Field(coerce=True)
134+
rng_uqf_cent2_flg: Series[int] = pa.Field(coerce=True)
135+
rng_uqf_is_flg: Series[int] = pa.Field(coerce=True)
136+
rng_uqf_si_flg: Series[int] = pa.Field(coerce=True)
137+
rng_uqf_ld_flg: Series[int] = pa.Field(coerce=True)
138+
rng_uqf_oc_flg: Series[int] = pa.Field(coerce=True)
139+
sat_corr_flg: Series[int] = pa.Field(coerce=True)
140+
elev_use_flg: Series[int] = pa.Field(coerce=True)
141+
att_pad_use_flg: Series[int] = pa.Field(coerce=True)
142+
att_calc_pad_flg: Series[int] = pa.Field(coerce=True)
143+
att_lpa_flg: Series[int] = pa.Field(coerce=True)
144+
sigma_att_flg: Series[int] = pa.Field(coerce=True)
145+
i_satNdx: Series[int] = pa.Field(coerce=True)
146+
d_pctSAT: Series[float]
147+
elv_cnt_1_flg: Series[int] = pa.Field(coerce=True)
148+
elv_cnt_2_flg: Series[int] = pa.Field(coerce=True)
149+
elv_peak_1_flg: Series[int] = pa.Field(coerce=True)
150+
elv_peak_2_flg: Series[int] = pa.Field(coerce=True)
151+
elv_thres_flg: Series[int] = pa.Field(coerce=True)
152+
elv_gauss_flg: Series[int] = pa.Field(coerce=True)
153+
elv_other_flg: Series[int] = pa.Field(coerce=True)
154+
elv_cloud_flg: Series[int] = pa.Field(coerce=True)
155+
d_TxNrg: Series[float]
156+
d_d2refTrk: Series[float]
157+
d_DEM_elv: Series[float]
158+
d_ocElv: Series[float]
159+
d_poTide: Series[float]
160+
d_gdHt: Series[float]
161+
d_erElv: Series[float]
162+
d_eqElv: Series[float]
163+
d_ldElv: Series[float]
164+
d_deltaEllip: Series[float]
165+
d_ElevBiasCorr: Series[float]
166+
i_DEM_hires_src_1: Series[int] = pa.Field(coerce=True)
167+
d_reflctUC: Series[float]
168+
d_sDevNsOb1: Series[float]
169+
d_satNrgCorr: Series[float]
170+
d_RecNrgAll: Series[float]
171+
d_skew2: Series[float]
172+
d_kurt2: Series[float]
173+
d_maxRecAmp: Series[float]
174+
d_maxSmAmp: Series[float]
175+
i_nPeaks1: Series[int] = pa.Field(coerce=True)
176+
i_numPk: Series[int] = pa.Field(coerce=True)
177+
i_gval_rcv: Series[int] = pa.Field(coerce=True)
178+
d_FRir_cldtop: Series[float]
179+
FRir_qa_flg: Series[int] = pa.Field(coerce=True)
180+
d_FRir_intsig: Series[float]
181+
182+
103183
IceflowDataFrame = DataFrame[CommonDataColumnsSchema]
104184
ATM1BDataFrame = DataFrame[ATM1BSchema]
105185
ILVIS2DataFrame = DataFrame[ILVIS2Schema]
186+
GLAH06DataFrame = DataFrame[GLAH06Schema]
106187

107188
ATM1BShortName = Literal["ILATM1B", "BLATM1B"]
108-
DatasetShortName = ATM1BShortName | Literal["ILVIS2"]
189+
DatasetShortName = ATM1BShortName | Literal["ILVIS2"] | Literal["GLAH06"]
109190

110191

111192
class Dataset(pydantic.BaseModel):
@@ -133,6 +214,14 @@ class ILVIS2Dataset(Dataset):
133214
version: Literal["1", "2"]
134215

135216

217+
class GLAH06Dataset(Dataset):
218+
short_name: DatasetShortName = "GLAH06"
219+
# Note: some dataset versions are padded with zeros like GLAH06. NSIDC
220+
# documentation refers to "version 34", but CMR only recognizes "034". As a
221+
# rule-of-thumb, ICESat-2, SMAP, and GLAH/GLA datasets have zero padding.
222+
version: Literal["034"] = "034"
223+
224+
136225
class BoundingBox(pydantic.BaseModel):
137226
lower_left_lon: float
138227
lower_left_lat: float

0 commit comments

Comments
 (0)