Skip to content

Commit 8d806c5

Browse files
author
Malcolm White
committed
Add documentation and tests.
1 parent c5b6f31 commit 8d806c5

File tree

3 files changed

+237
-49
lines changed

3 files changed

+237
-49
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
*.egg-info
22
doc/_build/
33
.cache
4+
*.pyc

pyasdf/asdf_data_set.py

Lines changed: 206 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,95 +1234,186 @@ def __parse_waveform_input_and_validate(self, waveform):
12341234
def create_reference(self, ref, starttime, endtime, net=None, sta=None,
12351235
loc=None, chan=None, tag=None, overwrite=False):
12361236
"""
1237-
Create a region reference for fast lookup of segments of
1238-
continuous data.
1237+
Creates a region reference for fast lookup of data segments.
1238+
1239+
:param ref: The reference label to apply.
1240+
:type ref: str
1241+
:param starttime: Start time of reference.
1242+
:type starttime: :class:`obspy.core.utcdatetime.UTCDateTime`
1243+
:param endtime: End time of reference.
1244+
:type endtime: :class:`obspy.core.utcdatetime.UTCDateTime`
1245+
:param net: Networks to create references for.
1246+
:type net: str, tuple
1247+
:param sta: Stations to create references for.
1248+
:type sta: str, tuple
1249+
:param loc: Location codes to create references for.
1250+
:type loc: str, tuple
1251+
:param chan: Channels to create references for.
1252+
:type chan: str, tuple
1253+
:param tag: Tags to create references for.
1254+
:type tag: str, tuple
1255+
:param overwrite: Overwrite existing references for this label.
1256+
:type overwrite: bool
1257+
1258+
This approach is useful for creating subsets of a dataset
1259+
without duplicating waveforms.
1260+
1261+
.. rubric:: Example
1262+
1263+
Consider an ASDFDataSet populated with continuous waveforms for
1264+
stations from two networks (AA and BB):
1265+
1266+
- AA.XXX
1267+
- AA.YYY
1268+
- AA.ZZZ
1269+
- BB.UUU
1270+
- BB.VVV
1271+
- BB.WWW
1272+
1273+
It may be useful to process event-segmented waveforms, where
1274+
a one-minute window of data is needed. We can create references
1275+
to these windowed data segments for fast extraction:
1276+
1277+
.. code-block:: python
1278+
1279+
>>> ds.create_reference("event000001",
1280+
... obspy.UTCDateTime("2016001T01:00:00"),
1281+
... obspy.UTCDateTime("2016001T01:01:00"))
1282+
>>> ds.get_data_for_reference("event000001")
1283+
18 Trace(s) in Stream:
1284+
AA.XXX..HHZ | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1285+
...
1286+
(16 other traces)
1287+
...
1288+
BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1289+
1290+
Or perhaps we only want to include data from network AA in the
1291+
referenced data set:
1292+
1293+
.. code-block:: python
1294+
1295+
>>> ds.create_reference("event000001",
1296+
... obspy.UTCDateTime("2016001T01:00:00"),
1297+
... obspy.UTCDateTime("2016001T01:01:00"),
1298+
... net="AA",
1299+
... overwrite=True)
1300+
>>> ds.get_data_for_reference("event000001")
1301+
9 Trace(s) in Stream:
1302+
AA.XXX..HHZ | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1303+
...
1304+
(7 other traces)
1305+
...
1306+
AA.ZZZ..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1307+
1308+
Or only horizontal component data:
1309+
1310+
.. code-block:: python
1311+
1312+
>>> ds.create_reference("event000001",
1313+
... obspy.UTCDateTime("2016001T01:00:00"),
1314+
... obspy.UTCDateTime("2016001T01:01:00"),
1315+
... chan=("HHN", "HHE"),
1316+
... overwrite=True)
1317+
>>> ds.get_data_for_reference("event000001")
1318+
12 Trace(s) in Stream:
1319+
AA.XXX..HHN | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1320+
...
1321+
(10 other traces)
1322+
...
1323+
BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1324+
1325+
etc...
12391326
"""
12401327

1241-
if isinstance(net, str):
1328+
if isinstance(net, str) or isinstance(net, unicode):
12421329
net = (net,)
12431330
elif isinstance(net, tuple) or isinstance(net, list) or net is None:
12441331
pass
12451332
else:
12461333
raise(TypeError(net))
12471334

1248-
if isinstance(sta, str):
1335+
if isinstance(sta, str) or isinstance(sta, unicode):
12491336
sta = (sta,)
12501337
elif isinstance(sta, tuple) or isinstance(sta, list) or sta is None:
12511338
pass
12521339
else:
12531340
raise(TypeError(sta))
12541341

1255-
if isinstance(loc, str):
1342+
if isinstance(loc, str) or isinstance(loc, unicode):
12561343
loc = (loc,)
12571344
elif isinstance(loc, tuple) or isinstance(loc, list) or loc is None:
12581345
pass
12591346
else:
12601347
raise(TypeError(loc))
12611348

1262-
if isinstance(chan, str):
1349+
if isinstance(chan, str) or isinstance(chan, unicode):
12631350
chan = (chan,)
12641351
elif isinstance(chan, tuple) or isinstance(chan, list) or chan is None:
12651352
pass
12661353
else:
12671354
raise(TypeError(chan))
12681355

1269-
if isinstance(tag, str):
1356+
if isinstance(tag, str) or isinstance(tag, unicode):
12701357
tag = (tag,)
12711358
elif isinstance(tag, tuple) or isinstance(tag, list) or tag is None:
12721359
pass
12731360
else:
12741361
raise(TypeError(tag))
12751362

1276-
_predicate_net = lambda _key: net == None\
1277-
or _key.split(".")[0] in net
1363+
_ref_dtype = h5py.special_dtype(ref=h5py.RegionReference)
12781364

1279-
_predicate_sta = lambda _key: sta == None\
1280-
or _key.split(".")[1] in sta
1365+
def _predicate_net(_key):
1366+
return(net is None or _key.split(".")[0] in net)
12811367

1282-
_predicate_loc = lambda _key: loc == None\
1283-
or _key.split(".")[2] in loc
1368+
def _predicate_sta(_key):
1369+
return(sta is None or _key.split(".")[1] in sta)
12841370

1285-
_predicate_chan = lambda _key: chan == None\
1286-
or _key.split(".")[-1].split("__")[0] in chan
1371+
def _predicate_loc(_key):
1372+
return(loc is None or _key.split(".")[2] in loc)
12871373

1288-
_predicate_tag = lambda _key: tag == None\
1289-
or _key.split(".")[-1].split("__")[-1] in tag
1374+
def _predicate_chan(_key):
1375+
return(chan is None or _key.split(".")[-1].split("__")[0] in chan)
12901376

1291-
_predicate_netsta = lambda _key: _predicate_net(_key)\
1292-
and _predicate_sta(_key)
1377+
def _predicate_tag(_key):
1378+
return(tag is None or _key.split(".")[-1].split("__")[-1] in tag)
12931379

1294-
_predicate_locchantag = lambda _key: _predicate_loc(_key)\
1295-
and _predicate_chan(_key)\
1296-
and _predicate_tag(_key)
1380+
def _predicate_netsta(_key):
1381+
return(_predicate_net(_key) and _predicate_sta(_key))
12971382

1383+
def _predicate_locchantag(_key):
1384+
return(_predicate_loc(_key)
1385+
and _predicate_chan(_key)
1386+
and _predicate_tag(_key))
1387+
1388+
_wf_grp = self._waveform_group
12981389
for _station_name in itertools.ifilter(_predicate_netsta,
12991390
self._waveform_group.keys()):
13001391
for _key in itertools.ifilter(_predicate_locchantag,
1301-
self._waveform_group[_station_name].keys()):
1392+
_wf_grp[_station_name].keys()):
13021393

13031394
_net, _sta, _loc, _remainder = _key.split(".")
13041395
_chan = _remainder.split("__")[0]
13051396

13061397
_ds = self._waveform_group["%s/%s" % (_station_name, _key)]
13071398

1308-
_ts = obspy.UTCDateTime(_ds.attrs["starttime"]*1e-9)
1399+
_ts = obspy.UTCDateTime(_ds.attrs["starttime"]*1e-9)
13091400
_samprate = _ds.attrs["sampling_rate"]
1310-
_te = _ts + len(_ds)/_samprate
1401+
_te = _ts + len(_ds)/_samprate
13111402
if _te < starttime or _ts > endtime:
13121403
continue
13131404

13141405
_offset = int((starttime-_ts)*_samprate)
1315-
_nsamp = int((endtime-starttime)*_samprate)
1316-
_ref = _ds.regionref[_offset:_offset+_nsamp+1]
1406+
_nsamp = int((endtime-starttime)*_samprate)
1407+
_ref = _ds.regionref[_offset:_offset+_nsamp+1]
13171408

13181409
if ref not in self._reference_group:
13191410
_ref_grp = self._reference_group.create_group(ref)
13201411
else:
13211412
_ref_grp = self._reference_group[ref]
13221413

1323-
_net = "__" if _net == "" else _net
1324-
_sta = "__" if _sta == "" else _sta
1325-
_loc = "__" if _loc == "" else _loc
1414+
_net = "__" if _net == "" else _net
1415+
_sta = "__" if _sta == "" else _sta
1416+
_loc = "__" if _loc == "" else _loc
13261417
_chan = "__" if _chan == "" else _chan
13271418
_handle = "/".join((_net, _sta, _loc, _chan))
13281419

@@ -1332,9 +1423,10 @@ def create_reference(self, ref, starttime, endtime, net=None, sta=None,
13321423
if _handle not in _ref_grp:
13331424
_ref_ds = _ref_grp.create_dataset(_handle,
13341425
(1,),
1335-
dtype=h5py.special_dtype(ref=h5py.RegionReference))
1426+
dtype=_ref_dtype)
13361427
_ref_ds.attrs["sampling_rate"] = _ds.attrs["sampling_rate"]
1337-
_ref_ds.attrs["starttime"] = _ds.attrs["starttime"] + int(_offset/_samprate*1.e9)
1428+
_ts = _ds.attrs["starttime"] + int(_offset/_samprate*1.e9)
1429+
_ref_ds.attrs["starttime"] = _ts
13381430
_ref_ds[0] = _ref
13391431
else:
13401432
print("Will not overwrite existing reference")
@@ -1343,46 +1435,109 @@ def create_reference(self, ref, starttime, endtime, net=None, sta=None,
13431435
def get_data_for_reference(self, ref, net=None, sta=None, loc=None,
13441436
chan=None):
13451437
"""
1346-
Create a region reference for fast lookup of segments of
1347-
continuous data.
1438+
Retrieve referenced data.
1439+
1440+
:param ref: Reference label.
1441+
:type ref: str
1442+
:param net: Networks to retrieve referenced data for.
1443+
:type net: str, tuple
1444+
:param sta: Stations to retrieve referenced data for.
1445+
:type sta: str, tuple
1446+
:param loc: Location codes to retrieve referenced data for.
1447+
:type loc: str, tuple
1448+
:param chan: Channels to retrieve referenced data for.
1449+
:type chan: str, tuple
1450+
:returns: Referenced data.
1451+
:rtype: :class:`~obspy.core.stream.Stream`
1452+
1453+
.. rubric:: Example
1454+
1455+
Consider an ASDFDataSet with references pointing to event-segmented
1456+
waveforms (see :func:`create_reference`). We can retrieve data
1457+
for a particular reference label:
1458+
1459+
.. code-block:: python
1460+
1461+
>>> ds.get_data_for_reference("event000001")
1462+
18 Trace(s) in Stream:
1463+
AA.XXX..HHZ | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1464+
...
1465+
(16 other traces)
1466+
...
1467+
BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1468+
1469+
Or for only the BB network:
1470+
1471+
.. code-block:: python
1472+
1473+
>>> ds.get_data_for_reference("event000001",
1474+
... net="BB")
1475+
9 Trace(s) in Stream:
1476+
BB.UUU..HHZ | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1477+
...
1478+
(7 other traces)
1479+
...
1480+
BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1481+
1482+
Or for only horizontal components:
1483+
1484+
.. code-block:: python
1485+
1486+
>>> ds.get_data_for_reference("event000001",
1487+
... chan=("HHN","HHE"))
1488+
12 Trace(s) in Stream:
1489+
AA.XXX..HHN | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1490+
...
1491+
(10 other traces)
1492+
...
1493+
BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1494+
1495+
etc...
13481496
"""
1349-
if not isinstance(ref, str):
1497+
if not isinstance(ref, str) and not isinstance(ref, unicode):
13501498
raise(TypeError("reference must be type ::str::"))
13511499
if ref not in self._reference_group:
13521500
raise(IOError("reference does not exist: %s" % ref))
13531501

1354-
if isinstance(net, str):
1502+
if isinstance(net, str) or isinstance(net, unicode):
13551503
net = (net,)
13561504
elif isinstance(net, tuple) or isinstance(net, list) or net is None:
13571505
pass
13581506
else:
13591507
raise(TypeError(net))
13601508

1361-
if isinstance(sta, str):
1509+
if isinstance(sta, str) or isinstance(sta, unicode):
13621510
sta = (sta,)
13631511
elif isinstance(sta, tuple) or isinstance(sta, list) or sta is None:
13641512
pass
13651513
else:
13661514
raise(TypeError(sta))
13671515

1368-
if isinstance(loc, str):
1516+
if isinstance(loc, str) or isinstance(loc, unicode):
13691517
loc = (loc,)
13701518
elif isinstance(loc, tuple) or isinstance(loc, list) or loc is None:
13711519
pass
13721520
else:
13731521
raise(TypeError(loc))
13741522

1375-
if isinstance(chan, str):
1523+
if isinstance(chan, str) or isinstance(chan, unicode):
13761524
chan = (chan,)
13771525
elif isinstance(chan, tuple) or isinstance(chan, list) or chan is None:
13781526
pass
13791527
else:
13801528
raise(TypeError(chan))
13811529

1382-
_predicate_net = lambda _key: net == None or _key in net
1383-
_predicate_sta = lambda _key: sta == None or _key in sta
1384-
_predicate_loc = lambda _key: loc == None or _key in loc
1385-
_predicate_chan = lambda _key: chan == None or _key in chan
1530+
def _predicate_net(_key):
1531+
return(net is None or _key in net)
1532+
1533+
def _predicate_sta(_key):
1534+
return(sta is None or _key in sta)
1535+
1536+
def _predicate_loc(_key):
1537+
return(loc is None or _key in loc)
1538+
1539+
def _predicate_chan(_key):
1540+
return(chan is None or _key in chan)
13861541

13871542
_st = obspy.Stream()
13881543
_ref_grp = self._reference_group[ref]
@@ -1400,14 +1555,16 @@ def get_data_for_reference(self, ref, net=None, sta=None, loc=None,
14001555

14011556
for _chan in itertools.ifilter(_predicate_chan,
14021557
_loc_grp.keys()):
1403-
_ds = _loc_grp[_chan]
1558+
_ds = _loc_grp[_chan]
14041559
_ref = _ds[0]
14051560
_tr = obspy.Trace(data=self.__file[_ref][_ref])
1406-
_tr.stats.network = _net if _net != "__" else ""
1407-
_tr.stats.station = _sta if _sta != "__" else ""
1408-
_tr.stats.location = _loc if _loc != "__" else ""
1409-
_tr.stats.channel = _chan if _chan != "__" else ""
1410-
_tr.stats.starttime = obspy.UTCDateTime(_ds.attrs["starttime"]*1e-9)
1561+
_tr.stats.network = _net if _net != "__" else ""
1562+
_tr.stats.station = _sta if _sta != "__" else ""
1563+
_tr.stats.location = _loc if _loc != "__" else ""
1564+
_tr.stats.channel = _chan if _chan != "__" else ""
1565+
_tr.stats.starttime = obspy.UTCDateTime(
1566+
_ds.attrs["starttime"]*1e-9
1567+
)
14111568
_tr.stats.delta = 1/_ds.attrs["sampling_rate"]
14121569
_st.append(_tr)
14131570
return(_st)

0 commit comments

Comments
 (0)