@@ -1234,95 +1234,186 @@ def __parse_waveform_input_and_validate(self, waveform):
12341234 def create_reference (self , ref , starttime , endtime , net = None , sta = None ,
12351235 loc = None , chan = None , tag = None , overwrite = False ):
12361236 """
1237- Create a region reference for fast lookup of segements of
1238- continuous data.
1237+ Creates a region reference for fast lookup of data segments.
1238+
1239+ :param ref: The reference label to apply.
1240+ :type ref: str
1241+ :param starttime: Start time of reference.
1242+ :type starttime: :class:`obspy.core.utcdatetime.UTCDateTime`
1243+ :param endtime: End time of reference.
1244+ :type endtime: :class:`obspy.core.utcdatetime.UTCDateTime`
1245+ :param net: Networks to create references for.
1246+ :type net: str, tuple, list or None
1247+ :param sta: Stations to create references for.
1248+ :type sta: str, tuple, list or None
1249+ :param loc: Location codes to create references for.
1250+ :type loc: str, tuple, list or None
1251+ :param chan: Channels to create references for.
1252+ :type chan: str, tuple, list or None
1253+ :param tag: Tags to create references for.
1254+ :type tag: str, tuple, list or None
1255+ :param overwrite: Overwrite existing references for this label.
1256+ :type overwrite: bool
1257+
1258+ This approach is useful for creating subsets of a dataset
1259+ without duplicating waveforms.
1260+
1261+ .. rubric:: Example
1262+
1263+ Consider an ASDFDataSet populated with continuous waveforms for
1264+ stations from two networks (AA and BB):
1265+
1266+ - AA.XXX
1267+ - AA.YYY
1268+ - AA.ZZZ
1269+ - BB.UUU
1270+ - BB.VVV
1271+ - BB.WWW
1272+
1273+ It may be useful to process event-segmented waveforms, where
1274+ a one-minute window of data is needed. We can create references
1275+ to these windowed data segments for fast extraction:
1276+
1277+ .. code-block:: python
1278+
1279+ >>> ds.create_reference("event000001",
1280+ ... obspy.UTCDateTime("2016001T01:00:00"),
1281+ ... obspy.UTCDateTime("2016001T01:01:00"))
1282+ >>> ds.get_data_for_reference("event000001")
1283+ 18 Trace(s) in Stream:
1284+ AA.XXX..HHZ | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1285+ ...
1286+ (16 other traces)
1287+ ...
1288+ BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1289+
1290+ Or perhaps we only want to include data from network AA in the
1291+ referenced data set:
1292+
1293+ .. code-block:: python
1294+
1295+ >>> ds.create_reference("event000001",
1296+ ... obspy.UTCDateTime("2016001T01:00:00"),
1297+ ... obspy.UTCDateTime("2016001T01:01:00"),
1298+ ... net="AA",
1299+ ... overwrite=True)
1300+ >>> ds.get_data_for_reference("event000001")
1301+ 9 Trace(s) in Stream:
1302+ AA.XXX..HHZ | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1303+ ...
1304+ (7 other traces)
1305+ ...
1306+ AA.ZZZ..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1307+
1308+ Or only horizontal component data:
1309+
1310+ .. code-block:: python
1311+
1312+ >>> ds.create_reference("event000001",
1313+ ... obspy.UTCDateTime("2016001T01:00:00"),
1314+ ... obspy.UTCDateTime("2016001T01:01:00"),
1315+ ... chan=("HHN", "HHE"),
1316+ ... overwrite=True)
1317+ >>> ds.get_data_for_reference("event000001")
1318+ 12 Trace(s) in Stream:
1319+ AA.XXX..HHN | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1320+ ...
1321+ (10 other traces)
1322+ ...
1323+ BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1324+
1325+ etc...
12391326 """
12401327
1241- if isinstance (net , str ):
1328+ if isinstance (net , str ) or isinstance ( net , unicode ) :
12421329 net = (net ,)
12431330 elif isinstance (net , tuple ) or isinstance (net , list ) or net is None :
12441331 pass
12451332 else :
12461333 raise (TypeError (net ))
12471334
1248- if isinstance (sta , str ):
1335+ if isinstance (sta , str ) or isinstance ( sta , unicode ) :
12491336 sta = (sta ,)
12501337 elif isinstance (sta , tuple ) or isinstance (sta , list ) or sta is None :
12511338 pass
12521339 else :
12531340 raise (TypeError (sta ))
12541341
1255- if isinstance (loc , str ):
1342+ if isinstance (loc , str ) or isinstance ( loc , unicode ) :
12561343 loc = (loc ,)
12571344 elif isinstance (loc , tuple ) or isinstance (loc , list ) or loc is None :
12581345 pass
12591346 else :
12601347 raise (TypeError (loc ))
12611348
1262- if isinstance (chan , str ):
1349+ if isinstance (chan , str ) or isinstance ( chan , unicode ) :
12631350 chan = (chan ,)
12641351 elif isinstance (chan , tuple ) or isinstance (chan , list ) or chan is None :
12651352 pass
12661353 else :
12671354 raise (TypeError (chan ))
12681355
1269- if isinstance (tag , str ):
1356+ if isinstance (tag , str ) or isinstance ( tag , unicode ) :
12701357 tag = (tag ,)
12711358 elif isinstance (tag , tuple ) or isinstance (tag , list ) or tag is None :
12721359 pass
12731360 else :
12741361 raise (TypeError (tag ))
12751362
1276- _predicate_net = lambda _key : net == None \
1277- or _key .split ("." )[0 ] in net
1363+ _ref_dtype = h5py .special_dtype (ref = h5py .RegionReference )
12781364
1279- _predicate_sta = lambda _key : sta == None \
1280- or _key .split ("." )[1 ] in sta
1365+ def _predicate_net ( _key ):
1366+ return ( net is None or _key .split ("." )[0 ] in net )
12811367
1282- _predicate_loc = lambda _key : loc == None \
1283- or _key .split ("." )[2 ] in loc
1368+ def _predicate_sta ( _key ):
1369+ return ( sta is None or _key .split ("." )[1 ] in sta )
12841370
1285- _predicate_chan = lambda _key : chan == None \
1286- or _key .split ("." )[- 1 ]. split ( "__" )[ 0 ] in chan
1371+ def _predicate_loc ( _key ):
1372+ return ( loc is None or _key .split ("." )[2 ] in loc )
12871373
1288- _predicate_tag = lambda _key : tag == None \
1289- or _key .split ("." )[- 1 ].split ("__" )[- 1 ] in tag
1374+ def _predicate_chan ( _key ):
1375+ return ( chan is None or _key .split ("." )[- 1 ].split ("__" )[0 ] in chan )
12901376
1291- _predicate_netsta = lambda _key : _predicate_net (_key )\
1292- and _predicate_sta ( _key )
1377+ def _predicate_tag (_key ):
1378+ return ( tag is None or _key . split ( "." )[ - 1 ]. split ( "__" )[ - 1 ] in tag )
12931379
1294- _predicate_locchantag = lambda _key : _predicate_loc (_key )\
1295- and _predicate_chan (_key )\
1296- and _predicate_tag (_key )
1380+ def _predicate_netsta (_key ):
1381+ return (_predicate_net (_key ) and _predicate_sta (_key ))
12971382
1383+ def _predicate_locchantag (_key ):
1384+ return (_predicate_loc (_key )
1385+ and _predicate_chan (_key )
1386+ and _predicate_tag (_key ))
1387+
1388+ _wf_grp = self ._waveform_group
12981389 for _station_name in itertools .ifilter (_predicate_netsta ,
12991390 self ._waveform_group .keys ()):
13001391 for _key in itertools .ifilter (_predicate_locchantag ,
1301- self . _waveform_group [_station_name ].keys ()):
1392+ _wf_grp [_station_name ].keys ()):
13021393
13031394 _net , _sta , _loc , _remainder = _key .split ("." )
13041395 _chan = _remainder .split ("__" )[0 ]
13051396
13061397 _ds = self ._waveform_group ["%s/%s" % (_station_name , _key )]
13071398
1308- _ts = obspy .UTCDateTime (_ds .attrs ["starttime" ]* 1e-9 )
1399+ _ts = obspy .UTCDateTime (_ds .attrs ["starttime" ]* 1e-9 )
13091400 _samprate = _ds .attrs ["sampling_rate" ]
1310- _te = _ts + len (_ds )/ _samprate
1401+ _te = _ts + len (_ds )/ _samprate
13111402 if _te < starttime or _ts > endtime :
13121403 continue
13131404
13141405 _offset = int ((starttime - _ts )* _samprate )
1315- _nsamp = int ((endtime - starttime )* _samprate )
1316- _ref = _ds .regionref [_offset :_offset + _nsamp + 1 ]
1406+ _nsamp = int ((endtime - starttime )* _samprate )
1407+ _ref = _ds .regionref [_offset :_offset + _nsamp + 1 ]
13171408
13181409 if ref not in self ._reference_group :
13191410 _ref_grp = self ._reference_group .create_group (ref )
13201411 else :
13211412 _ref_grp = self ._reference_group [ref ]
13221413
1323- _net = "__" if _net == "" else _net
1324- _sta = "__" if _sta == "" else _sta
1325- _loc = "__" if _loc == "" else _loc
1414+ _net = "__" if _net == "" else _net
1415+ _sta = "__" if _sta == "" else _sta
1416+ _loc = "__" if _loc == "" else _loc
13261417 _chan = "__" if _chan == "" else _chan
13271418 _handle = "/" .join ((_net , _sta , _loc , _chan ))
13281419
@@ -1332,9 +1423,10 @@ def create_reference(self, ref, starttime, endtime, net=None, sta=None,
13321423 if _handle not in _ref_grp :
13331424 _ref_ds = _ref_grp .create_dataset (_handle ,
13341425 (1 ,),
1335- dtype = h5py . special_dtype ( ref = h5py . RegionReference ) )
1426+ dtype = _ref_dtype )
13361427 _ref_ds .attrs ["sampling_rate" ] = _ds .attrs ["sampling_rate" ]
1337- _ref_ds .attrs ["starttime" ] = _ds .attrs ["starttime" ] + int (_offset / _samprate * 1.e9 )
1428+ _ts = _ds .attrs ["starttime" ] + int (_offset / _samprate * 1.e9 )
1429+ _ref_ds .attrs ["starttime" ] = _ts
13381430 _ref_ds [0 ] = _ref
13391431 else :
13401432 print ("Will not overwrite existing reference" )
@@ -1343,46 +1435,109 @@ def create_reference(self, ref, starttime, endtime, net=None, sta=None,
13431435 def get_data_for_reference (self , ref , net = None , sta = None , loc = None ,
13441436 chan = None ):
13451437 """
1346- Create a region reference for fast lookup of segements of
1347- continuous data.
1438+ Retrieve referenced data.
1439+
1440+ :param ref: Reference label.
1441+ :type ref: str
1442+ :param net: Networks to retrieve referenced data for.
1443+ :type net: str, tuple, list or None
1444+ :param sta: Stations to retrieve referenced data for.
1445+ :type sta: str, tuple, list or None
1446+ :param loc: Location codes to retrieve referenced data for.
1447+ :type loc: str, tuple, list or None
1448+ :param chan: Channels to retrieve referenced data for.
1449+ :type chan: str, tuple, list or None
1450+ :returns: Referenced data.
1451+ :rtype: :class:`~obspy.core.stream.Stream`
1452+
1453+ .. rubric:: Example
1454+
1455+ Consider an ASDFDataSet with references pointing to event-segmented
1456+ waveforms (see :meth:`create_reference`). We can retrieve data
1457+ for a particular reference label:
1458+
1459+ .. code-block:: python
1460+
1461+ >>> ds.get_data_for_reference("event000001")
1462+ 18 Trace(s) in Stream:
1463+ AA.XXX..HHZ | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1464+ ...
1465+ (16 other traces)
1466+ ...
1467+ BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1468+
1469+ Or for only the BB network:
1470+
1471+ .. code-block:: python
1472+
1473+ >>> ds.get_data_for_reference("event000001",
1474+ ... net="BB")
1475+ 9 Trace(s) in Stream:
1476+ BB.UUU..HHZ | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1477+ ...
1478+ (7 other traces)
1479+ ...
1480+ BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1481+
1482+ Or for only horizontal components:
1483+
1484+ .. code-block:: python
1485+
1486+ >>> ds.get_data_for_reference("event000001",
1487+ ... chan=("HHN","HHE"))
1488+ 12 Trace(s) in Stream:
1489+ AA.XXX..HHN | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1490+ ...
1491+ (10 other traces)
1492+ ...
1493+ BB.WWW..HHE | 2016-01-01T01:00:00.00Z ... | 100.0 Hz, 6001 samples
1494+
1495+ etc...
13481496 """
1349- if not isinstance (ref , str ):
1497+ if not isinstance (ref , str ) and not isinstance ( ref , unicode ) :
13501498 raise (TypeError ("reference must be type ::str::" ))
13511499 if ref not in self ._reference_group :
13521500 raise (IOError ("reference does not exist: %s" % ref ))
13531501
1354- if isinstance (net , str ):
1502+ if isinstance (net , str ) or isinstance ( net , unicode ) :
13551503 net = (net ,)
13561504 elif isinstance (net , tuple ) or isinstance (net , list ) or net is None :
13571505 pass
13581506 else :
13591507 raise (TypeError (net ))
13601508
1361- if isinstance (sta , str ):
1509+ if isinstance (sta , str ) or isinstance ( sta , unicode ) :
13621510 sta = (sta ,)
13631511 elif isinstance (sta , tuple ) or isinstance (sta , list ) or sta is None :
13641512 pass
13651513 else :
13661514 raise (TypeError (sta ))
13671515
1368- if isinstance (loc , str ):
1516+ if isinstance (loc , str ) or isinstance ( loc , unicode ) :
13691517 loc = (loc ,)
13701518 elif isinstance (loc , tuple ) or isinstance (loc , list ) or loc is None :
13711519 pass
13721520 else :
13731521 raise (TypeError (loc ))
13741522
1375- if isinstance (chan , str ):
1523+ if isinstance (chan , str ) or isinstance ( chan , unicode ) :
13761524 chan = (chan ,)
13771525 elif isinstance (chan , tuple ) or isinstance (chan , list ) or chan is None :
13781526 pass
13791527 else :
13801528 raise (TypeError (chan ))
13811529
1382- _predicate_net = lambda _key : net == None or _key in net
1383- _predicate_sta = lambda _key : sta == None or _key in sta
1384- _predicate_loc = lambda _key : loc == None or _key in loc
1385- _predicate_chan = lambda _key : chan == None or _key in chan
1530+ def _predicate_net (_key ):
1531+ return (net is None or _key in net )
1532+
1533+ def _predicate_sta (_key ):
1534+ return (sta is None or _key in sta )
1535+
1536+ def _predicate_loc (_key ):
1537+ return (loc is None or _key in loc )
1538+
1539+ def _predicate_chan (_key ):
1540+ return (chan is None or _key in chan )
13861541
13871542 _st = obspy .Stream ()
13881543 _ref_grp = self ._reference_group [ref ]
@@ -1400,14 +1555,16 @@ def get_data_for_reference(self, ref, net=None, sta=None, loc=None,
14001555
14011556 for _chan in itertools .ifilter (_predicate_chan ,
14021557 _loc_grp .keys ()):
1403- _ds = _loc_grp [_chan ]
1558+ _ds = _loc_grp [_chan ]
14041559 _ref = _ds [0 ]
14051560 _tr = obspy .Trace (data = self .__file [_ref ][_ref ])
1406- _tr .stats .network = _net if _net != "__" else ""
1407- _tr .stats .station = _sta if _sta != "__" else ""
1408- _tr .stats .location = _loc if _loc != "__" else ""
1409- _tr .stats .channel = _chan if _chan != "__" else ""
1410- _tr .stats .starttime = obspy .UTCDateTime (_ds .attrs ["starttime" ]* 1e-9 )
1561+ _tr .stats .network = _net if _net != "__" else ""
1562+ _tr .stats .station = _sta if _sta != "__" else ""
1563+ _tr .stats .location = _loc if _loc != "__" else ""
1564+ _tr .stats .channel = _chan if _chan != "__" else ""
1565+ _tr .stats .starttime = obspy .UTCDateTime (
1566+ _ds .attrs ["starttime" ]* 1e-9
1567+ )
14111568 _tr .stats .delta = 1 / _ds .attrs ["sampling_rate" ]
14121569 _st .append (_tr )
14131570 return (_st )
0 commit comments