Skip to content

Commit 80f0b07

Browse files
authored
Merge pull request #117 from hvasbath/nonT_2d
covariance, models.geodetic introduce 2d spatial noise estimation
2 parents 5606e9b + 99acc71 commit 80f0b07

27 files changed

+1181
-437
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,23 @@ All notable changes to BEAT will be documented in this file.
44

55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
66

7+
## [1.2.4] 14 February 2023
8+
Contributors: Hannes Vasyura-Bathke @hvasbath
9+
10+
### Added
11+
- covariance.GeodeticNoiseAnalyser: parametrize the residual noise allowing for non-Toeplitz/import
12+
- plotting.geodetic/seismic added standardized residual histograms to residuals
13+
- plotting.geodetic: new plot "geodetic_covariances"
14+
- plotting.geodetic: add "individual" plot_projection to scene_fits to show stdz residuals
15+
- plotting.seismic: fuzzy_bb, lune, hudson and fuzzy_mt_decomp support n_sources > 1
16+
- plotting.seismic: allow plotting of fuzzy_bb for RectangularSource
17+
18+
### Fixed
19+
- plotting.marginals: stage_posteriors fixed axis unification and erroneous histogram plotting
20+
- docs: short_installation fix python version to 3.8
21+
- heart: pol_synthetics allow for RectangularSource
22+
- covariance: estimation of variance on amplitude spectra instead of complex spectra
23+
724

825
## [1.2.3] 20 November 2022
926
Contributors: Hannes Vasyura-Bathke @hvasbath

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ If you came looking for the beat package calculating internet time you can find
77

88
Based on pyrocko, theano and pymc3
99

10+
14 February 2023
11+
Version 1.2.4 is released. Details in the [changelog](https://github.com/hvasbath/beat/blob/master/CHANGELOG.md).
12+
1013
20 November 2022
1114
Version 1.2.3 is released. Bugfix-release.
1215

beat/apps/beat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1212,7 +1212,7 @@ def setup(parser):
12121212
if options.calc_derived:
12131213
varnames, shapes = pc.get_derived_variables_shapes()
12141214
rtrace.add_derived_variables(varnames, shapes)
1215-
splitinds = cumsum([shape[0] for shape in shapes[:-1]])
1215+
splitinds = range(1, len(varnames))
12161216

12171217
rtrace.setup(draws=draws, chain=-1, overwrite=True)
12181218

beat/config.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from theano import config as tconfig
3333

3434
from beat import utility
35-
from beat.covariance import available_noise_structures
35+
from beat.covariance import available_noise_structures, available_noise_structures_2d
3636
from beat.heart import (
3737
ArrivalTaper,
3838
Filter,
@@ -254,6 +254,7 @@ def multi_event_stations_name(nevent=0):
254254
_quantity_choices = ["displacement", "velocity", "acceleration"]
255255
_interpolation_choices = ["nearest_neighbor", "multilinear"]
256256
_structure_choices = available_noise_structures()
257+
_structure_choices_2d = available_noise_structures_2d()
257258
_mode_choices = [geometry_mode_str, ffi_mode_str]
258259
_regularization_choices = ["laplacian", "none"]
259260
_correlation_function_choices = ["nearest_neighbor", "gaussian", "exponential"]
@@ -691,6 +692,21 @@ class SeismicNoiseAnalyserConfig(Object):
691692
)
692693

693694

695+
class GeodeticNoiseAnalyserConfig(Object):
696+
697+
structure = StringChoice.T(
698+
choices=_structure_choices_2d,
699+
default="import",
700+
help="Determines data-covariance matrix structure."
701+
" Choices: %s" % utility.list2string(_structure_choices_2d),
702+
)
703+
max_dist_perc = Float.T(
704+
default=0.2,
705+
help="Distance in decimal percent from maximum distance in scene to use for "
706+
"non-Toeplitz noise estimation",
707+
)
708+
709+
694710
class SeismicConfig(Object):
695711
"""
696712
Config for seismic data optimization related parameters.
@@ -1075,9 +1091,9 @@ class GeodeticConfig(Object):
10751091
default={"SAR": SARDatasetConfig.D(), "GNSS": GNSSDatasetConfig.D()},
10761092
help="Types of geodetic data, i.e. SAR, GNSS, with their configs",
10771093
)
1078-
calc_data_cov = Bool.T(
1079-
default=True,
1080-
help="Flag for calculating the data covariance matrix, " 'outsourced to "kite"',
1094+
noise_estimator = GeodeticNoiseAnalyserConfig.T(
1095+
default=GeodeticNoiseAnalyserConfig.D(),
1096+
help="Determines the structure of the data-covariance matrix.",
10811097
)
10821098
interpolation = StringChoice.T(
10831099
choices=_interpolation_choices,

beat/covariance.py

Lines changed: 186 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
from pymc3 import Point
66
from pyrocko import gf, trace
77
from scipy.linalg import toeplitz
8+
from scipy.spatial import KDTree
89
from theano import config as tconfig
910

1011
from beat import heart
11-
from beat.utility import ensure_cov_psd, list2string, running_window_rms
12+
from beat.utility import ensure_cov_psd, list2string, running_window_rms, distances
13+
1214

1315
logger = logging.getLogger("covariance")
1416

@@ -91,10 +93,20 @@ def ones_data_covariance(n, dt=None, tzero=None):
9193
}
9294

9395

96+
NoiseStructureCatalog2d = {
97+
"import": ones_data_covariance,
98+
"non-toeplitz": ones_data_covariance,
99+
}
100+
101+
94102
def available_noise_structures():
95103
return list(NoiseStructureCatalog.keys())
96104

97105

106+
def available_noise_structures_2d():
107+
return list(NoiseStructureCatalog2d.keys())
108+
109+
98110
def import_data_covariance(data_trace, arrival_taper, sample_rate, domain="time"):
99111
"""
100112
Use imported covariance matrixes and check size consistency with taper.
@@ -141,14 +153,98 @@ def import_data_covariance(data_trace, arrival_taper, sample_rate, domain="time"
141153
return data_cov
142154

143155

156+
class GeodeticNoiseAnalyser(object):
157+
"""
158+
Geodetic noise analyser
159+
160+
Parameters
161+
----------
162+
structure : string
163+
either, import, variance, non-toeplitz
164+
events : list
165+
of :class:`pyrocko.meta.Event` reference event(s) from catalog
166+
"""
167+
168+
def __init__(
169+
self,
170+
config,
171+
events=None,
172+
):
173+
174+
avail = available_noise_structures_2d()
175+
176+
if config.structure not in avail:
177+
raise AttributeError(
178+
'Selected noise structure "%s" not supported! Implemented'
179+
" noise structures: %s" % (structure, list2string(avail))
180+
)
181+
182+
self.events = events
183+
self.config = config
184+
185+
def get_structure(self, dataset):
186+
return NoiseStructureCatalog2d[self.config.structure](dataset.ncoords)
187+
188+
def do_import(self, dataset):
189+
if dataset.covariance.data is not None:
190+
return dataset.covariance.data
191+
else:
192+
raise ValueError(
193+
"Data covariance for dataset %s needs to be defined!" % dataset.name
194+
)
195+
196+
def do_non_toeplitz(self, dataset, result):
197+
198+
if dataset.typ == "SAR":
199+
dataset.update_local_coords(self.events[0])
200+
coords = num.vstack([dataset.east_shifts, dataset.north_shifts]).T
201+
202+
scaling = non_toeplitz_covariance_2d(
203+
coords, result.processed_res, max_dist_perc=self.config.max_dist_perc
204+
)
205+
else:
206+
scaling = dataset.covariance.data
207+
208+
if num.isnan(scaling).any():
209+
raise ValueError(
210+
"Estimated Non-Toeplitz covariance matrix for dataset %s contains Nan! "
211+
"Please increase 'max_dist_perc'!" % dataset.name
212+
)
213+
214+
return scaling
215+
216+
def get_data_covariance(self, dataset, result=None):
217+
"""
218+
Estimated data covariances of seismic traces
219+
220+
Parameters
221+
----------
222+
datasets
223+
results
224+
225+
Returns
226+
-------
227+
:class:`numpy.ndarray`
228+
"""
229+
230+
covariance_structure = self.get_structure(dataset)
231+
232+
if self.config.structure == "import":
233+
scaling = self.do_import(dataset)
234+
elif self.config.structure == "non-toeplitz":
235+
scaling = self.do_non_toeplitz(dataset, result)
236+
237+
return ensure_cov_psd(scaling * covariance_structure)
238+
239+
144240
class SeismicNoiseAnalyser(object):
145241
"""
146242
Seismic noise analyser
147243
148244
Parameters
149245
----------
150246
structure : string
151-
either identity, exponential, import
247+
either, variance, exponential, import, non-toeplitz
152248
pre_arrival_time : float
153249
in [s], time before P arrival until variance is estimated
154250
engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
@@ -162,7 +258,7 @@ class SeismicNoiseAnalyser(object):
162258

163259
def __init__(
164260
self,
165-
structure="identity",
261+
structure="variance",
166262
pre_arrival_time=5.0,
167263
engine=None,
168264
events=None,
@@ -277,11 +373,15 @@ def do_variance_estimate(self, wmap, chop_bounds=None):
277373
nslc_id_str = list2string(ctrace.nslc_id)
278374

279375
if wmap.config.domain == "spectrum":
280-
lower_idx, upper_idx = wmap.get_valid_spectrum_indices(
376+
valid_spectrum_indices = wmap.get_valid_spectrum_indices(
281377
chop_bounds=chop_bounds, pad_to_pow2=True
282378
)
283-
data = ctrace.spectrum(True, 0.05)[1]
284-
data = data[lower_idx:upper_idx]
379+
data = heart.fft_transforms(
380+
[ctrace],
381+
valid_spectrum_indices,
382+
outmode="stacked_traces",
383+
pad_to_pow2=True,
384+
)[0].get_ydata()
285385
else:
286386
data = ctrace.get_ydata()
287387

@@ -294,7 +394,7 @@ def do_variance_estimate(self, wmap, chop_bounds=None):
294394

295395
scaling = num.nanvar(data)
296396
if num.isfinite(scaling).all():
297-
logger.debug("Variance estimate of %s = %g" % (nslc_id_str, scaling))
397+
logger.info("Variance estimate of %s = %g" % (nslc_id_str, scaling))
298398
scalings.append(scaling)
299399
else:
300400
raise ValueError(
@@ -651,7 +751,7 @@ def toeplitz_covariance(data, window_size):
651751
652752
Returns
653753
-------
654-
toeplitz : :class:`numpy.ndarray` 2-d, (size data, size data)
754+
toeplitz : :class:`numpy.ndarray` 1-d, (size data)
655755
stds : :class:`numpy.ndarray` 1-d, size data
656756
of running windows
657757
"""
@@ -663,7 +763,7 @@ def toeplitz_covariance(data, window_size):
663763
def non_toeplitz_covariance(data, window_size):
664764
"""
665765
Get scaled non- Toeplitz covariance matrix, which may be able to account
666-
for non-stationary data-errors.
766+
for non-stationary data-errors. For 1-d data.
667767
668768
Parameters
669769
----------
@@ -680,6 +780,83 @@ def non_toeplitz_covariance(data, window_size):
680780
return toeplitz * stds[:, num.newaxis] * stds[num.newaxis, :]
681781

682782

783+
def k_nearest_neighbor_rms(coords, data, k=None, max_dist_perc=0.2):
784+
"""
785+
Calculate running rms on irregular sampled 2d spatial data.
786+
787+
Parameters
788+
----------
789+
coords : :class:`numpy.ndarray` 2-d, (size data, n coords-dims)
790+
containing spatial coordinates (east_shifts, north_shifts)
791+
data : :class:`numpy.ndarray` 1-d, (size data)
792+
containing values of physical quantity
793+
k : int
794+
taking k - nearest neighbors for std estimation
795+
max_dist_perc : float
796+
max distance [decimal percent] to select as nearest neighbors
797+
"""
798+
799+
if k and max_dist_perc:
800+
raise ValueError("Either k or max_dist_perc should be defined!")
801+
802+
kdtree = KDTree(coords, leafsize=1)
803+
804+
dists = distances(coords, coords)
805+
r = dists.max() * max_dist_perc
806+
807+
logger.debug("Nearest neighbor distance is: %f", r)
808+
809+
stds = []
810+
for point in coords:
811+
if k is not None:
812+
_, idxs = kdtree.query(point, k=k)
813+
elif r is not None:
814+
idxs = kdtree.query_ball_point(point, r=r)
815+
else:
816+
raise ValueError()
817+
818+
stds.append(num.std(data[idxs], ddof=1))
819+
820+
return num.array(stds)
821+
822+
823+
def toeplitz_covariance_2d(coords, data, max_dist_perc=0.2):
824+
"""
825+
Get Toeplitz banded matrix for given 2d data.
826+
827+
Returns
828+
-------
829+
toeplitz : :class:`numpy.ndarray` 2-d, (size data, size data)
830+
stds : :class:`numpy.ndarray` 1-d, size data
831+
of running windows
832+
max_dist_perc : float
833+
max distance [decimal percent] to select as nearest neighbors
834+
"""
835+
stds = k_nearest_neighbor_rms(coords=coords, data=data, max_dist_perc=max_dist_perc)
836+
coeffs = autocovariance(data / stds)
837+
return toeplitz(coeffs), stds
838+
839+
840+
def non_toeplitz_covariance_2d(coords, data, max_dist_perc):
841+
"""
842+
Get scaled non- Toeplitz covariance matrix, which may be able to account
843+
for non-stationary data-errors. For 2-d geospatial data.
844+
845+
Parameters
846+
----------
847+
data : :class:`numpy.ndarray`
848+
of data to estimate non-stationary error matrix for
849+
max_dist_perc : float
850+
max distance [decimal percent] to select as nearest neighbors
851+
852+
Returns
853+
-------
854+
:class:`numpy.ndarray` (size data, size data)
855+
"""
856+
toeplitz, stds = toeplitz_covariance_2d(coords, data, max_dist_perc)
857+
return toeplitz * stds[:, num.newaxis] * stds[num.newaxis, :]
858+
859+
683860
def init_proposal_covariance(bij, vars, model, pop_size=1000):
684861
"""
685862
Create initial proposal covariance matrix based on random samples

beat/ffi/base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1130,8 +1130,10 @@ def seis_construct_gf_linear(
11301130
flag to overwrite existing linear GF Library
11311131
"""
11321132

1133+
if wavemap.config.domain == "spectrum":
1134+
raise TypeError("FFI is currently only supported for time-domain!")
1135+
11331136
# get starttimes for hypocenter at corner of fault
1134-
# TODO: make nsubfaults compatible - should work
11351137
st_mins = []
11361138
st_maxs = []
11371139
for idx, sf in enumerate(fault.iter_subfaults()):

beat/ffi/fault.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@
2323
from beat.fast_sweeping import fast_sweep
2424
from beat.heart import velocities_from_pole
2525
from beat.models.laplacian import (
26-
distances,
2726
get_smoothing_operator_correlated,
2827
get_smoothing_operator_nearest_neighbor,
2928
)
3029
from beat.utility import (
3130
Counter,
3231
check_point_keys,
32+
distances,
3333
dump_objects,
3434
find_elbow,
3535
kmtypes,

0 commit comments

Comments
 (0)