1- from numpy import *
2- from pylab import *
3- import scipy .optimize .lbfgsb as lbfgsb
4- import scipy
5- from scipy .fftpack import dct , idct
61import numpy as np
72import numpy .ma as ma
3+ from scipy .fftpack import dct , idct
4+ from scipy .optimize import fmin_l_bfgs_b
85
96
107def smoothn (
@@ -23,138 +20,83 @@ def smoothn(
2320 TolZ = 1e-3 ,
2421 weightstr = "bisquare" ,
2522):
23+ """Robust spline smoothing for 1-D to N-D data.
24+
25+ This function provides a fast, automated and robust discretized smoothing
26+ spline for data of any dimension. It can handle missing values and supports
27+ robust smoothing that minimizes the influence of outlying data.
28+
29+ Parameters
30+ ----------
31+ y : array_like
32+ The data to be smoothed. Can be any N-D noisy array (time series,
33+ images, 3D data, etc.). Non-finite data (NaN or Inf) are treated
34+ as missing values.
35+ nS0 : int, optional
36+ Number of samples used when automatically determining the smoothing
37+ parameter. Default is 10.
38+ axis : int or tuple of ints, optional
39+ Axis or axes along which the smoothing is performed. If None (default),
40+ smoothing is performed along all axes.
41+ smoothOrder : float, optional
42+ Order of the smoothing. Default is 2.0 (equivalent to cubic spline).
43+ sd : array_like, optional
44+ Standard deviation of the data. If provided, it is used to compute
45+ weights as 1/sd^2.
46+ verbose : bool, optional
47+ If True, display progress information. Default is False.
48+ s0 : float, optional
49+ Initial value for the smoothing parameter. If None (default), it is
50+ automatically determined.
51+ z0 : array_like, optional
52+ Initial guess for the smoothed data. If None (default), the original
53+ data is used.
54+ isrobust : bool, optional
55+ If True, perform robust smoothing that minimizes the influence of
56+ outlying data. Default is False.
57+ W : array_like, optional
58+ Weighting array of positive values, must have the same size as y.
59+ A zero weight corresponds to a missing value.
60+ s : float, optional
61+ Smoothing parameter. If None (default), it is automatically determined
62+ using the generalized cross-validation (GCV) method. Larger values
63+ produce smoother results.
64+ MaxIter : int, optional
65+ Maximum number of iterations allowed. Default is 100.
66+ TolZ : float, optional
67+ Termination tolerance on Z. Must be between 0 and 1. Default is 1e-3.
68+ weightstr : str, optional
69+ Type of weight function for robust smoothing. Options are 'bisquare'
70+ (default), 'cauchy', or 'talworth'.
71+
72+ Returns
73+ -------
74+ z : ndarray
75+ The smoothed array.
76+ s : float
77+ The smoothing parameter used.
78+ exitflag : int
79+ Describes the exit condition:
80+ 1 - Convergence was reached
81+ 0 - Maximum number of iterations was reached
82+ -1 - DCT/IDCT functions not available
83+ Wtot : ndarray
84+ The final weighting array used for the smoothing.
85+
86+ Notes
87+ -----
88+ The function uses the discrete cosine transform (DCT) to efficiently
89+ compute the smoothing. The smoothing parameter s is determined automatically
90+ using the generalized cross-validation (GCV) method if not provided.
91+
92+ For robust smoothing, an iteratively re-weighted process is used to
93+ minimize the influence of outliers.
94+
95+ Reference
96+ ---------
97+ Garcia D, Robust smoothing of gridded data in one and higher dimensions
98+ with missing values. Computational Statistics & Data Analysis, 2010.
2699 """
27- function [z,s,exitflag,Wtot] = smoothn(varargin)
28- SMOOTHN Robust spline smoothing for 1-D to N-D data.
29- SMOOTHN provides a fast, automatized and robust discretized smoothing
30- spline for data of any dimension.
31- Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
32- can be any N-D noisy array (time series, images, 3D data,...). Non
33- finite data (NaN or Inf) are treated as missing values.
34- Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
35- S must be a real positive scalar. The larger S is, the smoother the
36- output will be. If the smoothing parameter S is omitted (see previous
37- option) or empty (i.e. S = []), it is automatically determined using
38- the generalized cross-validation (GCV) method.
39- Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of
40- real positive values, that must have the same size as Y. Note that a
41- nil weight corresponds to a missing value.
42- Robust smoothing
43- ----------------
44- Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
45- the influence of outlying data.
46- [Z,S] = SMOOTHN(...) also returns the calculated value for S so that
47- you can fine-tune the smoothing subsequently if needed.
48- An iteration process is used in the presence of weighted and/or missing
49- values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
50- termination parameters specified by OPTION_NAME and OPTION_VALUE. They
51- can contain the following criteria:
52- -----------------
53- TolZ: Termination tolerance on Z (default = 1e-3)
54- TolZ must be in ]0,1[
55- MaxIter: Maximum number of iterations allowed (default = 100)
56- Initial: Initial value for the iterative process (default =
57- original data)
58- -----------------
59- Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0);
60- [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
61- describes the exit condition of SMOOTHN:
62- 1 SMOOTHN converged.
63- 0 Maximum number of iterations was reached.
64- Class Support
65- -------------
66- Input array can be numeric or logical. The returned array is of class
67- double.
68- Notes
69- -----
70- The N-D (inverse) discrete cosine transform functions <a
71- href="matlab:web('http://www.biomecardio.com/matlab/dctn.html')"
72- >DCTN</a> and <a
73- href="matlab:web('http://www.biomecardio.com/matlab/idctn.html')"
74- >IDCTN</a> are required.
75- To be made
76- ----------
77- Estimate the confidence bands (see Wahba 1983, Nychka 1988).
78- Reference
79- ---------
80- Garcia D, Robust smoothing of gridded data in one and higher dimensions
81- with missing values. Computational Statistics & Data Analysis, 2010.
82- <a
83- href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')">PDF download</a>
84- Examples:
85- --------
86- # 1-D example
87- x = linspace(0,100,2**8);
88- y = cos(x/10)+(x/50)**2 + randn(size(x))/10;
89- y[[70, 75, 80]] = [5.5, 5, 6];
90- z = smoothn(y); # Regular smoothing
91- zr = smoothn(y,'robust'); # Robust smoothing
92- subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
93- axis square, title('Regular smoothing')
94- subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
95- axis square, title('Robust smoothing')
96- # 2-D example
97- xp = 0:.02:1;
98- [x,y] = meshgrid(xp);
99- f = exp(x+y) + sin((x-2*y)*3);
100- fn = f + randn(size(f))*0.5;
101- fs = smoothn(fn);
102- subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
103- subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square
104- # 2-D example with missing data
105- n = 256;
106- y0 = peaks(n);
107- y = y0 + rand(size(y0))*2;
108- I = randperm(n^2);
109- y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data
110- y(40:90,140:190) = NaN; # create a hole
111- z = smoothn(y); # smooth data
112- subplot(2,2,1:2), imagesc(y), axis equal off
113- title('Noisy corrupt data')
114- subplot(223), imagesc(z), axis equal off
115- title('Recovered data ...')
116- subplot(224), imagesc(y0), axis equal off
117- title('... compared with original data')
118- # 3-D example
119- [x,y,z] = meshgrid(-2:.2:2);
120- xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
121- vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
122- subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
123- title('Noisy data')
124- v = smoothn(vn);
125- subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
126- title('Smoothed data')
127- # Cardioid
128- t = linspace(0,2*pi,1000);
129- x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
130- y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
131- z = smoothn(complex(x,y));
132- plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
133- axis equal tight
134- # Cellular vortical flow
135- [x,y] = meshgrid(linspace(0,1,24));
136- Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
137- Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
138- Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise
139- Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise
140- I = randperm(numel(Vx));
141- Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
142- Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
143- Vx(I(31:60)) = NaN; # missing values
144- Vy(I(31:60)) = NaN; # missing values
145- Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing
146- subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
147- title('Noisy velocity field')
148- subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
149- title('Smoothed velocity field')
150- See also SMOOTH, SMOOTH3, DCTN, IDCTN.
151- -- Damien Garcia -- 2009/03, revised 2010/11
152- Visit my <a
153- href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')">website</a> for more details about SMOOTHN
154- # Check input arguments
155- error(nargchk(1,12,nargin));
156- z0=None,W=None,s=None,MaxIter=100,TolZ=1e-3
157- """
158100 is_masked = False
159101
160102 if type (y ) == ma .MaskedArray : # masked array
@@ -212,25 +154,24 @@ def smoothn(
212154 IsFinite = np .array (np .isfinite (y )).astype (bool )
213155 nof = IsFinite .sum () # number of finite elements
214156 W = W * IsFinite
215- if any (W < 0 ):
157+ if np . any (W < 0 ):
216158 raise ValueError ("smoothn:NegativeWeights" , "Weights must all be >=0" )
217159 else :
218160 # W = W/np.max(W)
219161 pass
220162 # ---
221163 # Weighted or missing data?
222- isweighted = any (W != 1 )
164+ isweighted = np . any (W != 1 )
223165 # ---
224166 # Robust smoothing?
225167 # isrobust
226168 # ---
227169 # Automatic smoothing?
228170 isauto = not s
229171 # ---
230- # DCTN and IDCTN are required
231- try :
232- from scipy .fftpack .realtransforms import dct , idct
233- except :
172+ # DCT and IDCT are required
173+ # We already imported them at the top of the file
174+ if 'dct' not in globals () or 'idct' not in globals ():
234175 z = y
235176 exitflag = - 1
236177 Wtot = 0
@@ -377,7 +318,7 @@ def smoothn(
377318 # print '==============='
378319 else :
379320 xpost = [s0 ]
380- xpost , f , d = lbfgsb . fmin_l_bfgs_b (
321+ xpost , f , d = fmin_l_bfgs_b (
381322 gcv ,
382323 xpost ,
383324 fprime = None ,
@@ -464,15 +405,15 @@ def gcv(p, Lambda, aow, DCTy, IsFinite, Wtot, y, nof, noe, smoothOrder):
464405 s = 10 ** p
465406 Gamma = 1.0 / (1 + (s * abs (Lambda )) ** smoothOrder )
466407 # --- RSS = Residual sum-of-squares
467- if aow > 0.9 : # aow = 1 means that all of the data are equally weighted
408+ if np . all ( aow > 0.9 ) : # aow = 1 means that all of the data are equally weighted
468409 # very much faster: does not require any inverse DCT
469410 RSS = np .linalg .norm (DCTy * (Gamma - 1.0 )) ** 2
470411 else :
471412 # take account of the weights to calculate RSS:
472413 yhat = dctND (Gamma * DCTy , f = idct )
473414 RSS = np .linalg .norm (np .sqrt (Wtot [IsFinite ]) * (y [IsFinite ] - yhat [IsFinite ])) ** 2
474415 # ---
475- TrH = sum (Gamma )
416+ TrH = np . sum (Gamma )
476417 GCVscore = RSS / float (nof ) / (1.0 - TrH / float (noe )) ** 2
477418 return GCVscore
478419
@@ -506,7 +447,7 @@ def RobustWeights(r, I, h, wstr):
506447# function z = InitialGuess(y,I)
507448def InitialGuess (y , I ):
508449 # -- nearest neighbor interpolation (in case of missing values)
509- if any (~ I ):
450+ if np . any (~ I ):
510451 try :
511452 from scipy .ndimage .morphology import distance_transform_edt
512453
0 commit comments