99
1010import numpy as np
1111import pandas as pd
12+ from scipy .optimize import minimize_scalar
13+ from scipy .linalg import hankel
1214
1315from pvlib import atmosphere , tools
1416
@@ -597,7 +599,118 @@ def _calc_d(aod700, p):
597599 return d
598600
599601
600- def detect_clearsky (measured , clearsky , times , window_length ,
602+ def _calc_stats (data , samples_per_window , sample_interval , H ):
603+ """ Calculates statistics for each window, used by Reno-style clear
604+ sky detection functions. Does not return the line length statistic
605+ which is provided by _calc_windowed_stat and _line_length
606+
607+ Parameters
608+ ----------
609+ data : Series
610+ samples_per_window : int
611+ Number of data points in each window
612+ sample_interval : float
613+ Time in minutes in each sample interval
614+ H : 2D ndarray
615+ Hankel matrix defining the indices for each window.
616+
617+ Returns
618+ -------
619+ data_mean : Series
620+ mean of data in each window
621+ data_max : Series
622+ maximum of data in each window
623+ data_slope_nstd : Series
624+ standard deviation of difference between data points in each window
625+ data_slope : Series
626+ difference between successive data points
627+ """
628+
629+ data_mean = data .values [H ].mean (axis = 0 )
630+ data_mean = _to_centered_series (data_mean , data .index , samples_per_window )
631+ data_max = data .values [H ].max (axis = 0 )
632+ data_max = _to_centered_series (data_max , data .index , samples_per_window )
633+ # shift to get forward difference, .diff() is backward difference instead
634+ data_diff = data .diff ().shift (- 1 )
635+ data_slope = data_diff / sample_interval
636+ data_slope_nstd = _slope_nstd_windowed (data , H , samples_per_window )
637+ data_slope_nstd = data_slope_nstd
638+
639+ return data_mean , data_max , data_slope_nstd , data_slope
640+
641+
642+ def _slope_nstd_windowed (data , H , samples_per_window ):
643+ with np .errstate (divide = 'ignore' , invalid = 'ignore' ):
644+ raw = np .diff (data )
645+ raw = raw [H [:- 1 , ]].std (ddof = 1 , axis = 0 ) / data .values [H ].mean (axis = 0 )
646+ return _to_centered_series (raw , data .index , samples_per_window )
647+
648+
649+ def _max_diff_windowed (data , H , samples_per_window ):
650+ raw = np .diff (data )
651+ raw = np .abs (raw [H [:- 1 , ]]).max (axis = 0 )
652+ return _to_centered_series (raw , data .index , samples_per_window )
653+
654+
655+ def _line_length_windowed (data , H , samples_per_window ,
656+ sample_interval ):
657+ raw = np .sqrt (np .diff (data )** 2. + sample_interval ** 2. )
658+ raw = np .sum (raw [H [:- 1 , ]], axis = 0 )
659+ return _to_centered_series (raw , data .index , samples_per_window )
660+
661+
662+ def _to_centered_series (vals , idx , samples_per_window ):
663+ vals = np .pad (vals , ((0 , len (idx ) - len (vals )),), mode = 'constant' ,
664+ constant_values = np .nan )
665+ shift = samples_per_window // 2 # align = 'center' only
666+ return pd .Series (index = idx , data = vals ).shift (shift )
667+
668+
669+ def _get_sample_intervals (times , win_length ):
670+ """ Calculates time interval and samples per window for Reno-style clear
671+ sky detection functions
672+ """
673+ deltas = np .diff (times .values ) / np .timedelta64 (1 , '60s' )
674+
675+ # determine if we can proceed
676+ if times .inferred_freq and len (np .unique (deltas )) == 1 :
677+ sample_interval = times [1 ] - times [0 ]
678+ sample_interval = sample_interval .seconds / 60 # in minutes
679+ samples_per_window = int (win_length / sample_interval )
680+ return sample_interval , samples_per_window
681+ else :
682+ raise NotImplementedError ('algorithm does not yet support unequal '
683+ 'times. consider resampling your data.' )
684+
685+
686+ def _clear_sample_index (clear_windows , samples_per_window , align , H ):
687+ """
688+ Returns indices of clear samples in clear windows
689+ """
690+ # H contains indices for each window, e.g. indices for the first window
691+ # are in first column of H.
692+ # clear_windows contains one boolean for each window and is aligned
693+ # by 'align', default to center
694+ # shift clear_windows.index to be aligned left (e.g. first value in the
695+ # left-most position) to line up with the first column of H.
696+
697+ # commented if/else block for future align='left', 'right' capability
698+ # if align == 'right':
699+ # shift = 1 - samples_per_window
700+ # elif align == 'center':
701+ # shift = - (samples_per_window // 2)
702+ # else:
703+ # shift = 0
704+ shift = - (samples_per_window // 2 )
705+ idx = clear_windows .shift (shift )
706+ # drop rows at the end corresponding to windows past the end of data
707+ idx = idx .drop (clear_windows .index [1 - samples_per_window :])
708+ idx = idx .astype (bool ) # shift changed type to object
709+ clear_samples = np .unique (H [:, idx ])
710+ return clear_samples
711+
712+
713+ def detect_clearsky (measured , clearsky , times = None , window_length = 10 ,
601714 mean_diff = 75 , max_diff = 75 ,
602715 lower_line_length = - 5 , upper_line_length = 10 ,
603716 var_diff = 0.005 , slope_dev = 8 , max_iterations = 20 ,
@@ -623,24 +736,25 @@ def detect_clearsky(measured, clearsky, times, window_length,
623736 Parameters
624737 ----------
625738 measured : array or Series
626- Time series of measured values.
739+ Time series of measured GHI. [W/m2]
627740 clearsky : array or Series
628- Time series of the expected clearsky values.
629- times : DatetimeIndex
630- Times of measured and clearsky values.
631- window_length : int
741+ Time series of the expected clearsky GHI. [W/m2]
742+ times : DatetimeIndex or None, default None.
743+ Times of measured and clearsky values. If None the index of measured
744+ will be used.
745+ window_length : int, default 10
632746 Length of sliding time window in minutes. Must be greater than 2
633747 periods.
634748 mean_diff : float, default 75
635749 Threshold value for agreement between mean values of measured
636- and clearsky in each interval, see Eq. 6 in [1].
750+ and clearsky in each interval, see Eq. 6 in [1]. [W/m2]
637751 max_diff : float, default 75
638752 Threshold value for agreement between maxima of measured and
639- clearsky values in each interval, see Eq. 7 in [1].
753+ clearsky values in each interval, see Eq. 7 in [1]. [W/m2]
640754 lower_line_length : float, default -5
641755 Lower limit of line length criterion from Eq. 8 in [1].
642- Criterion satisfied when
643- lower_line_length < line length difference < upper_line_length
756+ Criterion satisfied when lower_line_length < line length difference
757+ < upper_line_length.
644758 upper_line_length : float, default 10
645759 Upper limit of line length criterion from Eq. 8 in [1].
646760 var_diff : float, default 0.005
@@ -671,6 +785,13 @@ def detect_clearsky(measured, clearsky, times, window_length,
671785 detected clear_samples. Only provided if return_components is
672786 True.
673787
788+ Raises
789+ ------
790+ ValueError
791+ If measured is not a Series and times is not provided
792+ NotImplementedError
793+ If timestamps are not equally spaced
794+
674795 References
675796 ----------
676797 .. [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear
@@ -692,78 +813,81 @@ def detect_clearsky(measured, clearsky, times, window_length,
692813 * parameters are controllable via keyword arguments
693814 * option to return individual test components and clearsky scaling
694815 parameter
816+ * uses centered windows (Matlab function uses left-aligned windows)
695817 """
696818
697- # calculate deltas in units of minutes (matches input window_length units)
698- deltas = np .diff (times .values ) / np .timedelta64 (1 , '60s' )
819+ if times is None :
820+ try :
821+ times = measured .index
822+ except AttributeError :
823+ raise ValueError ("times is required when measured is not a Series" )
699824
700- # determine the unique deltas and if we can proceed
701- unique_deltas = np .unique (deltas )
702- if len (unique_deltas ) == 1 :
703- sample_interval = unique_deltas [0 ]
825+ # be polite about returning the same type as was input
826+ ispandas = isinstance (measured , pd .Series )
827+
828+ # for internal use, need a Series
829+ if not ispandas :
830+ meas = pd .Series (measured , index = times )
704831 else :
705- raise NotImplementedError ('algorithm does not yet support unequal '
706- 'times. consider resampling your data.' )
832+ meas = measured
707833
708- intervals_per_window = int (window_length / sample_interval )
834+ if not isinstance (clearsky , pd .Series ):
835+ clear = pd .Series (clearsky , index = times )
836+ else :
837+ clear = clearsky
709838
710- # generate matrix of integers for creating windows with indexing
711- from scipy .linalg import hankel
712- H = hankel (np .arange (intervals_per_window ), # noqa: N806
713- np .arange (intervals_per_window - 1 , len (times )))
839+ sample_interval , samples_per_window = _get_sample_intervals (times ,
840+ window_length )
714841
715- # convert pandas input to numpy array, but save knowledge of input state
716- # so we can return a series if that's what was originally provided
717- ispandas = isinstance (measured , pd .Series )
718- measured = np .asarray (measured )
719- clearsky = np .asarray (clearsky )
842+ # generate matrix of integers for creating windows with indexing
843+ H = hankel (np .arange (samples_per_window ),
844+ np .arange (samples_per_window - 1 , len (times )))
720845
721846 # calculate measurement statistics
722- meas_mean = np .mean (measured [H ], axis = 0 )
723- meas_max = np .max (measured [H ], axis = 0 )
724- meas_diff = np .diff (measured [H ], n = 1 , axis = 0 )
725- meas_slope = np .diff (measured [H ], n = 1 , axis = 0 ) / sample_interval
726- # matlab std function normalizes by N-1, so set ddof=1 here
727- meas_slope_nstd = np .std (meas_slope , axis = 0 , ddof = 1 ) / meas_mean
728- meas_line_length = np .sum (np .sqrt (
729- meas_diff * meas_diff +
730- sample_interval * sample_interval ), axis = 0 )
847+ meas_mean , meas_max , meas_slope_nstd , meas_slope = _calc_stats (
848+ meas , samples_per_window , sample_interval , H )
849+ meas_line_length = _line_length_windowed (
850+ meas , H , samples_per_window , sample_interval )
731851
732852 # calculate clear sky statistics
733- clear_mean = np .mean (clearsky [H ], axis = 0 )
734- clear_max = np .max (clearsky [H ], axis = 0 )
735- clear_diff = np .diff (clearsky [H ], n = 1 , axis = 0 )
736- clear_slope = np .diff (clearsky [H ], n = 1 , axis = 0 ) / sample_interval
737-
738- from scipy .optimize import minimize_scalar
739-
853+ clear_mean , clear_max , _ , clear_slope = _calc_stats (
854+ clear , samples_per_window , sample_interval , H )
855+
856+ # find a scaling factor for the clear sky time series that minimizes the
857+ # RMSE between the clear times identified in the measured data and the
858+ # scaled clear sky time series. Optimization to determine the scaling
859+ # factor considers all identified clear times, which is different from [1]
860+ # where the scaling factor was determined from clear times on days with
861+ # at least 50% of the day being identified as clear.
740862 alpha = 1
741863 for iteration in range (max_iterations ):
742- clear_line_length = np . sum ( np . sqrt (
743- alpha * alpha * clear_diff * clear_diff +
744- sample_interval * sample_interval ), axis = 0 )
864+ scaled_clear = alpha * clear
865+ clear_line_length = _line_length_windowed (
866+ scaled_clear , H , samples_per_window , sample_interval )
745867
746868 line_diff = meas_line_length - clear_line_length
747-
869+ slope_max_diff = _max_diff_windowed (
870+ meas - scaled_clear , H , samples_per_window )
748871 # evaluate comparison criteria
749872 c1 = np .abs (meas_mean - alpha * clear_mean ) < mean_diff
750873 c2 = np .abs (meas_max - alpha * clear_max ) < max_diff
751874 c3 = (line_diff > lower_line_length ) & (line_diff < upper_line_length )
752875 c4 = meas_slope_nstd < var_diff
753- c5 = np .max (np .abs (meas_slope -
754- alpha * clear_slope ), axis = 0 ) < slope_dev
876+ c5 = slope_max_diff < slope_dev
755877 c6 = (clear_mean != 0 ) & ~ np .isnan (clear_mean )
756878 clear_windows = c1 & c2 & c3 & c4 & c5 & c6
757879
758880 # create array to return
759- clear_samples = np .full_like (measured , False , dtype = 'bool' )
881+ clear_samples = np .full_like (meas , False , dtype = 'bool' )
760882 # find the samples contained in any window classified as clear
761- clear_samples [np .unique (H [:, clear_windows ])] = True
883+ idx = _clear_sample_index (clear_windows , samples_per_window , 'center' ,
884+ H )
885+ clear_samples [idx ] = True
762886
763887 # find a new alpha
764888 previous_alpha = alpha
765- clear_meas = measured [clear_samples ]
766- clear_clear = clearsky [clear_samples ]
889+ clear_meas = meas [clear_samples ]
890+ clear_clear = clear [clear_samples ]
767891
768892 def rmse (alpha ):
769893 return np .sqrt (np .mean ((clear_meas - alpha * clear_clear )** 2 ))
@@ -773,7 +897,7 @@ def rmse(alpha):
773897 break
774898 else :
775899 import warnings
776- warnings .warn ('failed to converge after %s iterations'
900+ warnings .warn ('rescaling failed to converge after %s iterations'
777901 % max_iterations , RuntimeWarning )
778902
779903 # be polite about returning the same type as was input
@@ -794,8 +918,7 @@ def rmse(alpha):
794918 components ['max_diff' ] = np .abs (meas_max - alpha * clear_max )
795919 components ['line_length' ] = meas_line_length - clear_line_length
796920 components ['slope_nstd' ] = meas_slope_nstd
797- components ['slope_max' ] = (np .max (
798- meas_slope - alpha * clear_slope , axis = 0 ))
921+ components ['slope_max' ] = slope_max_diff
799922
800923 return clear_samples , components , alpha
801924 else :
0 commit comments