Merge pull request #302 from HERA-Team/version030

nkern · web-flow · commit 10c290fd33e3 · 2021-03-02T11:41:04.000-05:00
Version 0.3.0
diff --git a/docs/conf.py b/docs/conf.py
@@ -26,12 +26,13 @@
 # Mock-import modules to allow build to complete without throwing errors
 import mock
 MOCK_MODULES = ['numpy', 'scipy', 'scipy.interpolate', 'scipy.integrate', 
-                'pyuvdata', 'h5py', 'aipy', 'omnical', 'linsolve', 'hera_qm', 
-                'uvtools', 'uvtools.dspec', 'hera_cal', 'hera_cal.utils', 'healpy', 
-                'scikit-learn', 'astropy', 'astropy.cosmology', 'astropy.units', 
-                'astropy.constants', 'matplotlib', 'matplotlib.pyplot', 
-                'pylab', 'yaml', 'pyuvdata.utils', ]
-                
+                'pyuvdata', 'h5py', 'aipy', 'aipy.const', 'omnical', 'linsolve', 
+                'hera_qm', 'uvtools', 'uvtools.dspec', 'hera_cal', 
+                'hera_cal.utils', 'healpy', 'scikit-learn', 'astropy', 
+                'astropy.cosmology', 'astropy.units', 'astropy.constants', 
+                'matplotlib', 'matplotlib.pyplot', 'pylab', 'yaml', 
+                'pyuvdata.utils', 'hera_sim']
+
 for mod_name in MOCK_MODULES:
     sys.modules[mod_name] = mock.Mock()
 
diff --git a/hera_pspec/VERSION b/hera_pspec/VERSION
@@ -1 +1 @@
-0.2.0
+0.3.0
diff --git a/hera_pspec/grouping.py b/hera_pspec/grouping.py
@@ -825,7 +825,7 @@ def spherical_average(uvp_in, kbins, bin_widths, blpair_groups=None, time_avg=Fa
             # bin covariance: C_sph = H.T C_cyl H
             # cm shape (Npols, Ntimes, Ndlyblps, Ndlyblps)
             cm = np.moveaxis(cov_array_real[spw], -1, 0)
-            cm = Ht @ cm @ H
+            cm = Ht @ cm.clip(-1e40, 1e40) @ H  # clip infs
             cov_array_real[spw] = np.moveaxis(cm, 0, -1)
             cov_array_imag[spw] = np.zeros_like(cov_array_real[spw])
 
@@ -874,7 +874,7 @@ def spherical_average(uvp_in, kbins, bin_widths, blpair_groups=None, time_avg=Fa
     uvp.lst_2_array = np.unique(uvp_in.lst_2_array)
 
     # Add to history
-    uvp.history += version.history_string(notes=add_to_history)
+    uvp.history = "Spherically averaged with hera_pspec [{}]\n{}\n{}\n{}".format(version.git_hash[:15], add_to_history, '-'*40, uvp.history)
 
     # validity check
     if run_check:
diff --git a/hera_pspec/plot.py b/hera_pspec/plot.py
@@ -301,7 +301,7 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
                     deltasq=False, log=True, lst_in_hrs=True,
                     vmin=None, vmax=None, cmap='YlGnBu', axes=None, 
                     figsize=(14, 6), force_plot=False, times=None, 
-                    title_type='blpair', colorbar=True):
+                    title_type='blpair', colorbar=True, **kwargs):
     """
     Plot a 1D delay spectrum waterfall (or spectra) for a group of baselines.
     
@@ -374,6 +374,9 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
     colorbar : bool, optional
         Whether to make a colorbar. Default: True
 
+    kwargs : keyword arguments
+        Additional kwargs to pass to ax.matshow()
+
     Returns
     -------
     fig : matplotlib.pyplot.Figure
@@ -558,7 +561,7 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
             # plot waterfall
             cax = ax.matshow(waterfall[key], cmap=cmap, aspect='auto', 
                              vmin=vmin, vmax=vmax, 
-                             extent=[x[0], x[-1], y[-1], y[0]])
+                             extent=[x[0], x[-1], y[-1], y[0]], **kwargs)
 
             # ax config
             ax.xaxis.set_ticks_position('bottom')
diff --git a/hera_pspec/pspecdata.py b/hera_pspec/pspecdata.py
diff --git a/hera_pspec/tests/test_grouping.py b/hera_pspec/tests/test_grouping.py
@@ -469,6 +469,13 @@ def test_spherical_average():
     # bug check: in the past, combine after spherical average erroneously changed dly_array
     assert sph == sph_c
 
+    # insert an inf into the arrays as a test
+    uvp2 = copy.deepcopy(uvp)
+    uvp2.cov_array_real[0][0], uvp2.cov_array_imag[0][0] = np.inf, np.inf
+    uvp2.stats_array['err'][0][0] = np.inf
+    sph = grouping.spherical_average(uvp, kbins, bin_widths)
+    assert np.isfinite(sph.cov_array_real[0]).all()
+
     # exceptions
     nt.assert_raises(AssertionError, grouping.spherical_average, uvp, kbins, 1.0)
 
diff --git a/hera_pspec/tests/test_noise.py b/hera_pspec/tests/test_noise.py
@@ -159,22 +159,27 @@ def test_analytic_noise():
     # get P_N estimate
     auto_Tsys = utils.uvd_to_Tsys(uvd, beam, os.path.join(DATA_PATH, "test_uvd.uvh5"))
     assert os.path.exists(os.path.join(DATA_PATH, "test_uvd.uvh5"))
-    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'])
+    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'], P_SN_correction=False)
 
     # check consistency of 1-sigma standard dev. to 1%
     cov_diag = uvp.cov_array_real[0][:, range(Nchan), range(Nchan)]
     stats_diag = uvp.stats_array['P_N'][0]
     frac_ratio = (cov_diag**0.5 - stats_diag) / stats_diag
-
     assert np.abs(frac_ratio).mean() < 0.01
 
     ## check P_SN consistency of 1-sigma standard dev. to 1%
     cov_diag = uvp_fg.cov_array_real[0][:, range(Nchan), range(Nchan)]
     stats_diag = uvp.stats_array['P_SN'][0]
     frac_ratio = (cov_diag**0.5 - stats_diag) / stats_diag
-
     assert np.abs(frac_ratio).mean() < 0.01
 
+    # now compute unbiased P_SN and check that it matches P_N at high-k    
+    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'], P_SN_correction=True)
+    frac_ratio = (uvp.stats_array["P_SN"][0] - uvp.stats_array["P_N"][0]) / uvp.stats_array["P_N"][0]
+    dlys = uvp.get_dlys(0) * 1e9
+    select = np.abs(dlys) > 3000
+    assert np.abs(frac_ratio[:, select].mean()) < 1 / np.sqrt(uvp.Nblpairts)
+
     # clean up
     os.remove(os.path.join(DATA_PATH, "test_uvd.uvh5"))
 
diff --git a/hera_pspec/utils.py b/hera_pspec/utils.py
@@ -1301,7 +1301,7 @@ def uvd_to_Tsys(uvd, beam, Tsys_outfile=None):
     return uvd
 
 
-def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
+def uvp_noise_error(uvp, auto_Tsys=None, err_type='P_N', precomp_P_N=None, P_SN_correction=True):
     """
     Calculate analytic thermal noise error for a UVPSpec object.
     Adds to uvp.stats_array inplace.
@@ -1313,9 +1313,10 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
         If err_type == 'P_SN', uvp should not have any
         incoherent averaging applied.
 
-    auto_Tsys : UVData object
+    auto_Tsys : UVData object, optional
         Holds autocorrelation Tsys estimates in Kelvin (see uvd_to_Tsys)
         for all antennas and polarizations involved in uvp power spectra.
+        Needed for P_N computation, not needed if feeding precomp_P_N.
 
     err_type : str or list of str, options = ['P_N', 'P_SN']
         Type of thermal noise error to compute. P_N is the standard
@@ -1326,19 +1327,24 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
         which uses uses Re[P(tau)] as a proxy for P_S.
         To store both, feed as err_type = ['P_N', 'P_SN']
 
-    precomp_P_N : str
+    precomp_P_N : str, optional
         If computing P_SN and P_N is already computed, use this key
         to index stats_array for P_N rather than computing it from auto_Tsys.
+
+    P_SN_correctoin : bool, optional
+        Apply correction factor if computing P_SN to account for double
+        counting of noise.
     """
     from hera_pspec import uvpspec_utils
 
     # type checks
     if isinstance(err_type, str):
         err_type = [err_type]
 
-    # get metadata
-    lsts = np.unique(auto_Tsys.lst_array)
-    freqs = auto_Tsys.freq_array[0]
+    # get metadata if needed
+    if precomp_P_N is None:
+        lsts = np.unique(auto_Tsys.lst_array)
+        freqs = auto_Tsys.freq_array[0]
 
     # iterate over spectral window
     for spw in uvp.spw_array:
@@ -1393,10 +1399,47 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
                 if 'P_SN' in err_type:
                     # calculate P_SN: see Tan+2020 and
                     # H1C_IDR2/notebooks/validation/errorbars_with_systematics_and_noise.ipynb
+                    # get signal proxy
                     P_S = uvp.get_data(key).real
+                    # clip negative values
                     P_S[P_S < 0] = 0
                     P_SN = np.sqrt(np.sqrt(2) * P_S * P_N + P_N**2)
                     # catch nans, set to inf
                     P_SN[np.isnan(P_SN)] = np.inf
                     # set stats
                     uvp.set_stats('P_SN', key, P_SN)
+
+    # P_SN correction
+    if P_SN_correction and "P_SN" in err_type:
+        if precomp_P_N is None:
+            precomp_P_N = 'P_N'
+        apply_P_SN_correction(uvp, P_SN='P_SN', P_N=precomp_P_N)
+
+
+def apply_P_SN_correction(uvp, P_SN='P_SN', P_N='P_N'):
+    """
+    Apply correction factor to P_SN errorbar in stats_array to account
+    for double counting of noise by using data as proxy for signal.
+    See Jianrong Tan et al. 2021 (Errorbar methodologies).
+    Operates in place. Must have both P_SN and P_N in stats_array.
+
+    Args:
+        uvp : UVPSpec object
+            With P_SN and P_N errorbar (not variance) as a key in stats_array
+        P_SN : str
+            Key in stats_array for P_SN errorbar
+        P_N : str
+            Key in stats_array for P_N errorbar
+    """
+    assert P_SN in uvp.stats_array
+    assert P_N in uvp.stats_array
+    for spw in uvp.spw_array:
+        # get P_SN and P_N
+        p_n = uvp.stats_array[P_N][spw]
+        p_sn = uvp.stats_array[P_SN][spw]
+        # derive correction
+        corr = 1 - (np.sqrt(1 / np.sqrt(np.pi) + 1) - 1) * p_n.real / p_sn.real.clip(1e-40, np.inf)
+        corr[np.isclose(corr, 0)] = np.inf
+        corr[corr < 0] = np.inf
+        # apply correction
+        uvp.stats_array[P_SN][spw] *= corr
diff --git a/hera_pspec/uvpspec.py b/hera_pspec/uvpspec.py
@@ -362,24 +362,22 @@ def get_integrations(self, key, omit_flags=False):
 
     def get_r_params(self):
         """
-        decompress r_params dictionary (so it can be readily used).
-        in a pspecdata object, r_parms are stored as a dictionary
-        with one entry per baseline.
-        In uvpspec, the dictionary is compressed so that a single r_param entry
-        correspondsto multiple baselines and is stored as a json format string.
-        get_r_params() reads the compressed string and returns the dictionary
-        with the format
-        r_params: dictionary with parameters for weighting matrix.
-          Proper fields
-          and formats depend on the mode of data_weighting.
-          data_weighting == 'dayenu':
-            dictionary with fields
-            'filter_centers', list of floats (or float) specifying the (delay) channel numbers
-                              at which to center filtering windows. Can specify fractional channel number.
-            'filter_half_widths', list of floats (or float) specifying the width of each
-                             filter window in (delay) channel numbers. Can specify fractional channel number.
-            'filter_factors', list of floats (or float) specifying how much power within each filter window
-                              is to be suppressed.
+        Return an `r_params` dictionary.
+        
+        In a `PSpecData` object, the `r_params` are stored as a dictionary with 
+        one entry per baseline.
+        
+        In a `UVPSpec` object, the dictionary is compressed so that a single 
+        `r_param` entry correspondsto multiple baselines and is stored as a 
+        JSON format string.
+        
+        This function reads the compressed string and returns the dictionary
+        with the correct following fields and structure.
+        
+        Returns
+        -------
+        r_params : dict
+            Dictionary of r_params for this object.
         """
         return uvputils.decompress_r_params(self.r_params)
 
@@ -1776,13 +1774,15 @@ def generate_noise_spectra(self, spw, polpair, Tsys, blpairs=None,
         where scalar is the cosmological and beam scalar (i.e. X2Y * Omega_eff)
         calculated from pspecbeam with noise_scalar = True, integration_time is
         in seconds and comes from self.integration_array and Nincoherent is the
-        number of incoherent averaging samples and comes from
-        self.nsample_array. If component is 'real' or 'imag', P_N is divided by
-        an additional factor of sqrt(2).
+        number of incoherent averaging samples and comes from `self.nsample_array`. 
+        If `component` is `real` or `imag`, P_N is divided by an additional 
+        factor of sqrt(2).
 
         If the polarizations specified are pseudo Stokes pol (I, Q, U or V)
         then an extra factor of 2 is divided.
-        If form == 'DelSq' then a factor of |k|^3 / (2pi^2) is multiplied.
+        
+        If `form` is `DelSq` then a factor of `|k|^3 / (2pi^2)` is multiplied.
+        
         If real is True, a factor of sqrt(2) is divided to account for
         discarding imaginary noise component.
 
@@ -1822,8 +1822,9 @@ def generate_noise_spectra(self, spw, polpair, Tsys, blpairs=None,
             Number of frequency bins to use in integrating power spectrum
             scalar in pspecbeam. Default: 2000.
 
-        component : str, options=['real', 'imag', 'abs']
-            If component is real or imag, divide by an extra factor of sqrt(2)
+        component : str
+            Options=['real', 'imag', 'abs'].
+            If component is real or imag, divide by an extra factor of sqrt(2).
 
         Returns
         -------
@@ -1909,14 +1910,14 @@ def generate_noise_spectra(self, spw, polpair, Tsys, blpairs=None,
 
     def average_spectra(self, blpair_groups=None, time_avg=False,
                         blpair_weights=None, error_field=None, error_weights=None,
-                        inplace=True):
+                        inplace=True, add_to_history=''):
         """
         Average power spectra across the baseline-pair-time axis, weighted by
         each spectrum's integration time.
 
         This is an "incoherent" average, in the sense that this averages power
-        spectra, rather than visibility data. The 'nsample_array' and
-        'integration_array' will be updated to reflect the averaging.
+        spectra, rather than visibility data. The `nsample_array` and
+        `integration_array` will be updated to reflect the averaging.
 
         In the case of averaging across baseline pairs, the resultant averaged
         spectrum is assigned to the zeroth blpair in the group. In the case of
@@ -1929,14 +1930,15 @@ def average_spectra(self, blpair_groups=None, time_avg=False,
         equivalent to cylindrical binning in 3D k-space.
 
         If you want help constructing baseline-pair groups from baseline
-        groups, see self.get_blpair_groups_from_bl_groups.
+        groups, see `self.get_blpair_groups_from_bl_groups`.
 
         Parameters
         ----------
         blpair_groups : list
             List of list of baseline-pair group tuples or integers. All power
             spectra in a baseline-pair group are averaged together. If a
             baseline-pair exists in more than one group, a warning is raised.
+            
             Examples::
 
                 blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))],
@@ -1951,6 +1953,7 @@ def average_spectra(self, blpair_groups=None, time_avg=False,
         blpair_weights : list, optional
             List of float or int weights dictating the relative weight of each
             baseline-pair when performing the average.
+            
             This is useful for bootstrapping. This should have the same shape
             as blpair_groups if specified. The weights are automatically
             normalized within each baseline-pair group. Default: None (all
@@ -1965,17 +1968,21 @@ def average_spectra(self, blpair_groups=None, time_avg=False,
 
         error_weights: string, optional
             error_weights specify which kind of errors we use for weights 
-            during averaging power spectra.
-            The weights are defined as $w_i = 1/ sigma_i^2$, 
-            where $sigma_i$ is taken from the relevant field of stats_array.
-            If `error_weight' is set to None, which means we just use the 
-            integration time as weights. If error_weights is specified,
-            then it also gets appended to error_field as a list.
-            Default: None
+            during averaging power spectra. The weights are defined as 
+            $w_i = 1/ sigma_i^2$, where $sigma_i$ is taken from the relevant 
+            field of stats_array.
+            
+            If `error_weight` is set to `None`, which means we just use the 
+            integration time as weights. If `error_weights` is specified,
+            then it also gets appended to `error_field` as a list.
+            Default: None.
 
         inplace : bool, optional
-            If True, edit data in self, else make a copy and return. Default:
-            True.
+            If True, edit data in self, else make a copy and return. 
+            Default: True.
+
+        add_to_history : str, optional
+            Added text to add to file history.
 
         Notes
         -----
@@ -1991,14 +1998,14 @@ def average_spectra(self, blpair_groups=None, time_avg=False,
                                      error_field=error_field,
                                      error_weights=error_weights,
                                      blpair_weights=blpair_weights,
-                                     inplace=True)
+                                     inplace=True, add_to_history=add_to_history)
         else:
             return grouping.average_spectra(self, blpair_groups=blpair_groups,
                                             time_avg=time_avg,
                                             error_field=error_field,
                                             error_weights=error_weights,
                                             blpair_weights=blpair_weights,
-                                            inplace=False)
+                                            inplace=False, add_to_history=add_to_history)
 
     def fold_spectra(self):
         """