|
| 1 | +# %% |
| 2 | +import numpy as np |
| 3 | +from scipy.stats import linregress |
| 4 | +import pandas as pd |
| 5 | +import xarray as xr |
| 6 | +from src import readwrite_functions as rwfuncs |
| 7 | +from src.plots_functions import plot_regression |
| 8 | + |
| 9 | +# %% Read csv BTs |
| 10 | + |
| 11 | +flights = pd.read_csv("flights.csv", index_col=0) |
| 12 | +flights_processed = flights[ |
| 13 | + (flights["location"] == "sal") | (flights["location"] == "barbados") |
| 14 | +] |
| 15 | + |
| 16 | +TB_arts_list = [] |
| 17 | +TB_hamp_list = [] |
| 18 | +for date in flights_processed.index: |
| 19 | + TB_arts_list.append( |
| 20 | + pd.read_csv(f"Data/arts_calibration/HALO-{date}a/TBs_arts.csv", index_col=0) |
| 21 | + ) |
| 22 | + TB_hamp_list.append( |
| 23 | + pd.read_csv(f"Data/arts_calibration/HALO-{date}a/TBs_hamp.csv", index_col=0) |
| 24 | + ) |
| 25 | + |
| 26 | +# load dropsonde data |
| 27 | +configfile = "config_ipns.yaml" |
| 28 | +cfg = rwfuncs.extract_config_params(configfile) |
| 29 | +ds_dropsonde = xr.open_dataset(cfg["path_dropsondes"], engine="zarr") |
| 30 | + |
| 31 | +# restructure data |
| 32 | +TB_arts = pd.concat(TB_arts_list, axis=1) |
| 33 | +TB_hamp = pd.concat(TB_hamp_list, axis=1) |
| 34 | +launch_time = ds_dropsonde.sel(sonde_id=TB_arts.columns).launch_time.values |
| 35 | +TB_arts.columns = launch_time |
| 36 | +TB_hamp.columns = launch_time |
| 37 | +TB_arts = TB_arts.T.dropna() |
| 38 | +TB_hamp = TB_hamp.T.dropna() |
| 39 | + |
| 40 | +# %% drop extreme outliers |
| 41 | +TB_arts_std = TB_arts.std("index") |
| 42 | +TB_hamp_std = TB_hamp.std("index") |
| 43 | +TB_arts_med = TB_arts.median("index") |
| 44 | +TB_hamp_med = TB_hamp.median("index") |
| 45 | +TB_arts = TB_arts.where((TB_arts - TB_arts_med).abs() < 3 * TB_arts_std) |
| 46 | +TB_hamp = TB_hamp.where((TB_hamp - TB_hamp_med).abs() < 3 * TB_hamp_std) |
| 47 | + |
| 48 | +# %% count NaN values |
| 49 | +mask_arts = ((TB_arts - TB_arts_med).abs() < 2 * TB_arts_std).mean("index") |
| 50 | +mask_hamp = ((TB_hamp - TB_hamp_med).abs() < 2 * TB_hamp_std).mean("index") |
| 51 | +mask_arts |
| 52 | + |
| 53 | +# %% |
| 54 | +mask_hamp |
| 55 | + |
| 56 | + |
| 57 | +# %% |
| 58 | +multi_index = pd.MultiIndex.from_product( |
| 59 | + [TB_arts.columns, ["slope", "intercept"]], names=["frequency", "parameter"] |
| 60 | +) |
| 61 | +regression_coeffs = pd.DataFrame( |
| 62 | + index=np.unique(TB_arts.index.date), columns=multi_index |
| 63 | +) |
| 64 | + |
| 65 | +for date in flights_processed.index: |
| 66 | + date = pd.to_datetime(date, format="%Y%m%d").date() |
| 67 | + TB_arts_day = TB_arts.loc[TB_arts.index.date == date] |
| 68 | + TB_hamp_day = TB_hamp.loc[TB_hamp.index.date == date] |
| 69 | + |
| 70 | + for f in TB_arts_day.columns: |
| 71 | + # Flatten the arrays and filter out NaN values |
| 72 | + x = TB_hamp_day[f].values.flatten() |
| 73 | + y = TB_arts_day[f].values.flatten() |
| 74 | + mask = ~np.isnan(x) & ~np.isnan(y) |
| 75 | + x = x[mask] |
| 76 | + y = y[mask] |
| 77 | + |
| 78 | + if len(x) > 0 and len(y) > 0: # Ensure there are values left after filtering |
| 79 | + regression = linregress(x, y) |
| 80 | + regression_coeffs.loc[date, (f, "slope")] = regression.slope |
| 81 | + regression_coeffs.loc[date, (f, "intercept")] = regression.intercept |
| 82 | + |
| 83 | +# %% plot regressions |
| 84 | +plot_regression(regression_coeffs, TB_arts, TB_hamp, date) |
| 85 | + |
| 86 | +# %% |
0 commit comments