diff --git a/.gitignore b/.gitignore index 213e427..65077cc 100644 --- a/.gitignore +++ b/.gitignore @@ -166,11 +166,11 @@ cython_debug/ /planet_data /Radiometer_Data /Radar_Data -/Data +/Data/arts_calibration +/Plots /arts_calibration /.vscode # other or unknown **/.DS_Store **/config.yaml -**/process_config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 213d6fa..daee8ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,11 +31,6 @@ repos: description: python linter - id: ruff-format description: python formatter -- repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.3.0 - hooks: - - id: black-jupyter - description: formating ipynb notebook - repo: https://github.com/compilerla/conventional-pre-commit rev: v3.4.0 hooks: diff --git a/config_ipns.yaml b/config_ipns.yaml new file mode 100644 index 0000000..4017682 --- /dev/null +++ b/config_ipns.yaml @@ -0,0 +1,9 @@ +date: '20240926' +flightletter: a +flightname: HALO-{date}{flightletter} +iwv: ipns://latest.orcestra-campaign.org/products/HALO/iwv/HALO-{date}{flightletter}.zarr +path_dropsondes: ipns://latest.orcestra-campaign.org/products/HALO/dropsondes/Level_3/PERCUSION_Level_3.zarr +path_saveplots: '' +position_attitude: ipns://latest.orcestra-campaign.org/products/HALO/position_attitude/HALO-{date}{flightletter}.zarr +radar: ipns://latest.orcestra-campaign.org/products/HALO/radar/moments/HALO-{date}{flightletter}.zarr +radiometer: ipns://latest.orcestra-campaign.org/products/HALO/radiometer/HALO-{date}{flightletter}.zarr diff --git a/error_files/HALO-20240818.yaml b/error_files/HALO-20240818.yaml new file mode 100644 index 0000000..e310ef4 --- /dev/null +++ b/error_files/HALO-20240818.yaml @@ -0,0 +1 @@ +'KV': [["17:01:00", "17:11:30"]] diff --git a/error_files/HALO-20240822.yaml b/error_files/HALO-20240822.yaml new file mode 100644 index 0000000..99a564c --- /dev/null +++ b/error_files/HALO-20240822.yaml @@ -0,0 +1 @@ +"90": [["18:23:30", "18:23:40"]] diff --git a/error_files/HALO-20240825.yaml b/error_files/HALO-20240825.yaml new file mode 100644 index 0000000..9ca3997 --- /dev/null +++ b/error_files/HALO-20240825.yaml @@ -0,0 +1 @@ +"90": [["17:33:20", "17:33:40"]] diff --git a/error_files/HALO-20240829.yaml b/error_files/HALO-20240829.yaml new file mode 100644 index 0000000..a8881be --- /dev/null +++ b/error_files/HALO-20240829.yaml @@ -0,0 +1 @@ +"90": [["20:00:00", "20:00:20"]] diff --git a/error_files/HALO-20240831.yaml b/error_files/HALO-20240831.yaml new file mode 100644 index 0000000..7e39e2e --- /dev/null +++ b/error_files/HALO-20240831.yaml @@ -0,0 +1 @@ +"90": [["14:05:10", "14:05:25"]] diff --git a/error_files/HALO-20240909.yaml b/error_files/HALO-20240909.yaml new file mode 100644 index 0000000..1524318 --- /dev/null +++ b/error_files/HALO-20240909.yaml @@ -0,0 +1 @@ +"90": [["13:01:00", "13:08:00"]] diff --git a/error_files/HALO-20240912.yaml b/error_files/HALO-20240912.yaml new file mode 100644 index 0000000..0fd0900 --- /dev/null +++ b/error_files/HALO-20240912.yaml @@ -0,0 +1 @@ +"119": [["12:09:00", "12:20:30"], ["15:23:00", "15:30:30"]] diff --git a/error_files/HALO-20240914.yaml b/error_files/HALO-20240914.yaml new file mode 100644 index 0000000..e030421 --- /dev/null +++ b/error_files/HALO-20240914.yaml @@ -0,0 +1,2 @@ +"KV": [["18:02:00", "18:10:30"]] +"90": [["13:01:00", "13:10:30"]] diff --git a/error_files/HALO-20240916.yaml b/error_files/HALO-20240916.yaml new file mode 100644 index 0000000..ae1c471 --- /dev/null +++ b/error_files/HALO-20240916.yaml 
@@ -0,0 +1 @@ +"KV": [["12:28:00", "12:30:30"]] diff --git a/error_files/HALO-20240919.yaml b/error_files/HALO-20240919.yaml new file mode 100644 index 0000000..017b104 --- /dev/null +++ b/error_files/HALO-20240919.yaml @@ -0,0 +1 @@ +"119": [["14:46:00", "14:50:30"]] diff --git a/error_files/HALO-20240921.yaml b/error_files/HALO-20240921.yaml new file mode 100644 index 0000000..f0e4eba --- /dev/null +++ b/error_files/HALO-20240921.yaml @@ -0,0 +1 @@ +"90": [["12:57:31", "12:57:38"]] diff --git a/error_files/HALO-20240926.yaml b/error_files/HALO-20240926.yaml new file mode 100644 index 0000000..822171f --- /dev/null +++ b/error_files/HALO-20240926.yaml @@ -0,0 +1,4 @@ +'119': [['12:37:00', '12:40:30'], + ['17:48:00','18:00:30'], + ['18:34:00','18:40:30'], + ['19:12:00', '19:20:30']] diff --git a/error_files/HALO-20240928.yaml b/error_files/HALO-20240928.yaml new file mode 100644 index 0000000..2811e70 --- /dev/null +++ b/error_files/HALO-20240928.yaml @@ -0,0 +1 @@ +"119": [["16:39:00", "16:40:00"]] diff --git a/error_files/HALO-20241105.yaml b/error_files/HALO-20241105.yaml new file mode 100644 index 0000000..6c64b4a --- /dev/null +++ b/error_files/HALO-20241105.yaml @@ -0,0 +1 @@ +"119": [["16:16:00", "16:20:30"]] diff --git a/error_files/HALO-20241107.yaml b/error_files/HALO-20241107.yaml new file mode 100644 index 0000000..2a2c60a --- /dev/null +++ b/error_files/HALO-20241107.yaml @@ -0,0 +1,2 @@ +"183": [["15:21:00", "15:30:30"]] +"119": [["14:47:30", "14:50:30"]] diff --git a/error_files/HALO-20241110.yaml b/error_files/HALO-20241110.yaml new file mode 100644 index 0000000..2db69ad --- /dev/null +++ b/error_files/HALO-20241110.yaml @@ -0,0 +1 @@ +"119": [["10:28:30", "10:30:30"]] diff --git a/error_files/HALO-20241112.yaml b/error_files/HALO-20241112.yaml new file mode 100644 index 0000000..1ced17f --- /dev/null +++ b/error_files/HALO-20241112.yaml @@ -0,0 +1 @@ +"119": [["11:44:30", "12:03:00"]] diff --git a/error_files/HALO-20241116.yaml b/error_files/HALO-20241116.yaml new file mode 100644 index 0000000..ff3a47e --- /dev/null +++ b/error_files/HALO-20241116.yaml @@ -0,0 +1 @@ +"119": [["17:04:00", "17:10:30"]] diff --git a/error_files/HALO-20241119.yaml b/error_files/HALO-20241119.yaml new file mode 100644 index 0000000..92a7911 --- /dev/null +++ b/error_files/HALO-20241119.yaml @@ -0,0 +1 @@ +"90": [["10:02:18", "10:02:23"]] diff --git a/flights.csv b/flights.csv new file mode 100644 index 0000000..ef7c7fa --- /dev/null +++ b/flights.csv @@ -0,0 +1,33 @@ +,flightletter,location +20240809,b,transfer +20240811,a,sal +20240813,a,sal +20240816,a,sal +20240818,a,sal +20240821,a,sal +20240822,a,sal +20240825,a,sal +20240827,a,sal +20240829,a,sal +20240831,a,sal +20240903,a,sal +20240906,a,transfer +20240907,a,barbados +20240909,a,barbados +20240912,a,barbados +20240914,a,barbados +20240916,a,barbados +20240919,a,barbados +20240921,a,barbados +20240923,a,barbados +20240924,a,barbados +20240926,a,barbados +20240928,a,barbados +20240929,a,transfer +20241105,a,op +20241107,a,op +20241110,a,op +20241112,b,op +20241114,b,op +20241116,a,op +20241119,a,op diff --git a/process_config.yaml b/process_config.yaml new file mode 100644 index 0000000..634af01 --- /dev/null +++ b/process_config.yaml @@ -0,0 +1,8 @@ +date: '20240827' +flightletter: a +flightname: HALO-{date}{flightletter} +bahamas: ipns://latest.orcestra-campaign.org/products/HALO/position_attitude.zarr +radar: ipns://latest.orcestra-campaign.org/raw/HALO/radar/HALO-{date}{flightletter}/mom/*.nc +radiometer: 
ipns://latest.orcestra-campaign.org/raw/HALO/radiometer/HALO-{date}{flightletter} +sea_land_mask: /work/bm1183/m301049/orcestra/sea_land_mask.nc +save_dir: /work/bm1183/m301049/orcestra/Hamp_Processed_vn2 diff --git a/scripts/arts_calibration/analyse_bt_diffs.py b/scripts/arts_calibration/analyse_bt_diffs.py index d33273a..73279f5 100644 --- a/scripts/arts_calibration/analyse_bt_diffs.py +++ b/scripts/arts_calibration/analyse_bt_diffs.py @@ -2,7 +2,8 @@ import pandas as pd import matplotlib.pyplot as plt import xarray as xr -from src import readwrite_functions as rwfuncs +import yaml +import numpy as np # %% define frequencies freq_k = [22.24, 23.04, 23.84, 25.44, 26.24, 27.84, 31.40] @@ -41,38 +42,16 @@ "20240928", ] -next_flight_183 = ["202408"] # %% Read csv BTs -dates = [ - "20240811", - "20240813", - "20240816", - "20240818", - "20240821", - "20240822", - "20240825", - "20240827", - "20240829", - "20240831", - "20240903", - "20240906", - "20240907", - "20240909", - "20240912", - "20240914", - "20240916", - "20240919", - "20240921", - "20240923", - "20240924", - "20240926", - "20240928", +flights = pd.read_csv("flights.csv", index_col=0) +flights_processed = flights[ + (flights["location"] == "sal") | (flights["location"] == "barbados") ] TB_arts_list = [] TB_hamp_list = [] -for date in dates: +for date in flights_processed.index: TB_arts_list.append( pd.read_csv(f"Data/arts_calibration/HALO-{date}a/TBs_arts.csv", index_col=0) ) @@ -81,18 +60,25 @@ ) # load dropsonde data -configfile = "config_ipns.yaml" -cfg = rwfuncs.extract_config_params(configfile) -ds_dropsonde = xr.open_dataset(cfg["path_dropsondes"], engine="zarr") +with open("process_config.yaml") as f: + cfg = yaml.safe_load(f) + +ds_dropsonde = xr.open_dataset( + "ipns://latest.orcestra-campaign.org/products/HALO/dropsondes/Level_3/PERCUSION_Level_3.zarr", + engine="zarr", +) +ds_dropsonde = ds_dropsonde.assign_coords(sonde=np.arange(ds_dropsonde.sonde.size)) -# %% restructure data +# restructure data TB_arts = pd.concat(TB_arts_list, axis=1) TB_hamp = pd.concat(TB_hamp_list, axis=1) -launch_time = ds_dropsonde.sel(sonde_id=TB_arts.columns).launch_time.values +launch_time = ds_dropsonde.sel( + sonde=[int(float(x)) for x in TB_arts.columns.values] +).sonde_time.values TB_arts.columns = launch_time TB_hamp.columns = launch_time -TB_arts = TB_arts.T -TB_hamp = TB_hamp.T +TB_arts = TB_arts.T.dropna() +TB_hamp = TB_hamp.T.dropna() # %% calculate statistics for each flight diffs = TB_arts - TB_hamp diff --git a/scripts/arts_calibration/arts_bt_calculation.py b/scripts/arts_calibration/arts_bt_calculation.py index 963a099..39dc27b 100644 --- a/scripts/arts_calibration/arts_bt_calculation.py +++ b/scripts/arts_calibration/arts_bt_calculation.py @@ -1,7 +1,13 @@ # %% import os import xarray as xr -from src import load_data_functions as loadfuncs +import yaml +import pandas as pd +import typhon +import pyarts +from tqdm import tqdm +import FluxSimulator as fsm +import numpy as np from src.arts_functions import ( Hamp_channels, basic_setup, @@ -10,20 +16,13 @@ get_surface_temperature, get_surface_windspeed, forward_model, + is_complete, ) -from src.ipfs_helpers import read_nc -from orcestra.postprocess.level0 import bahamas -from src import readwrite_functions as rwfuncs -import yaml -import pandas as pd -import typhon -import pyarts -from tqdm import tqdm -import FluxSimulator as fsm + +# from src.plots_functions import plot_dropsonde, plot_TB_comparison # %% get ARTS data -print("Download ARTS data") 
pyarts.cat.download.retrieve(verbose=True) # %% setup sensor @@ -35,9 +34,20 @@ # %% setup workspace ws = basic_setup([], sensor_description=sensor_description) +# %% load data +configfile = "process_config.yaml" +with open(configfile, "r") as file: + cfg = yaml.safe_load(file) + +ds_dropsonde = xr.open_dataset( + "ipns://latest.orcestra-campaign.org/products/HALO/dropsondes/Level_3/PERCUSION_Level_3.zarr", + engine="zarr", +) +ds_dropsonde = ds_dropsonde.assign_coords(sonde=np.arange(ds_dropsonde.sonde.size)) + # %% define function -def calc_arts_bts(date): +def calc_arts_bts(date, flightletter, ds_dropsonde, cfg): """ Calculates brightness temperatures for the radiometer frequencies with ARTS based on the dropsonde profiles for the flight on date. @@ -51,49 +61,22 @@ def calc_arts_bts(date): None. Data is saved in arts_comparison folder. """ - print("Read Config") - # change the date in config.yaml to date - configfile = "config_ipns.yaml" - with open(configfile, "r") as file: - config_yml = yaml.safe_load(file) - config_yml["date"] = date - with open(configfile, "w") as file: - yaml.dump(config_yml, file) - # read config - cfg = rwfuncs.extract_config_params(configfile) - - # load bahamas data from ipfs - print("Load Bahamas Data") - ds_bahamas = ( - read_nc( - f"ipns://latest.orcestra-campaign.org/raw/HALO/bahamas/{cfg['flightname']}/QL_*.nc" - ) - .pipe(bahamas) - .interpolate_na("time") + flightname = f"HALO-{date}{flightletter}" + ds_radar = xr.open_dataset( + f"{cfg['save_dir']}/radar/{flightname}_radar.zarr", engine="zarr" ) - - # read dropsonde data - print("Load Dropsonde Data") - ds_dropsonde = xr.open_dataset(cfg["path_dropsondes"], engine="zarr") - ds_dropsonde = ds_dropsonde.where( - (ds_dropsonde["interp_time"] > pd.to_datetime(cfg["date"])) - & ( - ds_dropsonde["interp_time"] - < pd.to_datetime(cfg["date"]) + pd.DateOffset(hour=23) - ) - ).dropna(dim="sonde_id", how="all") - - # read HAMP post-processed data - print("Load HAMP Data") - hampdata = loadfuncs.load_hamp_data( - cfg["path_radar"], cfg["path_radiometers"], cfg["path_iwv"] + ds_radiometers = xr.open_dataset( + f"{cfg['save_dir']}/radiometer/{flightname}_radio.zarr", engine="zarr" ) + ds_dropsonde = ds_dropsonde.where( + (ds_dropsonde["interp_time"] > pd.to_datetime(date)) + & (ds_dropsonde["interp_time"] < pd.to_datetime(date) + pd.DateOffset(hour=23)) + ).dropna(dim="sonde", how="all") - # cloud mask from radar print("Calculate Cloud Mask") ds_dropsonde = ds_dropsonde.assign( radar_cloud_flag=( - hampdata.radar.sel(time=ds_dropsonde.launch_time, method="nearest").sel( + ds_radar.sel(time=ds_dropsonde.sonde_time, method="nearest").sel( height=slice(200, None) )["dBZe"] > -30 @@ -101,50 +84,41 @@ def calc_arts_bts(date): * 1 ) cloud_free_idxs = ( - ds_dropsonde["sonde_id"] - .where(ds_dropsonde["radar_cloud_flag"] == 0, drop=True) + ds_dropsonde["sonde"] + .where((ds_dropsonde["radar_cloud_flag"] == 0).compute(), drop=True) .values ) - # initialize result arrays - freqs_hamp = hampdata.radiometers.frequency.values - TBs_arts = pd.DataFrame(index=freqs_hamp, columns=cloud_free_idxs) - TBs_hamp = TBs_arts.copy() + freqs_hamp = ds_radiometers.frequency.values + TBs_arts = pd.DataFrame(index=freqs / 1e9, columns=cloud_free_idxs) + TBs_hamp = pd.DataFrame(index=freqs_hamp, columns=cloud_free_idxs) - # setup folders print("Setup Folders") - if not os.path.exists(f"Data/arts_calibration/{cfg['flightname']}"): - os.makedirs(f"Data/arts_calibration/{cfg['flightname']}") - if not 
os.path.exists(f"Data/arts_calibration/{cfg['flightname']}/plots"): - os.makedirs(f"Data/arts_calibration/{cfg['flightname']}/plots") - - # loop over cloud free sondes - print(f"Running {cloud_free_idxs.size} dropsondes for {cfg['flightname']}") - for sonde_id in tqdm(cloud_free_idxs): - # get profiles - ds_dropsonde_loc, hampdata_loc, height, drop_time = get_profiles( - sonde_id, ds_dropsonde, hampdata + if not os.path.exists(f"Data/arts_calibration/{flightname}"): + os.makedirs(f"Data/arts_calibration/{flightname}") + if not os.path.exists(f"Data/arts_calibration/{flightname}/plots"): + os.makedirs(f"Data/arts_calibration/{flightname}/plots") + + print(f"Running {cloud_free_idxs.size} dropsondes for {date}") + for sonde in tqdm(cloud_free_idxs): + ds_dropsonde_loc, radiometers_loc, height, drop_time = get_profiles( + sonde, ds_dropsonde, ds_radiometers ) - # check if dropsonde is broken (contains only nan values) - if ds_dropsonde_loc["ta"].isnull().mean().values == 1: - print(f"Dropsonde {sonde_id} is broken, skipping") + if not is_complete(ds_dropsonde_loc, radiometers_loc, drop_time, height, sonde): continue - # get surface values surface_temp = get_surface_temperature(ds_dropsonde_loc) surface_ws = get_surface_windspeed(ds_dropsonde_loc) - # extrapolate dropsonde profiles - ds_dropsonde_extrap = extrapolate_dropsonde( - ds_dropsonde_loc, height, ds_bahamas - ) + ds_dropsonde_extrap = extrapolate_dropsonde(ds_dropsonde_loc, height) + + # plot_dropsonde(ds_dropsonde_extrap, ds_dropsonde_loc) - # convert xarray to ARTS gridded field profile_grd = fsm.generate_gridded_field_from_profiles( pyarts.arts.Vector(ds_dropsonde_extrap["p"].values), ds_dropsonde_extrap["ta"].values, - ds_dropsonde_extrap["alt"].values, + ds_dropsonde_extrap["altitude"].values, gases={ "H2O": typhon.physics.specific_humidity2vmr( ds_dropsonde_extrap["q"].values @@ -154,7 +128,6 @@ def calc_arts_bts(date): }, ) - # run arts BTs, _ = forward_model( ws, profile_grd, @@ -163,55 +136,23 @@ def calc_arts_bts(date): height, ) - # except (ValueError, KeyError, RuntimeError) as e: - # print( - # f"ARTS or extrapolation failed for dropsonde {sonde_id} with error: {e}, skipping" - # ) - # pass - - # Store arts BTs - TBs_arts[sonde_id] = pd.DataFrame(data=BTs, index=freqs / 1e9) - # get according hamp data - TBs_hamp[sonde_id] = hampdata_loc.radiometers.TBs.values - - # save results - TBs_arts.to_csv(f"Data/arts_calibration/{cfg['flightname']}/TBs_arts.csv") - TBs_hamp.to_csv(f"Data/arts_calibration/{cfg['flightname']}/TBs_hamp.csv") - - -# %% call function -# date = str(sys.argv[1]) -calc_arts_bts("20240827") - -# %% test turning extrapolated dropsonde profiles to ARTS input -# read config -cfg = rwfuncs.extract_config_params("config_ipns.yaml") -ds_dropsonde = xr.open_dataset(cfg["path_dropsondes"], engine="zarr") -profile = ds_dropsonde.isel(sonde_id=100) -profile = profile.dropna("gpsalt") - -# %% convert xarray to ARTS gridded field -profile_grd = fsm.generate_gridded_field_from_profiles( - pyarts.arts.Vector(profile["p"].values), - profile["ta"].values, - profile["alt"].values, - gases={ - "H2O": typhon.physics.specific_humidity2vmr(profile["q"].values), - "N2": 0.78, - "O2": 0.21, - }, -) + TBs_arts[sonde] = pd.DataFrame(data=BTs, index=freqs / 1e9) + TBs_hamp[sonde] = radiometers_loc["TBs"] -# %% + # plot_TB_comparison(TBs_arts[sonde], TBs_hamp[sonde], sonde) + + TBs_arts.to_csv(f"Data/arts_calibration/{flightname}/TBs_arts.csv") + TBs_hamp.to_csv(f"Data/arts_calibration/{flightname}/TBs_hamp.csv") -BTs, _ = 
forward_model( - ws, - profile_grd, - 10, - 300, - 1e4, -) -# %% -freqs = sensor_description[:, 0] + sensor_description[:, 1] + sensor_description[:, 2] +# %% loop over flights +flights = pd.read_csv("flights.csv", index_col=0) +flights_processed = flights[ + (flights["location"] == "sal") | (flights["location"] == "barbados") +] + +for date in flights_processed.index: + calc_arts_bts( + str(date), flights_processed.loc[date]["flightletter"], ds_dropsonde, cfg + ) # %% diff --git a/scripts/arts_calibration/regressions.py b/scripts/arts_calibration/regressions.py new file mode 100644 index 0000000..bc20ef2 --- /dev/null +++ b/scripts/arts_calibration/regressions.py @@ -0,0 +1,166 @@ +# %% +import numpy as np +from scipy.stats import linregress +import pandas as pd +import xarray as xr +import yaml +from src.plots_functions import plot_regression +import matplotlib.pyplot as plt + +# %% Read csv BTs + +flights = pd.read_csv("flights.csv", index_col=0) +flights_processed = flights[ + (flights["location"] == "sal") | (flights["location"] == "barbados") +] + +TB_arts_list = [] +TB_hamp_list = [] +for date in flights_processed.index: + TB_arts_list.append( + pd.read_csv(f"Data/arts_calibration/HALO-{date}a/TBs_arts.csv", index_col=0) + ) + TB_hamp_list.append( + pd.read_csv(f"Data/arts_calibration/HALO-{date}a/TBs_hamp.csv", index_col=0) + ) + +# load dropsonde data +with open("process_config.yaml") as f: + cfg = yaml.safe_load(f) + +ds_dropsonde = xr.open_dataset( + "ipns://latest.orcestra-campaign.org/products/HALO/dropsondes/Level_3/PERCUSION_Level_3.zarr", + engine="zarr", +) +ds_dropsonde = ds_dropsonde.assign_coords(sonde=np.arange(ds_dropsonde.sonde.size)) + +# restructure data +TB_arts = pd.concat(TB_arts_list, axis=1) +TB_hamp = pd.concat(TB_hamp_list, axis=1) +launch_time = ds_dropsonde.sel( + sonde=[int(float(x)) for x in TB_arts.columns.values] +).sonde_time.values +TB_arts.columns = launch_time +TB_hamp.columns = launch_time +TB_arts = TB_arts.T.dropna() +TB_hamp = TB_hamp.T.dropna() + +# %% drop extreme outliers +TB_arts_std = TB_arts.std("index") +TB_hamp_std = TB_hamp.std("index") +TB_arts_med = TB_arts.median("index") +TB_hamp_med = TB_hamp.median("index") +TB_arts = TB_arts.where((TB_arts - TB_arts_med).abs() < 3 * TB_arts_std) +TB_hamp = TB_hamp.where((TB_hamp - TB_hamp_med).abs() < 3 * TB_hamp_std) + +# %% count NaN values +mask_arts = ((TB_arts - TB_arts_med).abs() < 2 * TB_arts_std).mean("index") +mask_hamp = ((TB_hamp - TB_hamp_med).abs() < 2 * TB_hamp_std).mean("index") +mask_arts + +# %% +mask_hamp + + +# %% +multi_index = pd.MultiIndex.from_product( + [TB_arts.columns, ["slope", "intercept"]], names=["frequency", "parameter"] +) +regression_coeffs = pd.DataFrame( + index=np.unique(TB_arts.index.date), columns=multi_index +) + +for date in flights_processed.index: + date = pd.to_datetime(date, format="%Y%m%d").date() + TB_arts_day = TB_arts.loc[TB_arts.index.date == date] + TB_hamp_day = TB_hamp.loc[TB_hamp.index.date == date] + + for f in TB_arts_day.columns: + # Flatten the arrays and filter out NaN values + x = TB_hamp_day[f].values.flatten() + y = TB_arts_day[f].values.flatten() + mask = ~np.isnan(x) & ~np.isnan(y) + x = x[mask] + y = y[mask] + + if len(x) > 0 and len(y) > 0: # Ensure there are values left after filtering + regression = linregress(x, y) + regression_coeffs.loc[date, (f, "slope")] = regression.slope + regression_coeffs.loc[date, (f, "intercept")] = regression.intercept + +# %% plot regressions +fig = plot_regression(regression_coeffs, TB_arts,
TB_hamp, date) +fig.savefig(f"Plots/arts/{date}_regression.png", dpi=300) + +# %% define frequencies +freq_k = [22.24, 23.04, 23.84, 25.44, 26.24, 27.84, 31.40] +freq_v = [50.3, 51.76, 52.8, 53.75, 54.94, 56.66, 58.00] +freq_90 = [90.0] +center_freq_119 = 118.75 +center_freq_183 = 183.31 +width_119 = [1.4, 2.3, 4.2, 8.5] +width_183 = [0.6, 1.5, 2.5, 3.5, 5.0, 7.5] +freq_119 = [center_freq_119 + w for w in width_119] +freq_183 = [center_freq_183 + w for w in width_183] + +calib_dates_183 = [ + "20240822", + "20240901", + "20240905", + "20240913", + "20240916", + "20240918", + "20240920", + "20240923", + "20240925", + "20240928", +] + +calib_oters = [ + "20240901", + "20240905", + "20240913", + "20240916", + "20240918", + "20240920", + "20240923", + "20240925", + "20240928", +] + + +def get_next_flight(calibration_date, flights): + for flight in flights: + if flight >= calibration_date.date(): + return flight + return None + + +# %% plot slope and standard error for all modules +fig, axes = plt.subplots(3, 2, figsize=(15, 15), sharex=False) + +for i, freq in enumerate([freq_k, freq_v, freq_90, freq_119, freq_183]): + ax = axes.flatten()[i] + ax.axhline(1, color="grey", linewidth=0.5) + for f in freq: + ax.plot( + regression_coeffs.index, regression_coeffs[f]["slope"], label=f"{f} GHz" + ) + + ax.set_ylabel("Slope") + ax.legend() + ax.spines[["top", "right"]].set_visible(False) + # format x-axis to show month and day + ax.set_xticks(regression_coeffs.index) + ax.set_xticklabels(regression_coeffs.index, rotation=45) + ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter("%m-%d")) + ax.set_xlim([regression_coeffs.index[0], regression_coeffs.index[-1]]) + + +axes[-1, -1].remove() + +fig.tight_layout() +fig.savefig("Data/arts_calibration/slope.png", dpi=300, bbox_inches="tight") + + +# %% diff --git a/scripts/find_amplifier_faults.py b/scripts/find_amplifier_faults.py new file mode 100644 index 0000000..b9a78eb --- /dev/null +++ b/scripts/find_amplifier_faults.py @@ -0,0 +1,37 @@ +# %% +import xarray as xr +import yaml +from src.ipfs_helpers import read_nc +from src.plots_functions import plot_radiometers +import matplotlib as mpl + +mpl.use("WebAgg") + +# %% load processed data +with open("process_config.yaml", "r") as file: + cfg = yaml.safe_load(file) + +radar = xr.open_dataset(f"{cfg['save_dir']}/full_radar.zarr", engine="zarr") +radiometers = xr.open_dataset( + f"{cfg['save_dir']}/full_radiometer.zarr", + engine="zarr", +) +iwv = xr.open_dataset(f"{cfg['save_dir']}/full_iwv.zarr", engine="zarr") + +# %% load raw data +date = "20241119" +flightletter = "a" +ds_rad_raw = {} +for radio in ["11990", "KV", "183"]: + ds_rad_raw[radio] = read_nc( + f"{cfg['radiometer'].format(date=date, flightletter=flightletter)}/{radio}/{date[2:]}.LV0.NC" + ) + + +# %% plot TBs and gain +plot_radiometers( + radiometers.sel(time=date), + ds_rad_raw, +) + +# %% diff --git a/scripts/generate_ipfs_hashes.py b/scripts/generate_ipfs_hashes.py index 3997207..820badf 100644 --- a/scripts/generate_ipfs_hashes.py +++ b/scripts/generate_ipfs_hashes.py @@ -1,6 +1,6 @@ # %% import subprocess -import ruamel.yaml +import ruamel from tqdm import tqdm diff --git a/scripts/merge_datasets.py b/scripts/merge_datasets.py new file mode 100644 index 0000000..44af321 --- /dev/null +++ b/scripts/merge_datasets.py @@ -0,0 +1,84 @@ +# %% +import xarray as xr +import yaml +from src.ipfs_helpers import get_encoding +from dask.diagnostics import ProgressBar + +# %% open datasets as on file and drop conflicting attributes +with 
open("process_config.yaml", "r") as file: + cfg = yaml.safe_load(file) + +radar = ( + xr.open_mfdataset( + f"{cfg['save_dir']}/radar/*.zarr", + engine="zarr", + combine="nested", + concat_dim="time", + combine_attrs="drop_conflicts", + chunks={"time": -1, "height": -1}, + ) + .sortby("time") + .chunk( + { + "time": 2**18, + "height": -1, + } + ) +).transpose("time", "height") +radiometer = ( + xr.open_mfdataset( + f"{cfg['save_dir']}/radiometer/*.zarr", + engine="zarr", + combine="nested", + concat_dim="time", + combine_attrs="drop_conflicts", + chunks={"time": -1, "frequency": -1}, + ) + .chunk( + { + "time": 2**18, + "frequency": -1, + } + ) + .transpose("time", "frequency") +) + +iwv = xr.open_mfdataset( + f"{cfg['save_dir']}/iwv/*.zarr", + engine="zarr", + combine_attrs="drop_conflicts", + combine="nested", + concat_dim="time", + chunks={"time": -1}, +).chunk( + { + "time": 2**18, + }, +) + +# %% save single files +print("Save radar") +with ProgressBar(): + radar.to_zarr( + f"{cfg['save_dir']}/full_radar.zarr", + encoding=get_encoding(radar), + mode="w", + ) + +print("Save radiometer") +with ProgressBar(): + radiometer.to_zarr( + f"{cfg['save_dir']}/full_radiometer.zarr", + encoding=get_encoding(radiometer), + mode="w", + ) + +print("Save iwv") +with ProgressBar(): + iwv.to_zarr( + f"{cfg['save_dir']}/full_iwv.zarr", + encoding=get_encoding(iwv), + mode="w", + ) + +# %% diff --git a/scripts/process_data.py b/scripts/process_data.py deleted file mode 100644 index a8ac93f..0000000 --- a/scripts/process_data.py +++ /dev/null @@ -1,183 +0,0 @@ -# %% import modules -import os -import sys - -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) - -import yaml -import xarray as xr -import shutil -from orcestra.postprocess.level0 import ( - radiometer, - radar, - iwv, - add_georeference, -) -from orcestra.postprocess.level1 import ( - correct_radar_height, - filter_radar, - add_masks_radar, - add_metadata_radar, - filter_radiometer, -) - -from src.ipfs_helpers import add_encoding, read_nc, read_mf_nc - - -# %% define function - - -def postprocess_hamp(date, flightletter, version): - """ - Postprocess raw data from HAMP. - - Uses the raw data from hamp and applies the following processing steps: - - Level 1 processing - - Fix bahamas data: Fixes time axis - - Fix radar data: Fixes time axis, adds dBZ variables, adds georefernce from bahamas - - Fix radiometer data: Fixes time axis, adds georefernce from bahamas - - Fix iwv data: Fixes time axis, adds georefernce from bahamas - - Concatenate radiometers: Concatenates radiometer data along frequency dimension - - Level 2 processing - - Correct radar height: Writes radar data on geometric height grid with 30m vertical grid spacing - - Filter radar: Filters radar data - - Filter radiometer: Filters radiometer data - Saves the processed data in the save_dir folder and gives a version number. 
- - Parameters - ---------- - date : str - Date of the data in the format YYYYMMDD - version : str - Version number of the processed data - - Returns - ------- - None - """ - - # load config file - config_file = "process_config.yaml" - with open(config_file, "r") as file: - config = yaml.safe_load(file) - - # configure paths - paths = {} - paths["radar"] = config["radar"].format(date=date, flightletter=flightletter) - paths["radiometer"] = config["radiometer"].format( - date=date, flightletter=flightletter - ) - paths["bahamas"] = config["bahamas"].format(date=date, flightletter=flightletter) - paths["sea_land_mask"] = config["sea_land_mask"] - paths["save_dir"] = config["save_dir"].format(date=date, flightletter=flightletter) - - # load raw data - print(f"Loading raw data for {date}") - ds_radar_raw = read_mf_nc(paths["radar"]).load() - ds_bahamas = ( - xr.open_dataset(paths["bahamas"], engine="zarr") - .reset_coords(["lat", "lon", "alt"]) - .resample(time="0.25s") - .mean() - ) - ds_iwv_raw = read_nc(f"{paths['radiometer']}/KV/{date[2:]}.IWV.NC") - radiometers = ["183", "11990", "KV"] - ds_radiometers_raw = {} - for radio in radiometers: - ds_radiometers_raw[radio] = read_nc( - f"{paths['radiometer']}/{radio}/{date[2:]}.BRT.NC" - ) - - # do level 1 processing - print("Level 1 processing") - ds_radar_lev1 = radar(ds_radar_raw).pipe( - add_georeference, - lat=ds_bahamas["lat"], - lon=ds_bahamas["lon"], - plane_pitch=ds_bahamas["pitch"], - plane_roll=ds_bahamas["roll"], - plane_altitude=ds_bahamas["alt"], - source=ds_bahamas.attrs["source"], - ) - ds_iwv_lev1 = iwv(ds_iwv_raw).pipe( - add_georeference, - lat=ds_bahamas["lat"], - lon=ds_bahamas["lon"], - plane_pitch=ds_bahamas["pitch"], - plane_roll=ds_bahamas["roll"], - plane_altitude=ds_bahamas["alt"], - source=ds_bahamas.attrs["source"], - ) - ds_radiometers_lev1 = {} - for radio in radiometers: - ds_radiometers_lev1[radio] = radiometer(ds_radiometers_raw[radio]) - - # concatenate radiometers and add georeference - ds_radiometers_lev1_concat = xr.concat( - [ds_radiometers_lev1[radio] for radio in radiometers], dim="frequency" - ).sortby("frequency") - ds_radiometers_lev1_concat = ds_radiometers_lev1_concat.assign( - TBs=ds_radiometers_lev1_concat["TBs"].T - ).pipe( - add_georeference, - lat=ds_bahamas["lat"], - lon=ds_bahamas["lon"], - plane_pitch=ds_bahamas["pitch"], - plane_roll=ds_bahamas["roll"], - plane_altitude=ds_bahamas["alt"], - source=ds_bahamas.attrs["source"], - ) - - # do level 2 processing - print("Level 2 processing") - sea_land_mask = xr.open_dataarray(paths["sea_land_mask"]) - - ds_radar_lev2 = ( - ds_radar_lev1.pipe(correct_radar_height) - .pipe(filter_radar) - .pipe(add_metadata_radar, flight_id=f"HALO-{date}{flightletter}") - .pipe(add_masks_radar, sea_land_mask) - ) - ds_radiometer_lev2 = filter_radiometer( - ds_radiometers_lev1_concat, sea_land_mask=sea_land_mask - ) - ds_iwv_lev2 = filter_radiometer(ds_iwv_lev1, sea_land_mask=sea_land_mask) - - # save data - delete if exists to prevent overwriting which can cause issues with zarr - print(f"Saving data for {date}") - ds_radar_lev2.attrs["version"] = version - ds_radiometer_lev2.attrs["version"] = version - ds_iwv_lev2.attrs["version"] = version - # radar - path_radar = f"{paths['save_dir']}/radar/HALO-{date}a_radar.zarr" - if os.path.exists(path_radar): - shutil.rmtree(path_radar) - ds_radar_lev2.chunk(time=4**9).pipe(add_encoding).to_zarr(path_radar, mode="w") - # radiometer - path_radiometer = f"{paths['save_dir']}/radiometer/HALO-{date}a_radio.zarr" - if 
os.path.exists(path_radiometer): - shutil.rmtree(path_radiometer) - ds_radiometer_lev2.chunk(time=4**9, frequency=-1).pipe(add_encoding).to_zarr( - path_radiometer, mode="w" - ) - # iwv - path_iwv = f"{paths['save_dir']}/iwv/HALO-{date}a_iwv.zarr" - if os.path.exists(path_iwv): - shutil.rmtree(path_iwv) - ds_iwv_lev2.chunk(time=4**9).pipe(add_encoding).to_zarr(path_iwv, mode="w") - - -# %% run postprocessing -dates = [ - "20240926", -] - -flightletters = [ - "a", -] - -version = "1.0" -for date, flightletter in zip(dates, flightletters): - postprocess_hamp(date, flightletter, version) - -# %% diff --git a/scripts/process_hamp.py b/scripts/process_hamp.py new file mode 100644 index 0000000..707b4ae --- /dev/null +++ b/scripts/process_hamp.py @@ -0,0 +1,217 @@ +# %% import modules +import os +import yaml +import xarray as xr +import shutil +import pandas as pd +from src.process import ( + format_radiometer, + format_radar, + format_iwv, + filter_radar, + filter_radiometer, + add_masks_radar, + add_masks_radiometer, + add_masks_iwv, + add_metadata_radar, + add_metadata_radiometer, + add_metadata_iwv, + add_georeference, + correct_radar_height, + cleanup_iwv, + cleanup_radiometers, +) +from src.ipfs_helpers import get_encoding, read_nc, read_mf_nc + + +# %% define function +def postprocess_hamp(date, flightletter, version): + """ + Postprocess raw data from HAMP. + + Parameters + ---------- + date : str + Date of the data in the format YYYYMMDD + flightletter : str + Letter of the flight + version : str + Version number of the processed data + + Returns + ------- + None + """ + + # load config file + config_file = "process_config.yaml" + with open(config_file, "r") as file: + config = yaml.safe_load(file) + + # configure paths + paths = {} + paths["radar"] = config["radar"].format(date=date, flightletter=flightletter) + # dirty fix + if date == "20240809": + paths["radiometer"] = config["radiometer"].format(date=date, flightletter="a") + else: + paths["radiometer"] = config["radiometer"].format( + date=date, flightletter=flightletter + ) + paths["bahamas"] = config["bahamas"].format(date=date, flightletter=flightletter) + paths["sea_land_mask"] = config["sea_land_mask"] + paths["save_dir"] = config["save_dir"].format(date=date, flightletter=flightletter) + + # load raw data + print(f"Loading raw data for {date}") + ds_radar_raw = read_mf_nc(paths["radar"]).load() + + if date == "20240929": # only flight that crossed 0 UTC + date_2 = "20240930" + ds_bahamas = ( + xr.open_dataset(paths["bahamas"], engine="zarr") + .sel(time=slice(date, date_2)) + .reset_coords(["lat", "lon", "alt"]) + .resample(time="0.25s") + .mean() + ) + ds_iwv_raw_29 = read_nc(f"{paths['radiometer']}/KV/{date[2:]}.IWV.NC") + ds_iwv_raw_30 = read_nc(f"{paths['radiometer']}/KV/{date_2[2:]}.IWV.NC") + ds_iwv_raw = xr.combine_by_coords( + [ds_iwv_raw_29, ds_iwv_raw_30], + data_vars="minimal", + compat="override", + coords="minimal", + ) + radiometers = ["183", "11990", "KV"] + ds_radiometers_raw_29 = {} + ds_radiometers_raw_30 = {} + ds_radiometers_raw = {} + for radio in radiometers: + ds_radiometers_raw_29[radio] = read_nc( + f"{paths['radiometer']}/{radio}/{date[2:]}.BRT.NC" + ) + ds_radiometers_raw_30[radio] = read_nc( + f"{paths['radiometer']}/{radio}/{date_2[2:]}.BRT.NC" + ) + ds_radiometers_raw[radio] = xr.combine_by_coords( + [ds_radiometers_raw_29[radio], ds_radiometers_raw_30[radio]], + data_vars="minimal", + compat="override", + coords="minimal", + ) + + else: + ds_bahamas = ( + xr.open_dataset(paths["bahamas"], 
engine="zarr") + .sel(time=date) + .reset_coords(["lat", "lon", "alt"]) + .resample(time="0.25s") + .mean() + ) + ds_iwv_raw = read_nc(f"{paths['radiometer']}/KV/{date[2:]}.IWV.NC") + radiometers = ["183", "11990", "KV"] + ds_radiometers_raw = {} + for radio in radiometers: + ds_radiometers_raw[radio] = read_nc( + f"{paths['radiometer']}/{radio}/{date[2:]}.BRT.NC" + ) + + print("Processing") + ds_radar_lev1 = format_radar(ds_radar_raw).pipe( + add_georeference, + lat=ds_bahamas["lat"], + lon=ds_bahamas["lon"], + plane_pitch=ds_bahamas["pitch"], + plane_roll=ds_bahamas["roll"], + plane_altitude=ds_bahamas["alt"], + ) + ds_iwv_lev1 = format_iwv(ds_iwv_raw).pipe( + add_georeference, + lat=ds_bahamas["lat"], + lon=ds_bahamas["lon"], + plane_pitch=ds_bahamas["pitch"], + plane_roll=ds_bahamas["roll"], + plane_altitude=ds_bahamas["alt"], + ) + ds_radiometers_lev1 = {} + for radio in radiometers: + ds_radiometers_lev1[radio] = format_radiometer(ds_radiometers_raw[radio]) + + # concatenate radiometers and add georeference + ds_radiometers_lev1_concat = xr.concat( + [ds_radiometers_lev1[radio] for radio in radiometers], dim="frequency" + ).sortby("frequency") + ds_radiometers_lev1_concat = ds_radiometers_lev1_concat.assign( + TBs=ds_radiometers_lev1_concat["TBs"].T + ).pipe( + add_georeference, + lat=ds_bahamas["lat"], + lon=ds_bahamas["lon"], + plane_pitch=ds_bahamas["pitch"], + plane_roll=ds_bahamas["roll"], + plane_altitude=ds_bahamas["alt"], + ) + + sea_land_mask = xr.open_dataarray(paths["sea_land_mask"]).load() + + ds_radar_lev2 = ( + ds_radar_lev1.pipe(correct_radar_height) + .pipe(filter_radar) + .pipe(add_metadata_radar, flight_id=f"{date}{flightletter}") + .pipe(add_masks_radar, sea_land_mask) + .transpose("time", "height") + ) + ds_radiometer_lev2 = ( + filter_radiometer(ds_radiometers_lev1_concat) + .pipe(add_masks_radiometer, sea_land_mask) + .pipe(add_metadata_radiometer, flight_id=f"{date}{flightletter}") + .pipe(cleanup_radiometers) + .transpose("time", "frequency") + ) + ds_iwv_lev2 = ( + filter_radiometer(ds_iwv_lev1) + .pipe(add_metadata_iwv, flight_id=f"{date}{flightletter}") + .pipe(add_masks_iwv, sea_land_mask) + .pipe(cleanup_iwv) + ) + + print(f"Saving data for {date}") + ds_radar_lev2.attrs["version"] = version + ds_radiometer_lev2.attrs["version"] = version + ds_iwv_lev2.attrs["version"] = version + # radar + path_radar = f"{paths['save_dir']}/radar/HALO-{date}{flightletter}_radar.zarr" + if os.path.exists(path_radar): + shutil.rmtree(path_radar) + ds_radar_lev2.chunk(time=4**9, height=-1).to_zarr( + path_radar, encoding=get_encoding(ds_radar_lev2), mode="w" + ) + # radiometer + path_radiometer = ( + f"{paths['save_dir']}/radiometer/HALO-{date}{flightletter}_radio.zarr" + ) + if os.path.exists(path_radiometer): + shutil.rmtree(path_radiometer) + ds_radiometer_lev2.chunk(time=4**9, frequency=-1).to_zarr( + path_radiometer, encoding=get_encoding(ds_radiometer_lev2), mode="w" + ) + # iwv + path_iwv = f"{paths['save_dir']}/iwv/HALO-{date}{flightletter}_iwv.zarr" + if os.path.exists(path_iwv): + shutil.rmtree(path_iwv) + ds_iwv_lev2.chunk(time=4**9).to_zarr( + path_iwv, encoding=get_encoding(ds_iwv_lev2), mode="w" + ) + + +# %% run postprocessing +flights = pd.read_csv("flights.csv", index_col=0) +version = "1.1" +for date, flightletter in zip(flights.index, flights["flightletter"]): + postprocess_hamp(str(date), flightletter, version) + +# %% test postprocessing +postprocess_hamp("20240929", "a", "1.1") + +# %% diff --git a/scripts/testplot.py b/scripts/testplot.py new file 
mode 100644 index 0000000..61f4718 --- /dev/null +++ b/scripts/testplot.py @@ -0,0 +1,68 @@ +# %% +from src.plots_functions import testplot_hamp +import xarray as xr +import yaml +import pandas as pd + +# %% load config +flights = pd.read_csv("flights.csv", index_col=0) + +with open("process_config.yaml", "r") as file: + cfg = yaml.safe_load(file) + +# %% load full data +radar = xr.open_dataset(f"{cfg['save_dir']}/full_radar.zarr", engine="zarr") +radiometers = xr.open_dataset( + f"{cfg['save_dir']}/full_radiometer.zarr", + engine="zarr", +) +iwv = xr.open_dataset(f"{cfg['save_dir']}/full_iwv.zarr", engine="zarr") + +# %% plot all flights +for date in flights.index: + if date == 20240929: # only flight that crossed 0 UTC + fig = testplot_hamp( + radar.sel(time=slice("20240929", "20240930")), + radiometers.sel(time=slice("20240929", "20240930")), + iwv.sel(time=slice("20240929", "20240930")), + ground_filter=True, + roll_filter=True, + calibration_filter=True, + ) + else: + fig = testplot_hamp( + radar.sel(time=str(date)), + radiometers.sel(time=str(date)), + iwv.sel(time=str(date)), + ground_filter=True, + roll_filter=True, + calibration_filter=True, + amplifier_faults=True, + ) + fig.savefig(f"Plots/{date}_hamp_filtered.png", dpi=300) + +# %% plot single flight +date = "20240929" +flightletter = "a" +radar = xr.open_dataset( + f"{cfg['save_dir']}/radar/HALO-{date}{flightletter}_radar.zarr", engine="zarr" +) +radiometers = xr.open_dataset( + f"{cfg['save_dir']}/radiometer/HALO-{date}{flightletter}_radio.zarr", engine="zarr" +) +iwv = xr.open_dataset( + f"{cfg['save_dir']}/iwv/HALO-{date}{flightletter}_iwv.zarr", engine="zarr" +) + +# %% +fig = testplot_hamp( + radar, + radiometers, + iwv, + ground_filter=True, + roll_filter=True, + calibration_filter=True, + amplifier_faults=True, +) + +# %% diff --git a/src/arts_functions.py b/src/arts_functions.py index 25dd2bd..8573784 100644 --- a/src/arts_functions.py +++ b/src/arts_functions.py @@ -3,6 +3,7 @@ from scipy.optimize import curve_fit import xarray as xr import pandas as pd +from scipy.stats import linregress from pyarts.workspace import arts_agenda @@ -270,7 +271,7 @@ def Hamp_channels(band_selection, rel_mandatory_grid_spacing=1.0 / 4.0): channels["F"] = { "f_center": np.ones(4) * 118.75e9, - "Offset1": np.array([1.3, 2.3, 4.2, 8.5]) * 1e9, + "Offset1": np.array([1.4, 2.3, 4.2, 8.5]) * 1e9, "Offset2": np.zeros(4), "NeDT": 0.6, "Accuracy": 1.5, @@ -628,18 +629,37 @@ def exponential(x, a, b): def fit_exponential(x, y, p0): nanmask = np.isnan(y) | np.isnan(x) - popt, _ = curve_fit(exponential, x[~nanmask], y[~nanmask], p0=p0) - offset = y[~nanmask][-1] + if np.sum(nanmask) > 0: + popt, _ = curve_fit(exponential, x[~nanmask], y[~nanmask], p0=p0) + offset = y[~nanmask][-1] + idx_nan = np.where(nanmask)[0] + nanmask[idx_nan - 1] = True # get overlap of one + filled = np.zeros_like(y) + filled[~nanmask] = y[~nanmask] + new_vals = exponential(x[nanmask], *popt) + filled[nanmask] = new_vals - new_vals[0] + offset + return filled + else: + return y + + +def fit_linear(x, y, upper_val, height): + nanmask = np.isnan(y) | np.isnan( + x + ) # nan's exist under plane where we want to extrapolate + last_val = y[~nanmask][-1] + last_height = x[~nanmask][-1] idx_nan = np.where(nanmask)[0] nanmask[idx_nan - 1] = True # get overlap of one + slope = (upper_val - last_val) / (height - last_height) filled = np.zeros_like(y) filled[~nanmask] = y[~nanmask] - new_vals = exponential(x[nanmask], *popt) - filled[nanmask] = new_vals - new_vals[0] + offset + new_vals
= slope * (x[nanmask] - last_height) + last_val + filled[nanmask] = new_vals return filled -def fit_linear(x, y, upper_val, height): +def fit_regression(x, y): nanmask = np.isnan(y) | np.isnan( x ) # nan's exist under plane where we want to extrapolate @@ -647,7 +667,7 @@ def fit_linear(x, y, upper_val, height): last_height = x[~nanmask][-1] idx_nan = np.where(nanmask)[0] nanmask[idx_nan - 1] = True # get overlap of one - slope = (upper_val - last_val) / (height - last_height) + slope = linregress(x[~nanmask], y[~nanmask]).slope filled = np.zeros_like(y) filled[~nanmask] = y[~nanmask] new_vals = slope * (x[nanmask] - last_height) + last_val @@ -655,138 +675,61 @@ def fit_linear(x, y, upper_val, height): return filled -def get_profiles(sonde_id, ds_dropsonde, hampdata): - ds_dropsonde_loc = ds_dropsonde.sel(sonde_id=sonde_id) - drop_time = ds_dropsonde_loc["launch_time"].values - hampdata_loc = hampdata.sel(timeslice=drop_time, method="nearest") - height = float(hampdata_loc.radiometers.plane_altitude.values) +def fit_constant(x, y): + nanmask = np.isnan(y) | np.isnan( + x + ) # nan's exist under plane where we want to extrapolate + last_val = y[~nanmask][-1] + idx_nan = np.where(nanmask)[0] + nanmask[idx_nan - 1] = True # get overlap of one + filled = np.zeros_like(y) + filled[~nanmask] = y[~nanmask] + filled[nanmask] = last_val + return filled + + +def get_profiles(sonde, ds_dropsonde, radiometers): + ds_dropsonde_loc = ds_dropsonde.sel(sonde=sonde) + drop_time = ds_dropsonde_loc["sonde_time"].values + hampdata_loc = radiometers.dropna("time").sel(time=drop_time, method="nearest") + height = float(hampdata_loc.plane_altitude.values) return ds_dropsonde_loc, hampdata_loc, height, drop_time -def extrapolate_dropsonde(ds_dropsonde, height, ds_bahamas): +def extrapolate_dropsonde(ds_dropsonde, height): # drop nans - ds_dropsonde = ds_dropsonde.where(ds_dropsonde["gpsalt"] < height, drop=True) + ds_dropsonde = ds_dropsonde.where(ds_dropsonde["altitude"] < height, drop=True) bool = (ds_dropsonde["p"].isnull()) & ( - ds_dropsonde["gpsalt"] < 100 + ds_dropsonde["altitude"] < 100 ) # drop nans at lower levels ds_dropsonde = ds_dropsonde.where(~bool, drop=True) p_extrap = fit_exponential( - ds_dropsonde["gpsalt"].values, - ds_dropsonde["p"].interpolate_na("gpsalt").values, + ds_dropsonde["altitude"].values, + ds_dropsonde["p"].interpolate_na("altitude").values, p0=[1e5, -0.0001], ) - ta_extrap = fit_linear( - ds_dropsonde["gpsalt"].values, - ds_dropsonde["ta"].interpolate_na("gpsalt").values, - ds_bahamas["TS"] - .sel(time=ds_dropsonde["launch_time"].values, method="nearest") - .values, - height, + ta_extrap = fit_regression( + ds_dropsonde["altitude"].values, + ds_dropsonde["ta"].interpolate_na("altitude").values, ) - q_extrap = fit_linear( - ds_dropsonde["gpsalt"].values, - ds_dropsonde["q"].interpolate_na("gpsalt").values, - ds_bahamas["MIXRATIO"] - .sel(time=ds_dropsonde["launch_time"].values, method="nearest") - .values - / 1e3, - height, + q_extrap = fit_constant( + ds_dropsonde["altitude"].values, + ds_dropsonde["q"].interpolate_na("altitude").values, ) return xr.Dataset( { - "p": (("alt"), p_extrap), - "ta": (("alt"), ta_extrap), - "q": (("alt"), q_extrap), + "p": (("altitude"), p_extrap), + "ta": (("altitude"), ta_extrap), + "q": (("altitude"), q_extrap), }, - coords={"alt": ds_dropsonde["gpsalt"].values}, + coords={"altitude": ds_dropsonde["altitude"].values}, ) -def average_double_bands(TB, freqs_hamp): - """Average double bands of ARTS simulations. 
- - Parameters: - TB (ndarray): Brightness temperatures. - - Returns: - ndarray: Averaged brightness temperatures. - """ - - TB_averaged = pd.DataFrame(index=freqs_hamp, columns=["TB"]) - single_freqs = np.float32( - np.array( - [ - 22.24, - 23.04, - 23.84, - 25.44, - 26.24, - 27.84, - 31.4, - 50.3, - 51.76, - 52.8, - 53.75, - 54.94, - 56.66, - 58.0, - 90, - ] - ) - ) - double_freqs = np.float32( - np.array( - [ - 120.15, - 121.05, - 122.95, - 127.25, - 183.91, - 184.81, - 185.81, - 186.81, - 188.31, - 190.81, - ] - ) - ) - mirror_freqs = np.float32( - np.array( - [ - 117.35, - 116.45, - 114.55, - 110.25, - 182.71, - 181.81, - 179.81, - 178.31, - 180.81, - 175.81, - ] - ) - ) - counterparts = pd.DataFrame( - data=mirror_freqs, index=double_freqs, columns=["mirror_freq"] - ) - for freq in freqs_hamp: - if freq in single_freqs: - TB_averaged.loc[freq] = TB.loc[freq].values[0] - else: - TB_averaged.loc[freq] = np.mean( - [ - float(TB.loc[freq].values), - float(TB.loc[counterparts.loc[freq].values].values), - ] - ) - - return TB_averaged - - def get_surface_temperature(dropsonde): """ Get temperature at lowest level of dropsonde which is not nan. @@ -798,7 +741,8 @@ def get_surface_temperature(dropsonde): float: Surface temperature. """ - return dropsonde["ta"].where(~dropsonde["ta"].isnull(), drop=True).values[0] + T_surf = dropsonde["ta"].where(~dropsonde["ta"].isnull(), drop=True).values[0] + return T_surf def get_surface_windspeed(dropsonde): @@ -814,3 +758,55 @@ def get_surface_windspeed(dropsonde): u = dropsonde["u"].where(~dropsonde["u"].isnull(), drop=True).values[0] v = dropsonde["v"].where(~dropsonde["v"].isnull(), drop=True).values[0] return np.sqrt(u**2 + v**2) + + +def is_complete(dropsonde, hamp, drop_time, height, sonde): + """ + Check if data is complete and valid. + + Parameters: + dropsonde (xr.Dataset): Dropsonde data. + bahamas (xr.Dataset): Bahamas data. + hamp (xr.Dataset): HAMP data. + + Returns: + bool: True if data is valid, False otherwise. 
+ """ + + if ( + (dropsonde["ta"].isnull().mean() == 1) + or (dropsonde["p"].isnull().mean() == 1) + or (dropsonde["q"].isnull().mean() == 1) + ): + print(f"Dropsonde {sonde} contains nan only, skipping") + return False + + if dropsonde["p"].dropna("altitude").diff("altitude").max() > 0: + print(f"Dropsonde {sonde} pressure is not monotonically decreasing, skipping") + return False + + if dropsonde["p"].max() < 900e2: + print(f"Dropsonde {sonde} pressure is below 900 hPa, skipping") + return + + if np.isnan(height): + print(f"Bahamas at {drop_time} contains nan, skipping") + return False + + if hamp["TBs"].isnull().mean().values == 1: + print(f"HAMP data at {drop_time} contains nan, skipping") + return False + + if hamp["time"].values - drop_time > pd.Timedelta("2 minutes"): + print(f"HAMP data at {drop_time} is more than 2 minutes away, skipping") + return False + + if ( + dropsonde["altitude"].where(~dropsonde["ta"].isnull(), drop=True).values[0] > 20 + ) or ( + dropsonde["altitude"].where(~dropsonde["u"].isnull(), drop=True).values[0] > 20 + ): + print(f"Dropsonde {sonde} lowest altitude is above 20 m, skipping") + return False + + return True diff --git a/src/config.json b/src/config.json new file mode 100644 index 0000000..4404dc1 --- /dev/null +++ b/src/config.json @@ -0,0 +1,8 @@ +{ + "roll_threshold": 5, + "valid_radar_state": 13, + "noise_threshold": 100, + "altitude_threshold": 4800, + "plane_variables": ["lat", "lon", "plane_altitude", "plane_roll", "plane_pitch"], + "path_error_file": "/home/m/m301049/hamp_processing/error_files/" +} diff --git a/src/ipfs_helpers.py b/src/ipfs_helpers.py index 9183393..2917005 100644 --- a/src/ipfs_helpers.py +++ b/src/ipfs_helpers.py @@ -5,37 +5,37 @@ def get_chunks(dimensions): - if "frequency" in dimensions: - chunks = { - "time": 4**8, - "frequency": 5, - } - elif "height" in dimensions: - chunks = { - "time": 4**5, - "height": 4**4, - } - else: - chunks = { - "time": 4**9, - } + match dimensions: + case ("time",): + chunks = { + "time": 2**18, + } + case ("time", "height"): + chunks = { + "time": 2**11, + "height": 2**7, + } + case ("time", "frequency"): + chunks = { + "time": 2**16, + "frequency": 5, + } return tuple((chunks[d] for d in dimensions)) -def add_encoding(dataset): - numcodecs.blosc.set_nthreads(1) - compressor = numcodecs.Blosc("zstd") - - for var in dataset.variables: - if var not in dataset.dims: - dataset[var].encoding = { - "compressor": compressor, - "dtype": "float32", - "chunks": get_chunks(dataset[var].dims), - } +def get_encoding(dataset): + numcodecs.blosc.set_nthreads(1) # IMPORTANT FOR DETERMINISTIC CIDs + codec = numcodecs.Blosc("zstd", shuffle=1, clevel=6) - return dataset + return { + var: { + "chunks": get_chunks(dataset[var].dims), + "compressor": codec, + } + for var in dataset.variables + if var not in dataset.dims + } def read_nc(url): diff --git a/src/plots_functions.py b/src/plots_functions.py new file mode 100644 index 0000000..dd06d1f --- /dev/null +++ b/src/plots_functions.py @@ -0,0 +1,223 @@ +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.cm import get_cmap +from matplotlib.colors import Normalize + +# import matplotlib +# matplotlib.use("webagg") + + +def plot_dropsonde(ds_extrap, ds_loc): + fig, axes = plt.subplots(2, 3, figsize=(15, 9), sharey=True) + ds_extrap["ta"].plot(y="altitude", ax=axes[0, 0], label="Temperature", color="red") + ds_loc["ta"].plot(y="altitude", ax=axes[0, 0], label="Temperature", color="blue") + ds_extrap["q"].plot( + y="altitude", ax=axes[0, 1], 
label="Specific humidity", color="red" + ) + ds_loc["q"].plot( + y="altitude", ax=axes[0, 1], label="Specific humidity", color="blue" + ) + ds_extrap["p"].plot(y="altitude", ax=axes[0, 2], label="Pressure", color="red") + ds_loc["p"].plot(y="altitude", ax=axes[0, 2], label="Pressure", color="blue") + ds_loc["u"].plot(y="altitude", ax=axes[1, 1], label="u", color="blue") + ds_loc["v"].plot(y="altitude", ax=axes[1, 2], label="v", color="blue") + axes[0, 0].set_xlabel("Temperature / K") + axes[0, 1].set_xlabel("Specific humidity") + axes[0, 2].set_xlabel("Pressure / hPa") + axes[1, 0].set_xlabel("Altitude / m") + axes[1, 1].set_xlabel("u / m/s") + axes[1, 2].set_xlabel("v / m/s") + + for ax in axes.flatten(): + ax.set_title("") + ax.set_ylabel("") + ax.spines[["top", "right"]].set_visible(False) + axes[0, 0].set_ylabel("GPS altitude / m") + axes[0, 1].set_ylabel("GPS altitude / m") + plt.show() + + +def plot_TB_comparison(TB_arts, TB_hamp, sonde_id): + fig, ax = plt.subplots(1, 1, figsize=(10, 10)) + + ax.scatter(TB_hamp.index, TB_hamp, color="blue", marker="o", label="HAMP") + ax.scatter(TB_arts.index, TB_arts, color="red", marker="x", label="ARTS") + ax.set_xlabel("Brightness temperature / K") + ax.set_xticks(TB_arts.index) + ax.set_xticklabels(TB_arts.index, rotation=45) + ax.spines[["top", "right"]].set_visible(False) + ax.set_ylabel("Frequency / GHz") + ax.set_title(f"{sonde_id}") + ax.legend() + plt.show() + + +def testplot_hamp( + ds_radar, + ds_radiometers, + ds_iwv, + ground_filter=True, + roll_filter=True, + calibration_filter=True, + amplifier_faults=True, +): + plt.close("all") + fig, axes = plt.subplots( + 7, + 2, + figsize=(15, 20), + sharex="col", + height_ratios=[3, 1, 1, 1, 1, 1, 1], + width_ratios=[10, 1], + ) + data = ds_radar["dBZg"] + if ground_filter: + data = data.where(ds_radar["mask_ground_return"]) + if roll_filter: + data = data.where(ds_radar["mask_roll"]) + if calibration_filter: + data = data.where(ds_radar["mask_calibration"]) + if amplifier_faults: + ds_radiometers = ds_radiometers.where(ds_radiometers["mask_amplifier_fault"]) + + col = data.plot.pcolormesh( + ax=axes[0, 0], + x="time", + y="height", + cmap="YlGnBu", + vmin=-30, + vmax=30, + add_colorbar=False, + ) + cb = fig.colorbar(col, cax=axes[0, 1], orientation="vertical", shrink=0.5) + cb.set_label("dBZg") + + # Define frequencies for each subplot + frequencies_list = [ + [22.24, 23.04, 23.84, 25.44, 26.24, 27.84, 31.4], + [50.3, 51.76, 52.8, 53.75, 54.94, 56.66, 58.0], + [90], + [120.15, 121.05, 122.95, 127.25], + [183.91, 184.81, 185.81, 186.81, 188.31, 190.81], + ] + + # Create a colormap + cmap = get_cmap("viridis") # Adjust the range according to your frequencies + + for i, frequencies in enumerate(frequencies_list): + for f in frequencies: + norm = Normalize(vmin=min(frequencies), vmax=max(frequencies)) + color = cmap(norm(f)) + ds_radiometers["TBs"].sel(frequency=[f]).plot( + ax=axes[i + 1, 0], x="time", label=f, color=color + ) + handles, labels = axes[i + 1, 0].get_legend_handles_labels() + axes[i + 1, 1].legend(handles=handles, labels=labels) + axes[i + 1, 1].axis("off") + axes[i + 1, 0].set_title("") + axes[i + 1, 0].set_ylabel("TB / K") + axes[i + 1, 0].set_xlabel("") + axes[i + 1, 0].spines[["top", "right"]].set_visible(False) + + ds_iwv["IWV"].plot(ax=axes[6, 0], x="time", color="k", label="IWV") + axes[6, 1].remove() + axes[6, 0].set_title("") + axes[6, 0].set_ylabel("IWV / kg m-2") + axes[0, 0].spines[["top", "right"]].set_visible(False) + axes[0, 0].set_xlabel("") + axes[0, 
0].set_ylabel("Height / m") + axes[6, 0].set_xlabel("Time") + fig.tight_layout() + + return fig + + +def define_module(radio): + if (radio == "K") | (radio == "V"): + module = "KV" + elif radio == "183": + module = "183" + elif radio == "90": + module = "11990" + elif radio == "119": + module = "11990" + else: + raise ValueError("Invalid radiometer frequency") + return module + + +def plot_radiometers( + ds_radiometers, + ds_radiometers_raw, +): + plt.close("all") + fig, axes = plt.subplots( + 5, + 2, + figsize=(12, 7), + sharex="col", + width_ratios=[10, 1], + ) + + freqs = { + "K": slice(22, 32), + "V": slice(50, 58), + "183": slice(183, 191), + "119": slice(120, 128), + "90": 90, + } + for i, radio in enumerate(freqs.keys()): + ds_radiometers["TBs"].sel(frequency=freqs[radio]).plot.line( + ax=axes[i, 0], x="time", add_legend=False + ) + if radio != "90": + handles = list(axes[i, 0].get_lines()) + labels = ds_radiometers.sel(frequency=freqs[radio]).frequency.values + axes[i, 1].legend(handles, labels) + axes[i, 1].axis("off") + axes[i, 0].set_ylabel("TB / K") + axes[i, 0].set_xlabel("") + axes[i, 0].set_title(f"{radio}") + axes[i, 0].spines[["top"]].set_visible(False) + ax2 = axes[i, 0].twinx() + ax2.spines[["top"]].set_visible(False) + ax2.plot( + ds_radiometers_raw[define_module(radio)].time, + ds_radiometers_raw[define_module(radio)].sel(frequency=freqs[radio])[ + "gain" + ], + color="grey", + ) + ax2.set_ylabel("Gain") + + axes[4, 0].set_xlabel("Time") + fig.tight_layout() + plt.show() + + return fig + + +def plot_regression(regression_coeffs, TB_arts, TB_hamp, date): + fig, axes = plt.subplots(5, 5, figsize=(15, 15)) + TB_hamp = TB_hamp[TB_hamp.index.date == date] + TB_arts = TB_arts[TB_arts.index.date == date] + for i, freq in enumerate(TB_arts.columns): + x = np.linspace( + np.nanmin(TB_hamp[freq].values), np.nanmax(TB_hamp[freq].values), 100 + ) + y = ( + regression_coeffs.loc[date, (freq, "slope")] * x + + regression_coeffs.loc[date, (freq, "intercept")] + ) + axes.flatten()[i].scatter( + TB_hamp[freq], TB_arts[freq], color="blue", marker="o", s=1 + ) + axes.flatten()[i].plot(x, y, color="red") + axes.flatten()[i].set_title(f"{freq} GHz") + + for ax in axes[:, 0]: + ax.set_ylabel("ARTS TB [K]") + for ax in axes[-1, :]: + ax.set_xlabel("HAMP TB [K]") + fig.tight_layout() + return fig diff --git a/src/process.py b/src/process.py new file mode 100644 index 0000000..8d1fff6 --- /dev/null +++ b/src/process.py @@ -0,0 +1,812 @@ +import numpy as np +import xarray as xr +import json +import os +import pandas as pd +from scipy.ndimage import convolve +from orcestra.utils import get_flight_segments +import yaml + +# Get the directory of the current script +script_dir = os.path.dirname(os.path.abspath(__file__)) + +# Construct the path to config.json relative to the script's location +config_path = os.path.join(script_dir, "config.json") + +# read config.json file for parameters +with open(config_path) as f: + config = json.load(f) + + +def _selective_where(ds, condition): + """ + Apply where to dataset but do not touch georeference data which should not be masked + + Parameters + ---------- + ds : xr.Dataset + The input dataset. + condition : xr.DataArray, np.array, or boolean + The condition to apply to the dataset. + + Returns + ------- + xr.Dataset + The dataset with the condition applied. 
+ """ + + ds = ds.assign( + { + var: ds[var].where(condition) + for var in ds.data_vars + if var not in config["plane_variables"] + } + ) + return ds + + +def _bahamas_fix_time(ds): + """Fix time coordinate of BAHAMAS datasets.""" + return ds.rename({"tid": "time", "TIME": "time"}).set_index(time="time") + + +def _radar_fix_time(ds): + """Fix time coordinate of RADAR moments datasets.""" + datetime = ( + np.datetime64("1970-01-01", "ns") + + ds.time.values * np.timedelta64(1, "s") + + ds.microsec.values * np.timedelta64(1, "us") + ) + + return ds.assign(time=datetime).drop_vars("microsec") + + +def _radar_add_dBZ(ds): + """Add reflectivity in dB.""" + ds = ds.assign( + dBZg=lambda dx: 10 * np.log10(dx.Zg), + dBZe=lambda dx: 10 * np.log10(dx.Ze), + ) + ds.dBZg.attrs = { + "units": "dBZg", + "long_name": "Decadal logarithm of equivalent radar reflectivity of all targets (Zg)", + } + ds.dBZe.attrs = { + "units": "dBZe", + "long_name": "Decadal logarithm of equivalent radar reflectivity of hydrometeors (Ze)", + } + + return ds + + +def _fix_radiometer_time(ds): + """Replace duplicates in time coordinate of radiometer datasets with correct time and ensure 4Hz frequency.""" + + # replace duplicate values + time_broken = ds.time.values + first_occurence = time_broken[0] + n = 0 + time_new = [] + for i in range(len(time_broken)): + if time_broken[i] == first_occurence: + time_new.append(time_broken[i] + pd.Timedelta("0.25s") * n) + n += 1 + else: + n = 1 + first_occurence = time_broken[i] + time_new.append(first_occurence) + + ds = ds.assign_coords(time=time_new).sortby("time").drop_duplicates("time") + + # ensure 4Hz frequency + start_time = ds.time.min().values + end_time = ds.time.max().values + time_expected = pd.date_range(start=start_time, end=end_time, freq="0.25s") + ds = ds.reindex(time=time_expected, fill_value=np.nan) + + return ds + + +def _noise_filter_radar(ds): + """Filter radar data by noise level. + + Parameters + ---------- + ds : xr.Dataset + Level0 radar dataset. + + Returns + ------- + xr.Dataset + Radar data filtered by noise level. + + """ + return ds.pipe(_selective_where, (ds.npw1 > config["noise_threshold"])) + + +def _state_filter_radar(ds): + """Filter radar data for valid state. + + Parameters + ---------- + ds : xr.Dataset + Level0 radar dataset. + + Returns + ------- + xr.Dataset + Radar data filtered for valid state. + """ + + return ds.pipe(_selective_where, (ds.grst == config["valid_radar_state"])) + + +def _altitude_filter(ds): + """Filter any dataset by plane altitude. + + Parameters + ---------- + ds : xr.Dataset + Level1 dataset. + + Returns + ------- + xr.Dataset + Dataset filtered by plane altitude. + """ + + return ds.pipe(_selective_where, (ds.plane_altitude > config["altitude_threshold"])) + + +def _filter_clutter(ds): + """Filter radar data for clutter. + + Parameters + ---------- + ds : xr.Dataset + Level1 radar dataset. + + Returns + ------- + xr.Dataset + Radar data filtered for clutter. 
+ """ + kernel = np.array( + [ + [1, 1, 1, 1, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 1, 1, 1, 1], + ] + ) + + for var in [var for var in ds if ds[var].dims == ("time", "height")]: + ds_binary = ~np.isnan(ds[var]) + neighbour_count = convolve(ds_binary, kernel, mode="constant", cval=False) + clutter_mask = (ds_binary) & (~neighbour_count) + ds = ds.assign({var: ds[var].where(~clutter_mask, np.nan)}) + return ds + + +def _trim_dataset(ds, dim="time"): + """ + Trim the dataset by removing data at the beginning and end until the first and last occurrence of valid data. + + Parameters + ---------- + ds : xr.Dataset + The input dataset. + dim : str + The dimension along which to trim the dataset. Default is "time". + + Returns + ------- + xr.Dataset + The trimmed dataset. + """ + # Drop NaNs along the specified dimension + valid_data = ds.dropna(dim=dim, how="all") + + # Find the first and last indices of valid data + first_valid_index = valid_data[dim].values[0] + last_valid_index = valid_data[dim].values[-1] + + # Slice the dataset to include only the valid data + trimmed_ds = ds.sel({dim: slice(first_valid_index, last_valid_index)}) + + return trimmed_ds + + +def _add_sea_land_mask(ds, sea_land_mask, offset=pd.Timedelta("7s")): + """Filters out data that was collected over land. + + Removes data by offset earlier than the the time the plane flies over land + to avoid including land measurements due to tilt of the plane or mask inaccuracies. + + Parameters + ---------- + ds : xr.Dataset + Dataset to filter. + sea_land_mask : xr.DataArray + Mask of land and sea. 1 for sea, 0 for land. + offset : pd.Timedelta + Time offset to remove data before the plane flies over land. Default is 7 seconds. + + Returns + ------- + xr.Dataset + Filtered dataset. + """ + + mask_path = sea_land_mask.sel(lat=ds.lat, lon=ds.lon, method="nearest") + diff = (mask_path * 1).diff("time") + start_land = diff.where(diff == -1).dropna("time").time + end_land = diff.where(diff == 1).dropna("time").time + for t in start_land: + mask_path.loc[dict(time=slice(t - offset, t))] = 0 + for t in end_land: + mask_path.loc[dict(time=slice(t, t + offset))] = 0 + ds = ds.assign(mask_sea_land=mask_path.drop_vars(["lat", "lon"]).astype(int)) + ds["mask_sea_land"].attrs = { + "long_name": "Mask for sea and land.", + "description": "1 indicates sea, 0 indicates land.", + } + return ds + + +def _add_amplifier_fault_mask(ds): + """Add a mask for amplifier faults to the radiometer dataset. + + Parameters + ---------- + ds : xr.Dataset + Level1 radiometer dataset. + Returns + ------- + xr.Dataset + Radiometer dataset with a mask for amplifier faults. 
+ """ + + date = pd.Timestamp(ds.time.values[0]).strftime("%Y%m%d") + path = f"{config['path_error_file']}HALO-{date}.yaml" + if os.path.exists(path): + with open(path) as f: + amplifier_faults = yaml.safe_load(f) + else: + amplifier_faults = {} + + mask_amplifier_fault = xr.DataArray( + np.ones(ds.TBs.shape), + dims=ds.TBs.dims, + coords=ds.TBs.coords, + attrs={ + "long_name": "Mask for amplifier faults", + "description": "1 indicates no amplifier fault, 0 indicates amplifier fault", + }, + ) + + freq_of_module = { + "KV": slice(22, 58), + "183": slice(183, 191), + "119": slice(120, 128), + "90": 90, + } + + for module in amplifier_faults.keys(): + for time_tuple in amplifier_faults[module]: + mask_amplifier_fault.loc[ + { + "time": slice(f"{date}T{time_tuple[0]}", f"{date}T{time_tuple[1]}"), + "frequency": freq_of_module[module], + } + ] = 0 + + ds = ds.assign(mask_amplifier_fault=mask_amplifier_fault.astype(int)) + + return ds + + +def _add_clibration_mask(ds): + """ + Add a mask for radar calibration segments to the radar dataset. + + Parameters + ---------- + ds : xr.Dataset + Level1 radar dataset. + Returns + ------- + xr.Dataset + Radar dataset with a mask for radar calibration segments. + """ + + meta = get_flight_segments() + radar_calib = [ + {**s, "platform_id": platform_id, "flight_id": flight_id} + for platform_id, flights in meta.items() + for flight_id, flight in flights.items() + for s in flight["segments"] + if "radar_calibration_wiggle" in s["kinds"] + ] + + radar_calib_flight = [s for s in radar_calib if s["flight_id"] == ds.flight_id] + mask_calib = xr.DataArray( + np.ones(ds.time.shape), + dims="time", + coords={"time": ds.time}, + attrs={ + "long_name": "Mask for radar calibration segments", + "description": "1 indicates no radar calibration, 0 indicates radar calibration", + }, + ) + for s in radar_calib_flight: + mask_calib.loc[{"time": slice(s["start"], s["end"])}] = 0 + + ds = ds.assign(mask_calibration=mask_calib.astype(int)) + + return ds + + +def _add_ground_mask(ds, sea_land_mask, threshold=40): + """ + Add a mask for ground reflections to the radar dataset. + + The mask is created by identifying the first height level coming from the top for which dBZg exceeds the given threshold. + All cells below this height level are considered to be ground reflections. + + Parameters + ---------- + ds_radar : xr.Dataset + Level1 radar dataset. + sea_land_mask : xr.Dataset + Dataset containing a mask for land and ocean. The mask should have the dimensions lat and lon. + 1 indicates ocean, 0 indicates land. + threshold : int, optional + dBZg threshold for ground reflection, by default 40. + Returns + ------- + xr.Dataset + Radar dataset with a mask for ground reflections. 
+ """ + + # get land or sea mask along track + sea_mask = sea_land_mask.sel(lat=ds.lat, lon=ds.lon, method="nearest").drop_vars( + ["lat", "lon"] + ) + + # get first height level coming from the top that exceeds threshold + strong_signal = (ds.dBZg > threshold) * ds.height + max_height = strong_signal.idxmax("height").interpolate_na( + dim="time", method="linear" + ) + + # create mask for land and ocean + dz = ds.height.diff("height").mean().values + mask_land = ds.height > (max_height + 6 * dz) + mask_ocean = ds.height > (max_height + 2 * dz) + mask_ground_return = xr.where(sea_mask, mask_ocean, mask_land).astype(int) + + # add mask to dataset + ds = ds.assign(mask_ground_return=mask_ground_return) + ds["mask_ground_return"].attrs = { + "long_name": "Mask for ground reflections", + "description": "1 indicates no ground reflection, 0 indicates ground reflection", + } + + return ds + + +def _add_roll_mask(ds): + """ + Add a mask for roll segments to the radar dataset. + + Parameters + ---------- + ds : xr.Dataset + Level1 radar dataset. + Returns + ------- + xr.Dataset + Radar dataset with a mask for roll segments. + """ + + mask_roll = (np.abs(ds.plane_roll) <= 5).astype(int) + mask_roll.attrs = { + "long_name": "Mask for roll segments", + "description": "0 indicates roll higher 5 deg, 1 indicates roll lower 5 deg", + } + ds = ds.assign(mask_roll=mask_roll) + + return ds + + +def add_georeference(ds, lat, lon, plane_pitch, plane_roll, plane_altitude): + """Add georeference information to dataset.""" + ds = ds.assign( + plane_altitude=plane_altitude.sel(time=ds.time, method="nearest").assign_coords( + time=ds.time + ), + lat=lat.sel(time=ds.time, method="nearest").assign_coords(time=ds.time), + lon=lon.sel(time=ds.time, method="nearest").assign_coords(time=ds.time), + plane_roll=plane_roll.sel(time=ds.time, method="nearest").assign_coords( + time=ds.time + ), + plane_pitch=plane_pitch.sel(time=ds.time, method="nearest").assign_coords( + time=ds.time + ), + ) + return ds + + +def format_bahamas(ds): + """Post-processing of BAHAMAS datasets.""" + return ds.pipe( + _bahamas_fix_time, + ) + + +def format_radiometer(ds): + """Post-processing of radiometer datasets.""" + return ( + ds.rename(number_frequencies="frequency") + .set_index(frequency="Freq") + .pipe( + _fix_radiometer_time, + ) + ) + + +def format_iwv(ds): + """Post-processing of IWV datasets.""" + return ds.pipe(_fix_radiometer_time) + + +def format_radar(ds): + """Post-processing of Radar quick look datasets.""" + return ds.pipe( + _radar_fix_time, + ).pipe( + _radar_add_dBZ, + ) + + +def filter_radar(ds): + """Filter radar data for noise, valid radar states, and roll angle. + + Parameters + ---------- + ds : xr.Dataset + Level1 radar dataset. + + Returns + ------- + xr.Dataset + Radar data filtered for noise, state, and roll angle. + """ + + return ( + ds.pipe(_noise_filter_radar) + .pipe(_state_filter_radar) + .pipe(_trim_dataset) + .pipe(_filter_clutter) + ) + + +def filter_radiometer(ds): + """Filter radiometer data for height and roll angle. + + Parameters + ---------- + ds : xr.Dataset + Level0 radiometer dataset. + height : xr.DataArray + Flight altitude in m. + roll : xr.DataArray + Flight roll angle in degrees. + lat : xr.DataArray + Latitudes of the flightpath. + lon : xr.DataArray + Longitudes of the flightpath. + sea_land_mask : xr.DataArray + Mask of land and sea. 1 for sea, 0 for land. + + Returns + ------- + xr.Dataset + Radiometer data filtered for height and roll angle. 
+ """ + + return ds.pipe(_altitude_filter).pipe(_trim_dataset) + + +def add_masks_radiometer(ds, sea_land_mask): + """Add masks to radiometer data for sea and land. + + Parameters + ---------- + ds : xr.Dataset + Level1 radiometer dataset. + sea_land_mask : xr.DataArray + Mask of land and sea. 1 for sea, 0 for land. + + Returns + ------- + xr.Dataset + Radiometer dataset with masks for height and roll angle. + """ + + return ds.pipe(_add_sea_land_mask, sea_land_mask).pipe(_add_amplifier_fault_mask) + + +def add_masks_iwv(ds, sea_land_mask): + """Add masks to IWV data for sea and land. + + Parameters + ---------- + ds : xr.Dataset + Level1 IWV dataset. + sea_land_mask : xr.DataArray + Mask of land and sea. 1 for sea, 0 for land. + + Returns + ------- + xr.Dataset + IWV dataset with masks for height and roll angle. + """ + + return ds.pipe(_add_sea_land_mask, sea_land_mask) + + +def add_masks_radar(ds, sea_land_mask): + """Add masks to radar data for calibration, ground reflections, and roll segments. + + Parameters + ---------- + ds : xr.Dataset + Level1 radar dataset. + sea_land_mask : xr.DataArray + Mask of land and sea. 1 for sea, 0 for land. + + Returns + ------- + xr.Dataset + Radar dataset with masks for calibration, ground reflections, and roll segments. + """ + + return ( + ds.pipe(_add_clibration_mask) + .pipe(_add_ground_mask, sea_land_mask) + .pipe(_add_roll_mask) + ) + + +def correct_radar_height(ds): + """Correct radar range gates with HALO flight altitude to height above WGS84 ellipsoid. + + Parameters + ---------- + ds : xr.Dataset + Level1 radar dataset. + + Returns + ------- + xr.Dataset + Radar data corrected to height above WGS84 ellipsoid. + + """ + z_grid = np.arange(0, ds.plane_altitude.max() + 30, 30) + + flight_los = ( + ds.plane_altitude + / np.cos(np.radians(ds.plane_pitch)) + / np.cos(np.radians(ds.plane_roll)) + ) + + ds_z_grid = xr.DataArray( + data=np.tile(z_grid, (len(ds.time), 1)), + dims=("time", "height"), + coords={"time": ds.time, "height": z_grid}, + ) + ds_range = xr.DataArray( + coords={"time": ds.time, "height": z_grid}, + dims=("time", "height"), + data=ds_z_grid + / np.cos(np.radians(ds.plane_pitch)) + / np.cos(np.radians(ds.plane_roll)), + ) + + ds = ds.sel(range=flight_los - ds_range, method="nearest").drop_vars("range") + ds = ds.assign( + { + var: ds[var].where(ds_z_grid < ds.plane_altitude) + for var in ds + if ds[var].dims == ("time", "height") + } + ) + return ds + + +def add_metadata_radar(ds, flight_id): + """ + Add metadata to the radar dataset. + + Parameters + ---------- + ds : xr.Dataset + Level1 radar dataset. + Returns + ------- + xr.Dataset + Radar dataset with metadata. + """ + + ds.attrs["flight_id"] = flight_id + ds.attrs[ + "title" + ] = "MIRA Cloud Radar Moments from the HALO Microwave Package (Level 3)" + ds.attrs["summary"] = ( + "This dataset contains measurements from the MIRA cloud radar onboard the HALO aircraft during the ORCESTRA campaign. " + "The measurements are processed and quality controlled. The processing includes formatting the data, adding georeference information, " + "filtering for noise, clutter and valid radar states, and adding masks for calibration, ground reflections, and roll segments." 
+ ) + ds.attrs["creator_name"] = "Jakob Deutloff, Lukas Kluft, Clara Bayley" + ds.attrs[ + "creator_email" + ] = "jakob.deutloff@uni-hamburg.de, lukas.kluft@mpimet.mpg.de, clara.bayley@mpimet.mpg.de" + ds.attrs["project"] = "ORCESTRA, PERCUSION" + ds.attrs["platform"] = "HALO" + ds.attrs[ + "history" + ] = "The processing software is available at https://github.com/orcestra-campaign/hamp_processing" + ds.attrs["license"] = "CC-BY-4.0" + ds.attrs["featureType"] = "trajectoryProfile" + ds.attrs["references"] = "10.5194/amt-12-1815-2019, 10.5194/essd-13-5545-2021" + ds.attrs["keywords"] = "Cloud Radar, HALO, ORCESTRA, PERCUSION, Tropical Atlantic" + + ds.attrs.pop("Copywright", None) + ds.attrs.pop("Copywright_Owner", None) + ds.attrs.pop("Latitude", None) + ds.attrs.pop("Longitude", None) + ds.attrs.pop("Altitude", None) + ds.attrs.pop("institution", None) + ds.attrs.pop("location", None) + + return ds + + +def add_metadata_radiometer(ds, flight_id): + """ + Add metadata to the radiometer dataset. + + Parameters + ---------- + ds : xr.Dataset + Level1 radiometer dataset. + Returns + ------- + xr.Dataset + Radiometer dataset with metadata. + """ + + ds.attrs["flight_id"] = flight_id + ds.attrs["title"] = "Radiometer Data from the Halo Microwave Package (Level 2)" + ds.attrs["summary"] = ( + "This dataset contains measurements from the radiometers onboard the HALO aircraft during the ORCESTRA campaign. " + "The measurements are processed and quality controlled. The processing includes formatting the data, " + "adding georeference information, and filtering for altitudes above 4800 m and adding a land-sea mask." + ) + ds.attrs["creator_name"] = "Jakob Deutloff, Lukas Kluft, Clara Bayley" + ds.attrs[ + "creator_email" + ] = "jakob.deutloff@uni-hamburg.de, lukas.kluft@mpimet.mpg.de, clara.bayley@mpimet.mpg.de" + ds.attrs["project"] = "ORCESTRA, PERCUSION" + ds.attrs["platform"] = "HALO" + ds.attrs[ + "history" + ] = "The processing software is available at https://github.com/orcestra-campaign/hamp_processing" + ds.attrs["license"] = "CC-BY-4.0" + ds.attrs["featureType"] = "trajectoryProfile" + ds.attrs["references"] = "10.5194/amt-12-1815-2019, 10.5194/essd-13-5545-2021" + ds.attrs[ + "keywords" + ] = "Radiometer, Microwave, HALO, ORCESTRA, PERCUSION, Tropical Atlantic" + + ds.attrs.pop("Comment", None) + ds.attrs.pop("Radiometer_Location", None) + ds.attrs.pop("Station_Altitude", None) + ds.attrs.pop("Station_Latitude", None) + ds.attrs.pop("Station_Longitude", None) + ds.attrs.pop("Radiometer_Software", None) + ds.attrs.pop("Serial_Number", None) + + return ds + + +def add_metadata_iwv(ds, flight_id): + """ + Add metadata to the IWV dataset. + + Parameters + ---------- + ds : xr.Dataset + Level1 IWV dataset. + Returns + ------- + xr.Dataset + IWV dataset with metadata. + """ + + ds.attrs["flight_id"] = flight_id + ds.attrs[ + "title" + ] = "Integrated Water Vapor Retrieval from the K-Band and W-Band Microwave Radiometers" + ds.attrs["summary"] = ( + "This dataset contains retrievals of integrated water vapor (IWV) from the KV-band microwave radiometer onboard the HALO aircraft during the ORCESTRA campaign. " + "The retrieval is based on a linear regression model and should not be used for quantitative analysis. " + "The purpose of this data was to provide an estimate of IWV during the flights. The processing includes formatting the data, " + "adding georeference information, filtering for altitudes above 4800 m and adding a land-sea mask." 
+ ) + ds.attrs["creator_name"] = "Jakob Deutloff, Lukas Kluft, Clara Bayley" + ds.attrs[ + "creator_email" + ] = "jakob.deutloff@uni-hamburg.de, lukas.kluft@mpimet.mpg.de, clara.bayley@mpimet.mpg.de" + ds.attrs["project"] = "ORCESTRA, PERCUSION" + ds.attrs["platform"] = "HALO" + ds.attrs[ + "history" + ] = "The processing software is available at https://github.com/orcestra-campaign/hamp_processing" + ds.attrs["license"] = "CC-BY-4.0" + ds.attrs["featureType"] = "trajectoryProfile" + ds.attrs["references"] = "10.5194/amt-7-4539-2014" + ds.attrs[ + "keywords" + ] = "Radiometer, Microwave, Integrated Water Vapor, Retrieval, HALO, ORCESTRA, PERCUSION, Tropical Atlantic" + + ds.attrs.pop("Comment", None) + ds.attrs.pop("Radiometer_Location", None) + ds.attrs.pop("Station_Altitude", None) + ds.attrs.pop("Station_Latitude", None) + ds.attrs.pop("Station_Longitude", None) + ds.attrs.pop("Radiometer_Software", None) + ds.attrs.pop("Serial_Number", None) + ds.attrs.pop("Host-PC_Software", None) + ds.attrs.pop("Radiometer_System", None) + + return ds + + +def cleanup_radiometers(ds): + """Remove unnecessary variables from radiometer dataset. + + Parameters + ---------- + ds : xr.Dataset + Level1 radiometer dataset. + + Returns + ------- + xr.Dataset + Radiometer dataset without unnecessary variables. + """ + + return ds.drop_vars(["Rad_ID", "IntSampCnt", "file_code", "RSFactor", "Min_TBs"]) + + +def cleanup_iwv(ds): + """Remove unnecessary variables from IWV dataset. + + Parameters + ---------- + ds : xr.Dataset + Level1 IWV dataset. + + Returns + ------- + xr.Dataset + IWV dataset without unnecessary variables. + """ + + return ds.drop_vars( + ["Rad_ID", "IntSampCnt", "file_code", "RSFactor", "Max_IWV", "Min_IWV"] + )
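+
+
+# Illustrative sketch only: one possible way to chain the helpers above for a
+# single flight. It is not called anywhere in this module, the ordering is an
+# assumption, and the inputs (raw radar dataset, georeference DataArrays
+# lat/lon/pitch/roll/alt, sea_land_mask, flight_id) are placeholders for
+# whatever the calling script provides. The module-level config.json is
+# assumed to provide at least: plane_variables, noise_threshold,
+# valid_radar_state, altitude_threshold, path_error_file.
+def _example_radar_pipeline(
+    ds_radar_raw, lat, lon, pitch, roll, alt, sea_land_mask, flight_id
+):
+    ds = format_radar(ds_radar_raw)  # fix time axis, add dBZg/dBZe
+    ds = add_georeference(ds, lat, lon, pitch, roll, alt)
+    ds = filter_radar(ds)  # noise / radar-state filtering and trimming
+    ds = correct_radar_height(ds)  # range gates -> height above WGS84
+    ds = add_masks_radar(ds, sea_land_mask)  # calibration, ground return, roll
+    ds = add_metadata_radar(ds, flight_id)
+    return ds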