8 changes: 7 additions & 1 deletion seagliderOG1/config/OG1_var_names.yaml
@@ -2,7 +2,7 @@ latitude: LATITUDE
longitude: LONGITUDE
gps_lat: LATITUDE_GPS
gps_lon: LONGITUDE_GPS
gps_time: TIME_GPS
gps_time: TIME_GPS #### Only log_gps_time exists in the data files, indexed along gps_info. Same for lat and lon.
ctd_time: TIME
time: TIME
eng_pitchAng: PITCH
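
A minimal sketch of how this part of the mapping might be applied, assuming (per the comment above) that the basestation files carry log_gps_time, log_gps_lat, and log_gps_lon along the gps_info dimension rather than a gps_time variable; the source names in the rename map are assumptions, not the package's actual keys.

```python
import xarray as xr

# Assumed basestation names (per the comment above); OG1 targets come from this YAML.
gps_rename = {
    "log_gps_time": "TIME_GPS",
    "log_gps_lat": "LATITUDE_GPS",
    "log_gps_lon": "LONGITUDE_GPS",
}

def rename_gps_vars(ds: xr.Dataset) -> xr.Dataset:
    # Only rename what is actually present, so files without a GPS block pass through.
    present = {old: new for old, new in gps_rename.items() if old in ds.variables}
    return ds.rename(present)
```
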
@@ -30,6 +30,7 @@ salinity: PSAL
#conservative_temperature: CT
ctd_density: DENSITY
profile_index: PROFILE_NUMBER
divenum: DIVE_NUMBER
platform: PLATFORM_MODEL
source: PLATFORM_SERIAL_NUMBER
vert_speed: GLIDER_VERT_VELO_MODEL
@@ -44,6 +45,10 @@ eng_wlbb2f_blueRef: BBP470_REF
eng_wlbb2f_blueCount: BBP470
Collaborator: OK! Then based on these, "wlbb2f" is a WetLabs EcoPuck with red and blue backscatter and fluorescence.

eng_wlbb2f_redCount: BBP700
eng_wlbb2f_redRef: BBP700_REF
eng_wlbb2f_VFtemp: BBP_VFTEMP ### What is the correct variable name for this?
eng_tempFreq: TEMP_FREQ ### What is the correct variable name for this?
eng_sbe43_O2Freq: O2_FREQ ### What is the correct variable name for this?
eng_condFreq: COND_FREQ ### What is the correct variable name for this?
particulate_backscatter: BBP700
backscatter_scaled: BBP700
backscatter_raw: RBBP700
@@ -73,6 +78,7 @@ north_displacement: NORTH_DISPLACEMENT
eng_pitchCtl: PITCH_CTL
eng_rollCtl: ROLL_CTL
eng_vbdCC: VBD_CC
buoyancy: BUOYANCY

sbe41: Seabird unpumped CTD
wlbb2f: Wetlabs BB2FL-VMT
28 changes: 25 additions & 3 deletions seagliderOG1/config/OG1_vocab_attrs.yaml
Collaborator: DEPLOYMENT_TIME is scalar; same for latitude and longitude.

@@ -16,16 +16,16 @@ ICES_CODE:
PLATFORM_MAKER:
long_name: glider manufacturer
platform_maker_vocabulary: https://vocab.nerc.ac.uk/collection/B75/current/ORG01077/
DEPLOYMENT_TIME:
DEPLOYMENT_TIME: #### What dimension to use here? N_MEASUREMENTS or nothing?
long_name: date of deployment
standard_name: time
calendar: gregorian
units: seconds since 1970-01-01T00:00:00Z
DEPLOYMENT_LATITUDE:
DEPLOYMENT_LATITUDE: #### What dimension to use here? N_MEASUREMENTS or nothing?
long_name: latitude of deployment
standard_name: latitude
units: degrees_north
DEPLOYMENT_LONGITUDE:
DEPLOYMENT_LONGITUDE: #### What dimension to use here? N_MEASUREMENTS or nothing?
long_name: longitude of deployment
standard_name: longitude
units: degrees_east
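
Following the collaborator's note that these deployment fields are scalar, here is a minimal xarray sketch (with made-up values) of the difference between a per-sample variable on N_MEASUREMENTS and a dimensionless scalar:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {
        # Per-sample data live on N_MEASUREMENTS...
        "TEMP": ("N_MEASUREMENTS", np.array([12.3, 12.1, 11.8])),
        # ...while the deployment fields are scalars: empty dimension tuple, one value.
        "DEPLOYMENT_TIME": ((), np.datetime64("2010-09-06T00:00:00", "ns")),
        "DEPLOYMENT_LATITUDE": ((), 68.5),
        "DEPLOYMENT_LONGITUDE": ((), -12.7),
    }
)
ds["DEPLOYMENT_LATITUDE"].attrs = {
    "long_name": "latitude of deployment",
    "standard_name": "latitude",
    "units": "degrees_north",
}
print(ds["DEPLOYMENT_LATITUDE"].dims)  # () -- no N_MEASUREMENTS dimension
```
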
@@ -352,9 +352,15 @@ DENSITY:
valid_min: 1000
valid_max: 1040
uri: https://vocab.nerc.ac.uk/collection/OG1/current/DENSITY/
BUOYANCY:
long_name: Buoyancy of vehicle, corrected for compression effects
units: g
PROFILE_NUMBER:
long_name: profile index
units: '1'
DIVE_NUMBER:
long_name: dive number
units: '1'
PHASE:
long_name: behavior of the glider at sea
comment: This is based only on splitting each dive cycle into the period before
@@ -393,6 +399,22 @@ BBP470:
# (BLUE WAVELENGTHS) Version 1-05 Document Control Number 1341-00540 2014-05-28.
## Observatories Initiative document DATA PRODUCT SPECIFICATION FOR OPTICAL BACKSCATTER
# Downloaded from https://oceanobservatories.org/wp-content/uploads/2015/10/1341-00540_Data_Product_SPEC_FLUBSCT_OOI.pdf
BBP_VFTEMP: ### eng_wlbb2f_VFtemp
Collaborator: So this is an engineering field: wl = WetLabs (the manufacturer), bb2f presumably encodes which wavelengths are measured, and VFtemp looks like a voltage/frequency reading for the temperature sensor.

long_name: What is this variable?
units: Celsius
observation_type: observed
TEMP_FREQ: ### eng_tempFreq
Collaborator: These are then probably the raw temperature measurements; the same goes for O2Freq and condFreq.

long_name: What is this variable?
units: Hz
observation_type: observed
O2_FREQ: ### eng_sbe43_O2Freq
long_name: What is this variable?
units: Hz
observation_type: observed
COND_FREQ: ### eng_condFreq
long_name: What is this variable?
units: Hz
observation_type: observed
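
If, as the comment above suggests, these *_FREQ channels are the raw Sea-Bird frequency outputs, the usual route to physical units is the instrument calibration polynomial. A hedged sketch for temperature follows; the coefficients are invented for illustration, and the real g, h, i, j would come from the sensor calibration sheet (typically t_g…t_j in the glider's sg_calib_constants):

```python
import numpy as np

def sbe3_temperature_from_freq(f_hz, g, h, i, j, f0=1000.0):
    # Sea-Bird SBE 3 style frequency-to-temperature conversion (ITS-90, degrees C).
    x = np.log(f0 / np.asarray(f_hz, dtype=float))
    return 1.0 / (g + h * x + i * x**2 + j * x**3) - 273.15

# Invented coefficients, roughly the right order of magnitude; a few-kHz frequency
# should map to a plausible ocean temperature.
print(sbe3_temperature_from_freq(3500.0, g=4.4e-3, h=6.4e-4, i=2.3e-5, j=2.0e-6))  # ~2.3 C
```
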
BBP700_REF:
long_name: What is this? Maybe dark counts (red channel).
observation_type: observed
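
On the dark-counts question: in the WET Labs/OOI processing chain referenced above, the dark (reference) counts are subtracted from the raw counts before scaling, and the scaled scattering is then converted to particulate backscattering. A rough sketch with made-up numbers; the scale factor, dark counts, and chi_p are placeholders, and the seawater contribution is left as an input:

```python
import numpy as np

def counts_to_bbp(counts, dark_counts, scale_factor, beta_sw=0.0, chi_p=1.1):
    # counts -> volume scattering (m-1 sr-1), then particulate backscattering bbp (m-1).
    beta = scale_factor * (np.asarray(counts, dtype=float) - dark_counts)
    return 2.0 * np.pi * chi_p * (beta - beta_sw)

# e.g. a 700 nm channel: raw 120 counts, dark 50 counts, calibration scale factor 3e-6.
print(counts_to_bbp(counts=120, dark_counts=50, scale_factor=3.0e-6))  # ~1.5e-3 m-1
```
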
32 changes: 17 additions & 15 deletions seagliderOG1/convertOG1.py
@@ -23,10 +23,10 @@ def convert_to_OG1(
contrib_to_append: dict[str, str] | None = None,
) -> tuple[xr.Dataset, list[str]]:
"""Convert Seaglider basestation datasets to OG1 format.

Processes a list of xarray datasets or a single xarray dataset, converts them to OG1 format,
concatenates the datasets, sorts by time, and applies attributes. Main conversion function that
processes basestation datasets, applies OG1 standardization, concatenates multiple datasets,
and adds global attributes.
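
A bare-bones sketch of the concatenate-and-sort step described here, assuming the OG1 dimension name N_MEASUREMENTS and a TIME variable; the real function also renames variables, standardizes units, and attaches attributes:

```python
import xarray as xr

def concat_and_sort(datasets: list[xr.Dataset]) -> xr.Dataset:
    # Stitch per-dive datasets together along the sample dimension, then order by time.
    combined = xr.concat(datasets, dim="N_MEASUREMENTS")
    return combined.sortby("TIME")
```
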

Parameters
@@ -253,7 +253,7 @@ def process_dataset(ds1_base: xr.Dataset, firstrun: bool = False) -> tuple[
# Must be after split_ds
ds_new = standardise_OG10(ds_sgdatapoint, firstrun)

# Add new variables to the dataset (GPS, divenum, PROFILE_NUMBER, PHASE)
# Add new variables to the dataset (GPS, DIVE_NUMBER, PROFILE_NUMBER, PHASE)
# -----------------------------------------------------------------------
# Add the gps_info to the dataset
# Must be after split_by_unique_dims and after rename_dimensions
@@ -270,9 +270,11 @@ def process_dataset(ds1_base: xr.Dataset, firstrun: bool = False) -> tuple[
ds_sensor = tools.gather_sensor_info(ds_other, ds_sgcal, firstrun)
ds_new = tools.add_sensor_to_dataset(ds_new, ds_sensor, ds_sgcal, firstrun)

# Remove variables matching vocabularies.vars_to_remove and also 'TIME_GPS'
# TIME_GPS throws errors on saving as netCDF, possibly because of the format of the NaNs?
vars_to_remove = vocabularies.vars_to_remove + ["TIME_GPS"]
# To avoid problems, reset the dtype of TIME_GPS
ds_new['TIME_GPS'] = ds_new['TIME_GPS'].astype('datetime64[ns]')

vars_to_remove = vocabularies.vars_to_remove #+ ["TIME_GPS"]
ds_new = ds_new.drop_vars(
[var for var in vars_to_remove if var in ds_new.variables]
)
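
One plausible reading of the original comment, sketched with hypothetical values: if the GPS times end up as plain floats (or object dtype) with NaN gaps, encoding them to netCDF can misbehave, whereas forcing datetime64[ns] turns the gaps into NaT, which xarray encodes with a fill value automatically.

```python
import numpy as np
import pandas as pd
import xarray as xr

raw_seconds = np.array([1.2838e9, np.nan, 1.2838e9 + 60.0])  # seconds since 1970, with a gap
ds = xr.Dataset({"TIME_GPS": ("N_MEASUREMENTS", raw_seconds)})

# Mirror of the fix above: make TIME_GPS a proper datetime64[ns] variable (NaN -> NaT).
ds["TIME_GPS"] = ("N_MEASUREMENTS", pd.to_datetime(raw_seconds, unit="s"))
ds["TIME_GPS"] = ds["TIME_GPS"].astype("datetime64[ns]")
ds.to_netcdf("time_gps_sketch.nc")  # saves cleanly
```
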
@@ -286,7 +288,7 @@ def standardise_OG10(
unit_format: dict[str, str] = vocabularies.unit_str_format,
) -> xr.Dataset:
"""Standardize the dataset to OG1 format by renaming dimensions, variables, and assigning attributes.

Applies OG1 vocabulary for variable names, units, and attributes.
Performs unit conversions and QC flag standardization.

@@ -297,7 +299,7 @@
firstrun : bool, optional
Indicates whether this is the first run of the standardization process. Default is False.
unit_format : dict of str, optional
A dictionary mapping unit strings to their standardized format.
Default is vocabularies.unit_str_format.

Returns
@@ -492,7 +494,7 @@ def add_gps_info_to_dataset(ds: xr.Dataset, gps_ds: xr.Dataset) -> xr.Dataset:
##-----------------------------------------------------------------------------------------
def update_dataset_attributes(ds: xr.Dataset, contrib_to_append: dict[str, str] | None) -> dict[str, str]:
"""Update the attributes of the dataset based on the provided attribute input.

Processes contributor information, time attributes, and applies OG1
global attribute vocabulary in the correct order.

@@ -557,7 +559,7 @@ def update_dataset_attributes(ds: xr.Dataset, contrib_to_append: dict[str, str]

def get_contributors(ds: xr.Dataset, values_to_append: dict[str, str] | None = None) -> dict[str, str]:
"""Extract and format contributor information for OG1 attributes.

Processes creator and contributor information from dataset attributes,
formats them as comma-separated strings, and handles institution mapping.

@@ -583,9 +585,9 @@ def create_or_append_list(existing_list, new_item):

def list_to_comma_separated_string(lst):
"""Convert a list of strings to a single string with values separated by commas.

Replace any commas present in list elements with hyphens.
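
A sketch of the behaviour described (illustrative names and separator; not the repo's exact code):

```python
def list_to_comma_separated_string(lst):
    # Commas inside items become hyphens so the items stay separable in the output.
    return ", ".join(str(item).replace(",", "-") for item in lst)

print(list_to_comma_separated_string(["Institute A", "Lab, B"]))  # "Institute A, Lab- B"
```
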

Parameters
----------
lst : list
@@ -752,7 +754,7 @@ def list_to_comma_separated_string(lst):

def get_time_attributes(ds: xr.Dataset) -> dict[str, str]:
"""Extract and clean time-related attributes from the dataset.

Converts various time formats to OG1-standard YYYYMMDDTHHMMSS format
and adds date_modified timestamp.
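
A small sketch of the target format (assumed helper name, not the repo's actual function), showing a datetime rendered in the compact YYYYMMDDTHHMMSS form used for attributes such as date_modified:

```python
from datetime import datetime, timezone

def to_og1_timestamp(dt: datetime) -> str:
    # Compact OG1-style attribute timestamp, e.g. 20100906T123000.
    return dt.astimezone(timezone.utc).strftime("%Y%m%dT%H%M%S")

print(to_og1_timestamp(datetime(2010, 9, 6, 12, 30, tzinfo=timezone.utc)))  # 20100906T123000
```
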

@@ -795,7 +797,7 @@ def get_time_attributes(ds: xr.Dataset) -> dict[str, str]:

def extract_attr_to_keep(ds1: xr.Dataset, attr_as_is: list[str] = vocabularies.global_attrs["attr_as_is"]) -> dict[str, str]:
"""Extract attributes to retain unchanged.

Parameters
----------
ds1 : xarray.Dataset
@@ -823,7 +825,7 @@ def extract_attr_to_rename(
ds1: xr.Dataset, attr_to_rename: dict[str, str] = vocabularies.global_attrs["attr_to_rename"]
) -> dict[str, str]:
"""Extract and rename attributes according to OG1 vocabulary.

Parameters
----------
ds1 : xarray.Dataset
59 changes: 28 additions & 31 deletions seagliderOG1/readers.py
@@ -56,7 +56,7 @@ def load_sample_dataset(dataset_name: str = "p0330015_20100906.nc") -> xr.Datase
Parameters
----------
dataset_name : str, optional
Name of the sample dataset to load. Must be one of the available
datasets in the registry. Default is "p0330015_20100906.nc".

Returns
@@ -80,18 +80,18 @@ def load_sample_dataset(dataset_name: str = "p0330015_20100906.nc") -> xr.Datase

def _validate_filename(filename: str) -> bool:
"""Validate if filename matches expected Seaglider basestation patterns.

Validates against two expected patterns:
1. p1234567.nc (7 digits after 'p')
2. p0420100_20100903.nc (7 digits, underscore, 8 digits)

Also validates that both glider serial number and profile number are positive.
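
A possible regex equivalent of these rules (an illustration, not the function's actual implementation): 'p', a 3-digit serial, a 4-digit dive number, an optional _YYYYMMDD suffix, and the .nc extension, with both numeric parts required to be positive.

```python
import re

_BASESTATION_RE = re.compile(r"^p(\d{3})(\d{4})(_\d{8})?\.nc$")

def looks_like_basestation_file(filename: str) -> bool:
    m = _BASESTATION_RE.match(filename)
    return bool(m) and int(m.group(1)) > 0 and int(m.group(2)) > 0

print(looks_like_basestation_file("p0420001.nc"))           # True
print(looks_like_basestation_file("p0420100_20100903.nc"))  # True
print(looks_like_basestation_file("profile.nc"))            # False
```
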

Parameters
----------
filename : str
The filename to validate.

Returns
-------
bool
@@ -115,15 +115,15 @@ def _validate_filename(filename: str) -> bool:

def _profnum_from_filename(filename: str) -> int:
"""Extract the profile/dive number from a Seaglider filename.

Extracts characters 4-7 (0-indexed) which represent the dive cycle number
in filenames like p0420001.nc or p0420001_20100903.nc.

Parameters
----------
filename : str
Seaglider filename to parse.

Returns
-------
int
@@ -135,15 +135,15 @@ def _profnum_from_filename(filename: str) -> int:

def _glider_sn_from_filename(filename: str) -> int:
"""Extract the glider serial number from a Seaglider filename.

Extracts characters 1-3 (0-indexed) which represent the 3-digit glider
serial number in filenames like p0420001.nc.

Parameters
----------
filename : str
Seaglider filename to parse.

Returns
-------
int
@@ -155,15 +155,15 @@ def _glider_sn_from_filename(filename: str) -> int:

def filter_files_by_profile(file_list: list[str], start_profile: int | None = None, end_profile: int | None = None) -> list[str]:
"""Filter files by profile/dive number range.

Filters Seaglider basestation files based on profile number range.
Expects filenames of the form pXXXYYYY.nc, where XXX is the 3-digit
glider serial number and YYYY is the 4-digit dive cycle number.

Example: p0420001.nc represents glider 042, dive 0001.

Note: Input file_list does not need to be sorted.

Parameters
----------
file_list : list of str
@@ -172,7 +172,7 @@ def filter_files_by_profile(file_list: list[str], start_profile: int | None = No
Minimum profile number (inclusive).
end_profile : int, optional
Maximum profile number (inclusive).

Returns
-------
list of str
@@ -181,10 +181,7 @@ def filter_files_by_profile(file_list: list[str], start_profile: int | None = No
"""
filtered_files = []

for file in file_list:
if not _validate_filename(file):
file_list.remove(file)
# _log.warning(f"Skipping file {file} as it does not have the expected format.")
file_list = [f for f in file_list if _validate_filename(f)]

# divenum_values = [int(file[4:8]) for file in file_list]

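
For context, a quick demonstration (with hypothetical filenames and a stand-in validator) of why the old loop was replaced: calling remove() on the list being iterated skips the element after each removal, so consecutive invalid files could slip through, whereas the list comprehension filters without mutating.

```python
def is_valid(f):            # stand-in for _validate_filename
    return f.startswith("p0")

files = ["p0420001.nc", "bad_a.nc", "bad_b.nc", "p0420002.nc"]

broken = list(files)
for f in broken:            # old pattern: mutate the list while iterating over it
    if not is_valid(f):
        broken.remove(f)
print(broken)               # ['p0420001.nc', 'bad_b.nc', 'p0420002.nc'] -- 'bad_b.nc' slips through

fixed = [f for f in files if is_valid(f)]
print(fixed)                # ['p0420001.nc', 'p0420002.nc']
```
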
@@ -209,15 +206,15 @@ def filter_files_by_profile(file_list: list[str], start_profile: int | None = No

def load_first_basestation_file(source: str) -> xr.Dataset:
"""Load the first (alphabetically) basestation file from a source.

Useful for quick examination of data structure and metadata from
a Seaglider mission without loading all files.

Parameters
----------
source : str
URL or local directory path containing NetCDF files.

Returns
-------
xarray.Dataset
@@ -233,10 +230,10 @@

def load_basestation_files(source: str, start_profile: int | None = None, end_profile: int | None = None) -> list[xr.Dataset]:
"""Load multiple Seaglider basestation files with optional profile filtering.

Main function for loading Seaglider data from either online repositories
or local directories. Supports filtering by dive/profile number range.

Parameters
----------
source : str
@@ -245,7 +242,7 @@
Minimum profile number to load.
end_profile : int, optional
Maximum profile number to load.

Returns
-------
list of xarray.Dataset
@@ -273,10 +270,10 @@ def list_files(
source: str, registry_loc: str = "seagliderOG1", registry_name: str = "seaglider_registry.txt"
) -> list[str]:
"""List NetCDF files from a source (URL or local directory).

For online sources, scrapes directory listings using BeautifulSoup.
For local sources, lists files in the directory.
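
A rough sketch of the two code paths described here (assumed helper name and parsing details; the real list_files may differ): scrape anchor tags from an HTML directory listing for http(s) sources, or glob a local directory, returning a sorted list of .nc names.

```python
from pathlib import Path
import requests
from bs4 import BeautifulSoup

def list_nc_files(source: str) -> list[str]:
    if source.startswith("http"):
        soup = BeautifulSoup(requests.get(source).text, "html.parser")
        names = [a.get("href") for a in soup.find_all("a")
                 if a.get("href", "").endswith(".nc")]
    else:
        names = [p.name for p in Path(source).glob("*.nc")]
    return sorted(names)
```
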

Parameters
----------
source : str
@@ -285,12 +282,12 @@
Legacy parameter, not currently used.
registry_name : str, optional
Legacy parameter, not currently used.

Returns
-------
list of str
Sorted list of NetCDF filenames (.nc files only).

Raises
------
ValueError