From 57b74e0100f3056a3101ed6c0f817be34b099364 Mon Sep 17 00:00:00 2001
From: breid-phys <81174535+breid-phys@users.noreply.github.com>
Date: Thu, 19 May 2022 16:41:40 -0300
Subject: [PATCH 01/19] added fast processing to rinex v3

---
 obs3.py | 719 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 719 insertions(+)
 create mode 100644 obs3.py

diff --git a/obs3.py b/obs3.py
new file mode 100644
index 0000000..d1f1637
--- /dev/null
+++ b/obs3.py
@@ -0,0 +1,719 @@
+from __future__ import annotations
+from pathlib import Path
+import numpy as np
+import logging
+from datetime import datetime, timedelta
+import io
+import xarray
+import typing as T
+
+try:
+    from pymap3d import ecef2geodetic
+except ImportError:
+    ecef2geodetic = None
+#
+from .rio import opener, rinexinfo
+from .common import determine_time_system, check_time_interval, check_unique_times
+
+"""https://github.com/mvglasow/satstat/wiki/NMEA-IDs"""
+
+SBAS = 100  # offset for ID
+GLONASS = 37
+QZSS = 192
+BEIDOU = 0
+
+
+def rinexobs3(
+    fn: T.TextIO | Path,
+    use: set[str] = None,
+    tlim: tuple[datetime, datetime] = None,
+    useindicators: bool = False,
+    meas: list[str] = None,
+    verbose: bool = False,
+    *,
+    fast: bool = False,
+    interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+    """
+    process RINEX 3 OBS data
+
+    fn: RINEX OBS 3 filename
+    use: 'G'  or ['G', 'R'] or similar
+
+    tlim: read between these time bounds
+    useindicators: SSI, LLI are output
+    meas:  'L1C'  or  ['L1C', 'C1C'] or similar
+
+    fast: Still double-reading file to get times
+          Uses # OF SATELLITES from header to try and size nparray, falls back
+          to Nsvsys if missing
+          if false, uses old _epoch method
+
+    interval: allows decimating file read by time e.g. every 5 seconds.
+                Useful to speed up reading of very large RINEX files
+    """
+
+    interval = check_time_interval(interval)
+
+    if isinstance(use, str):
+        use = {use}
+
+    if isinstance(meas, str):
+        meas = [meas]
+
+    if not meas or not meas[0].strip():
+        meas = None
+   
+    """
+    Nsvsys may need updating as GNSS systems grow.
+    Let us know if you needed to change them.
+
+    Beidou is 35 max
+    Galileo is 36 max
+    """
+    Nsvsys = 36
+        
+    # %% allocate
+    if fast:
+        times = obstime3(fn)
+    else:
+        data = xarray.Dataset({}, coords={"time": [], "sv": []})
+        
+    if tlim is not None and not isinstance(tlim[0], datetime):
+        raise TypeError("time bounds are specified as datetime.datetime")
+
+    last_epoch = None
+    # %% loop
+    with opener(fn) as f:
+        
+
+        if fast:
+            hdr = obsheader3(f,use)
+            
+            obl = []
+            for sk in hdr["fields"]:
+                obl=obl+hdr["fields"][sk]
+            obl = np.unique(np.array(obl))
+            obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
+            
+            
+            if meas is not None:
+                obl = obl[np.any([np.char.find(obl,j)==0 for i, j  in enumerate(meas)],0)]
+                
+            Nt = times.size
+            Npages = len(obl)
+            if '# OF SATELLITES' in hdr.keys():
+                Nsv = int(hdr['# OF SATELLITES'])
+            else:
+                Nsv = Nsvsys * len(hdr['fields'])
+            
+            svl = np.tile('   ',Nsv)
+            
+            data = np.empty((Npages, Nt, Nsv))
+            data.fill(np.nan)
+            
+            if useindicators:
+                data_lli = np.full_like(data,np.nan)
+                data_ssi = np.full_like(data,np.nan)
+
+        else:
+            hdr = obsheader3(f, use, meas)
+
+
+        # %% process OBS file
+        time_offset = []
+        for ln in f:
+            if not ln.startswith(">"):  # end of file
+                break
+
+            try:
+                time = _timeobs(ln)
+            except ValueError:  # garbage between header and RINEX data
+                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                continue
+
+            try:
+                time_offset.append(float(ln[41:56]))
+            except ValueError:
+                pass
+            # %% get SV indices
+            sv = []
+            raw = ""
+            # Number of visible satellites this time %i3  pg. A13
+            for _ in range(int(ln[33:35])):
+                ln = f.readline()
+                if use is None or ln[0] in use:
+                    sv.append(ln[:3])
+                    raw += ln[3:]
+
+            if tlim is not None:
+                if time < tlim[0]:
+                    continue
+                elif time > tlim[1]:
+                    break
+
+            if interval is not None:
+                if last_epoch is None:  # initialization
+                    last_epoch = time
+                else:
+                    if time - last_epoch < interval:
+                        continue
+                    else:
+                        last_epoch += interval
+
+            if verbose:
+                print(time, end="\r")
+            
+            if fast:
+                for k in sv:
+                    # update list of satellites
+                    if not k in svl:
+                        svl[np.argmax(svl=='   ')]=k
+                
+                darr = np.atleast_2d(
+                        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+                    )
+                
+                t = time==times
+
+                for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+                    # satellite indices "si" to extract from this time's measurements
+                    si = [i for i, s in enumerate(sv) if s[0] in sk]
+                    if len(si) == 0:  # no SV of this system "sk" at this time
+                        continue
+                    
+                    gsv = np.array(sv)[si]
+                    isv = [i for i,s in enumerate(svl) if s in gsv]
+                    
+                    for i,j in enumerate(hdr['fields'][sk]):
+                        o = obl==j
+                        if not np.any(o):
+                            continue
+                        data[o,t,isv]=darr[si,i*3]    
+                        if useindicators:
+                            data_lli[o,t,isv]=darr[si,i*3+1]  
+                            data_ssi[o,t,isv]=darr[si,i*3+2]  
+            else:
+                # this time epoch is complete, assemble the data.
+                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+
+    if fast:
+       if '   ' in svl:
+           # remove blank satellites (if tlim used)
+           svl=svl[:np.argmax(svl=='   ')]
+       
+       svl, isv = np.unique(svl,return_index=True)
+           
+       data=data[:,:,isv]
+       if useindicators:
+           data_lli=data_lli[:,:,isv]
+           data_ssi=data_ssi[:,:,isv]
+       
+       obs = xarray.Dataset(coords={"sv": svl,"time":times})
+       for i, k in enumerate(obl):
+           if k is None:
+               continue
+           elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect
+               continue
+           obs[k] = (("time", "sv"), data[i, :, :])
+           if useindicators:
+               if k[0] == 'L':
+                   obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
+                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+               #elif k[0] == 'C': # only for code?
+               else:
+                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+                   
+       obs = obs.dropna(dim="sv", how="all")
+       obs = obs.dropna(dim="time", how="all")  # when tlim specified
+            
+       data=obs
+        
+        
+    # %% patch SV names in case of "G 7" => "G07"
+    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+    # %% other attributes
+    data.attrs["version"] = hdr["version"]
+
+    # Get interval from header or derive it from the data
+    if "interval" in hdr.keys():
+        data.attrs["interval"] = hdr["interval"]
+    elif "time" in data.coords.keys():
+        # median is robust against gaps
+        try:
+            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+        except TypeError:
+            pass
+    else:
+        data.attrs["interval"] = np.nan
+
+    data.attrs["rinextype"] = "obs"
+    data.attrs["fast_processing"] = int(fast)  # bool is not allowed in NetCDF4
+    data.attrs["time_system"] = determine_time_system(hdr)
+    if isinstance(fn, Path):
+        data.attrs["filename"] = fn.name
+
+    if "position" in hdr.keys():
+        data.attrs["position"] = hdr["position"]
+        if ecef2geodetic is not None:
+            data.attrs["position_geodetic"] = hdr["position_geodetic"]
+
+    if time_offset:
+        data.attrs["time_offset"] = time_offset
+
+    if "RCV CLOCK OFFS APPL" in hdr.keys():
+        try:
+            data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
+        except ValueError:
+            pass
+
+    return data
+
+
+def _timeobs(ln: str) -> datetime:
+    """
+    convert time from RINEX 3 OBS text to datetime
+    """
+
+    if not ln.startswith("> "):  # pg. A13
+        raise ValueError('RINEX 3 line beginning "> " is not present')
+
+    return datetime(
+        int(ln[2:6]),
+        int(ln[7:9]),
+        int(ln[10:12]),
+        hour=int(ln[13:15]),
+        minute=int(ln[16:18]),
+        second=int(ln[19:21]),
+        microsecond=int(float(ln[19:29]) % 1 * 1000000),
+    )
+
+
+def obstime3(fn: T.TextIO | Path, verbose: bool = False) -> np.ndarray:
+    """
+    return all times in RINEX file
+    """
+
+    times = []
+
+    with opener(fn) as f:
+        for ln in f:
+            if ln.startswith("> "):
+                try:
+                    times.append(_timeobs(ln))
+                except (ValueError, IndexError):
+                    logging.debug(f"was not a time:\n{ln}")
+                    continue
+
+    times = np.asarray(times)
+
+    check_unique_times(times)
+
+    return times
+
+
+def _epoch(
+    data: xarray.Dataset,
+    raw: str,
+    hdr: dict[str, T.Any],
+    time: datetime,
+    sv: list[str],
+    useindicators: bool,
+    verbose: bool,
+) -> xarray.Dataset:
+    """
+    block processing of each epoch (time step)
+    """
+    darr = np.atleast_2d(
+        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+    )
+    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    # %% assign data for each time step
+    for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+        # satellite indices "si" to extract from this time's measurements
+        si = [i for i, s in enumerate(sv) if s[0] in sk]
+        if len(si) == 0:  # no SV of this system "sk" at this time
+            continue
+
+        # measurement indices "di" to extract at this time step
+        di = hdr["fields_ind"][sk]
+        garr = darr[si, :]
+        garr = garr[:, di]
+
+        gsv = np.array(sv)[si]
+
+        dsf: dict[str, tuple] = {}
+        for i, k in enumerate(hdr["fields"][sk]):
+            dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3]))
+
+            if useindicators:
+                dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3])
+
+        if verbose:
+            print(time, "\r", end="")
+
+        epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
+        if len(data) == 0:
+            data = epoch_data
+            #data = xarray.merge((data, epoch_data))
+        elif len(hdr["fields"]) == 1:  # one satellite system selected, faster to process
+            data = xarray.concat((data, epoch_data), dim="time")
+        else:  # general case, slower for different satellite systems all together
+            data = xarray.merge((data, epoch_data))
+
+    return data
+
+def rinexsystem3(
+    fn: T.TextIO | Path,
+    use: set[str] = None,
+    tlim: tuple[datetime, datetime] = None,
+    useindicators: bool = False,
+    meas: list[str] = None,
+    verbose: bool = False,
+    *,
+    fast: bool = False,
+    interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+    """
+    process RINEX 3 OBS data
+
+    fn: RINEX OBS 3 filename
+    use: 'G'  or ['G', 'R'] or similar
+
+    tlim: read between these time bounds
+    useindicators: SSI, LLI are output
+    meas:  'L1C'  or  ['L1C', 'C1C'] or similar
+
+    fast:
+          TODO: FUTURE, not yet enabled for OBS3
+          speculative preallocation based on minimum SV assumption and file size.
+          Avoids double-reading file and more complicated linked lists.
+          Believed that Numpy array should be faster than lists anyway.
+          Reduce Nsvmin if error (let us know)
+
+    interval: allows decimating file read by time e.g. every 5 seconds.
+                Useful to speed up reading of very large RINEX files
+    """
+
+    interval = check_time_interval(interval)
+
+    if isinstance(use, str):
+        use = {use}
+
+    if isinstance(meas, str):
+        meas = [meas]
+
+    if not meas or not meas[0].strip():
+        meas = None
+    # %% allocate
+    times = obstime3(fn)
+    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    if tlim is not None and not isinstance(tlim[0], datetime):
+        raise TypeError("time bounds are specified as datetime.datetime")
+
+    last_epoch = None
+    # %% loop
+    with opener(fn) as f:
+        
+        if fast:
+            hdr = obsheader3(f)
+        else:
+            hdr = obsheader3(f, use, meas)
+            
+        obl = []
+        for sk in hdr["fields"]:
+            obl=obl+hdr["fields"][sk]
+        obl = np.unique(np.array(obl))
+        obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
+        
+        
+        if meas is not None:
+            obl = obl[np.isin(obl,meas)]
+            
+        Nt = times.size
+        Npages = len(obl)
+        Nsv = int(hdr['# OF SATELLITES'])
+        
+        svl = np.tile('   ',Nsv)
+        
+        data = np.empty((Npages, Nt, Nsv))
+        data.fill(np.nan)
+        
+        if useindicators:
+            data_lli = np.full_like(data,np.nan)
+            data_ssi = np.full_like(data,np.nan)
+
+        # %% process OBS file
+        time_offset = []
+        for ln in f:
+            if not ln.startswith(">"):  # end of file
+                break
+
+            try:
+                time = _timeobs(ln)
+            except ValueError:  # garbage between header and RINEX data
+                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                continue
+
+            try:
+                time_offset.append(float(ln[41:56]))
+            except ValueError:
+                pass
+            # %% get SV indices
+            sv = []
+            raw = ""
+            # Number of visible satellites this time %i3  pg. A13
+            for _ in range(int(ln[33:35])):
+                ln = f.readline()
+                if use is None or ln[0] in use:
+                    sv.append(ln[:3])
+                    raw += ln[3:]
+
+            if tlim is not None:
+                if time < tlim[0]:
+                    continue
+                elif time > tlim[1]:
+                    break
+
+            if interval is not None:
+                if last_epoch is None:  # initialization
+                    last_epoch = time
+                else:
+                    if time - last_epoch < interval:
+                        continue
+                    else:
+                        last_epoch += interval
+
+            if verbose:
+                print(time, end="\r")
+
+ 
+            for k in sv:
+                if not k in svl:
+                    svl[np.argmax(svl=='   ')]=k
+                    
+
+
+            darr = np.atleast_2d(
+                    np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+                )
+            
+            t = time==times
+            
+            
+            for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+                # satellite indices "si" to extract from this time's measurements
+                si = [i for i, s in enumerate(sv) if s[0] in sk]
+                if len(si) == 0:  # no SV of this system "sk" at this time
+                    continue
+                
+                gsv = np.array(sv)[si]
+                isv = [i for i,s in enumerate(svl) if s in gsv]
+                #isv = np.zeros_like(gsv,dtype=int)
+                
+                #for ii,jj in enumerate(gsv):
+                 #   isv[ii] = np.nonzero(svl==jj)
+                
+                
+                for i,j in enumerate(hdr['fields'][sk]):
+                    o = obl==j
+                    #for ii,jj in enumerate(gsv):
+                    #    data[o,t,svl==jj]=darr[si[ii],i*3]
+                    data[o,t,isv]=darr[si,i*3]    
+                    if useindicators:
+                        data_lli[o,t,isv]=darr[si,i*3+1]  
+                        data_ssi[o,t,isv]=darr[si,i*3+2]  
+                    
+                
+                
+                
+    
+    if '   ' in svl:
+        svl=svl[:np.argmax(svl=='   ')]
+    
+    svl, isv = np.unique(svl,return_index=True)
+        
+    data=data[:,:,isv]
+    if useindicators:
+        data_lli=data_lli[:,:,isv]
+        data_ssi=data_ssi[:,:,isv]
+    
+    obs = xarray.Dataset(coords={"time":times,"sv": svl})
+    for i, k in enumerate(obl):
+        # FIXME: for limited time span reads, this drops unused data variables
+        # if np.isnan(data[i, ...]).all():
+        #     continue
+        if k is None:
+            continue
+        obs[k] = (("time", "sv"), data[i, :, :])
+        if useindicators:
+            if k[0] == 'L':
+                obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
+                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+            elif k[0] == 'C':
+                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+        
+        
+    obs = obs.dropna(dim="sv", how="all")
+    obs = obs.dropna(dim="time", how="all")  # when tlim specified
+    
+    data=obs
+    # %% patch SV names in case of "G 7" => "G07"
+    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+    # %% other attributes
+    data.attrs["version"] = hdr["version"]
+
+    # Get interval from header or derive it from the data
+    if "interval" in hdr.keys():
+        data.attrs["interval"] = hdr["interval"]
+    elif "time" in data.coords.keys():
+        # median is robust against gaps
+        try:
+            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+        except TypeError:
+            pass
+    else:
+        data.attrs["interval"] = np.nan
+
+    data.attrs["rinextype"] = "obs"
+    data.attrs["fast_processing"] = 0  # bool is not allowed in NetCDF4
+    data.attrs["time_system"] = determine_time_system(hdr)
+    if isinstance(fn, Path):
+        data.attrs["filename"] = fn.name
+
+    if "position" in hdr.keys():
+        data.attrs["position"] = hdr["position"]
+        if ecef2geodetic is not None:
+            data.attrs["position_geodetic"] = hdr["position_geodetic"]
+
+    if time_offset:
+        data.attrs["time_offset"] = time_offset
+
+    if "RCV CLOCK OFFS APPL" in hdr.keys():
+        try:
+            data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
+        except ValueError:
+            pass
+
+    return data
+
+
+def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
+    """
+    handle LLI (loss of lock) and SSI (signal strength)
+    """
+    if k.startswith(("L1", "L2")):
+        d[k + "lli"] = (("time", "sv"), np.atleast_2d(arr[:, 0]))
+
+    d[k + "ssi"] = (("time", "sv"), np.atleast_2d(arr[:, 1]))
+
+    return d
+
+
+def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dict[str, T.Any]:
+    """
+    get RINEX 3 OBS types, for each system type
+    optionally, select system type and/or measurement type to greatly
+    speed reading and save memory (RAM, disk)
+    """
+    if isinstance(f, (str, Path)):
+        with opener(f, header=True) as h:
+            return obsheader3(h, use, meas)
+
+    fields = {}
+    Fmax = 0
+
+    # %% first line
+    hdr = rinexinfo(f)
+
+    for ln in f:
+        if "END OF HEADER" in ln:
+            break
+
+        hd = ln[60:80]
+        c = ln[:60]
+        if "SYS / # / OBS TYPES" in hd:
+            k = c[0]
+            fields[k] = c[6:60].split()
+            N = int(c[3:6])
+            # %% maximum number of fields in a file, to allow fast Numpy parse.
+            Fmax = max(N, Fmax)
+
+            n = N - 13
+            while n > 0:  # Rinex 3.03, pg. A6, A7
+                ln = f.readline()
+                assert "SYS / # / OBS TYPES" in ln[60:]
+                fields[k] += ln[6:60].split()
+                n -= 13
+
+            assert len(fields[k]) == N
+
+            continue
+
+        if hd.strip() not in hdr:  # Header label
+            hdr[hd.strip()] = c  # don't strip for fixed-width parsers
+            # string with info
+        else:  # concatenate to the existing string
+            hdr[hd.strip()] += " " + c
+
+    # %% list with x,y,z cartesian (OPTIONAL)
+    # Rinex 3.03, pg. A6, Table A2
+    try:
+        # some RINEX files have bad headers with mulitple APPROX POSITION XYZ.
+        # we choose to use the first such header.
+        hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3]
+        if ecef2geodetic is not None and len(hdr["position"]) == 3:
+            hdr["position_geodetic"] = ecef2geodetic(*hdr["position"])
+    except (KeyError, ValueError):
+        pass
+    # %% time
+    try:
+        t0s = hdr["TIME OF FIRST OBS"]
+        # NOTE: must do second=int(float()) due to non-conforming files
+        hdr["t0"] = datetime(
+            year=int(t0s[:6]),
+            month=int(t0s[6:12]),
+            day=int(t0s[12:18]),
+            hour=int(t0s[18:24]),
+            minute=int(t0s[24:30]),
+            second=int(float(t0s[30:36])),
+            microsecond=int(float(t0s[30:43]) % 1 * 1000000),
+        )
+    except (KeyError, ValueError):
+        pass
+
+    try:
+        hdr["interval"] = float(hdr["INTERVAL"][:10])
+    except (KeyError, ValueError):
+        pass
+    # %% select specific satellite systems only (optional)
+    if use:
+        if not set(fields.keys()).intersection(use):
+            raise KeyError(f"system type {use} not found in RINEX file")
+
+        fields = {k: fields[k] for k in use if k in fields}
+
+    # perhaps this could be done more efficiently, but it's probably low impact on overall program.
+    # simple set and frozenset operations do NOT preserve order, which would completely mess up reading!
+    sysind: dict[str, T.Any] = {}
+    if isinstance(meas, (tuple, list, np.ndarray)):
+        for sk in fields:  # iterate over each system
+            # ind = np.isin(fields[sk], meas)  # boolean vector
+            ind = np.zeros(len(fields[sk]), dtype=bool)
+            for m in meas:
+                for i, field in enumerate(fields[sk]):
+                    if field.startswith(m):
+                        ind[i] = True
+
+            fields[sk] = np.array(fields[sk])[ind].tolist()
+            sysind[sk] = np.empty(Fmax * 3, dtype=bool)  # *3 due to LLI, SSI
+            for j, i in enumerate(ind):
+                sysind[sk][j * 3 : j * 3 + 3] = i
+    else:
+        sysind = {k: np.s_[:] for k in fields}
+
+    hdr["fields"] = fields
+    hdr["fields_ind"] = sysind
+    hdr["Fmax"] = Fmax
+
+    return hdr

From a429c12b346f10ee89477f2f8918313e8f23c8a2 Mon Sep 17 00:00:00 2001
From: breid-phys <81174535+breid-phys@users.noreply.github.com>
Date: Thu, 19 May 2022 16:43:36 -0300
Subject: [PATCH 02/19] Update test_rinex.py

Now sets flag correctly
---
 src/georinex/tests/test_rinex.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/georinex/tests/test_rinex.py b/src/georinex/tests/test_rinex.py
index 74fd7e5..1d8c273 100644
--- a/src/georinex/tests/test_rinex.py
+++ b/src/georinex/tests/test_rinex.py
@@ -61,7 +61,7 @@ def test_minimal(tmp_path, filename):
         if int(dat.version) == 2:
             assert dat.fast_processing
         elif int(dat.version) == 3:
-            assert not dat.fast_processing  # FIXME: update when OBS3 fast processing is added.
+            assert dat.fast_processing 
 
 
 def test_dont_care_file_extension():

From 1e025c15069a373708a35cf711fb3d64999de3d0 Mon Sep 17 00:00:00 2001
From: breid-phys <81174535+breid-phys@users.noreply.github.com>
Date: Thu, 26 May 2022 15:18:35 -0300
Subject: [PATCH 03/19] moved file to correct place

---
 src/georinex/obs3.py | 376 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 360 insertions(+), 16 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index 130940b..d1f1637 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -44,12 +44,10 @@ def rinexobs3(
     useindicators: SSI, LLI are output
     meas:  'L1C'  or  ['L1C', 'C1C'] or similar
 
-    fast:
-          TODO: FUTURE, not yet enabled for OBS3
-          speculative preallocation based on minimum SV assumption and file size.
-          Avoids double-reading file and more complicated linked lists.
-          Believed that Numpy array should be faster than lists anyway.
-          Reduce Nsvmin if error (let us know)
+    fast: Still double-reading file to get times
+          Uses # OF SATELLITES from header to try and size nparray, falls back
+          to Nsvsys if missing
+          if false, uses old _epoch method
 
     interval: allows decimating file read by time e.g. every 5 seconds.
                 Useful to speed up reading of very large RINEX files
@@ -65,16 +63,62 @@ def rinexobs3(
 
     if not meas or not meas[0].strip():
         meas = None
+   
+    """
+    Nsvsys may need updating as GNSS systems grow.
+    Let us know if you needed to change them.
+
+    Beidou is 35 max
+    Galileo is 36 max
+    """
+    Nsvsys = 36
+        
     # %% allocate
-    # times = obstime3(fn)
-    data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    if fast:
+        times = obstime3(fn)
+    else:
+        data = xarray.Dataset({}, coords={"time": [], "sv": []})
+        
     if tlim is not None and not isinstance(tlim[0], datetime):
         raise TypeError("time bounds are specified as datetime.datetime")
 
     last_epoch = None
     # %% loop
     with opener(fn) as f:
-        hdr = obsheader3(f, use, meas)
+        
+
+        if fast:
+            hdr = obsheader3(f,use)
+            
+            obl = []
+            for sk in hdr["fields"]:
+                obl=obl+hdr["fields"][sk]
+            obl = np.unique(np.array(obl))
+            obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
+            
+            
+            if meas is not None:
+                obl = obl[np.any([np.char.find(obl,j)==0 for i, j  in enumerate(meas)],0)]
+                
+            Nt = times.size
+            Npages = len(obl)
+            if '# OF SATELLITES' in hdr.keys():
+                Nsv = int(hdr['# OF SATELLITES'])
+            else:
+                Nsv = Nsvsys * len(hdr['fields'])
+            
+            svl = np.tile('   ',Nsv)
+            
+            data = np.empty((Npages, Nt, Nsv))
+            data.fill(np.nan)
+            
+            if useindicators:
+                data_lli = np.full_like(data,np.nan)
+                data_ssi = np.full_like(data,np.nan)
+
+        else:
+            hdr = obsheader3(f, use, meas)
+
 
         # %% process OBS file
         time_offset = []
@@ -98,8 +142,9 @@ def rinexobs3(
             # Number of visible satellites this time %i3  pg. A13
             for _ in range(int(ln[33:35])):
                 ln = f.readline()
-                sv.append(ln[:3])
-                raw += ln[3:]
+                if use is None or ln[0] in use:
+                    sv.append(ln[:3])
+                    raw += ln[3:]
 
             if tlim is not None:
                 if time < tlim[0]:
@@ -118,10 +163,73 @@ def rinexobs3(
 
             if verbose:
                 print(time, end="\r")
-
-            # this time epoch is complete, assemble the data.
-            data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
-
+            
+            if fast:
+                for k in sv:
+                    # update list of satellites
+                    if not k in svl:
+                        svl[np.argmax(svl=='   ')]=k
+                
+                darr = np.atleast_2d(
+                        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+                    )
+                
+                t = time==times
+
+                for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+                    # satellite indices "si" to extract from this time's measurements
+                    si = [i for i, s in enumerate(sv) if s[0] in sk]
+                    if len(si) == 0:  # no SV of this system "sk" at this time
+                        continue
+                    
+                    gsv = np.array(sv)[si]
+                    isv = [i for i,s in enumerate(svl) if s in gsv]
+                    
+                    for i,j in enumerate(hdr['fields'][sk]):
+                        o = obl==j
+                        if not np.any(o):
+                            continue
+                        data[o,t,isv]=darr[si,i*3]    
+                        if useindicators:
+                            data_lli[o,t,isv]=darr[si,i*3+1]  
+                            data_ssi[o,t,isv]=darr[si,i*3+2]  
+            else:
+                # this time epoch is complete, assemble the data.
+                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+
+    if fast:
+       if '   ' in svl:
+           # remove blank satellites (if tlim used)
+           svl=svl[:np.argmax(svl=='   ')]
+       
+       svl, isv = np.unique(svl,return_index=True)
+           
+       data=data[:,:,isv]
+       if useindicators:
+           data_lli=data_lli[:,:,isv]
+           data_ssi=data_ssi[:,:,isv]
+       
+       obs = xarray.Dataset(coords={"sv": svl,"time":times})
+       for i, k in enumerate(obl):
+           if k is None:
+               continue
+           elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect
+               continue
+           obs[k] = (("time", "sv"), data[i, :, :])
+           if useindicators:
+               if k[0] == 'L':
+                   obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
+                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+               #elif k[0] == 'C': # only for code?
+               else:
+                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+                   
+       obs = obs.dropna(dim="sv", how="all")
+       obs = obs.dropna(dim="time", how="all")  # when tlim specified
+            
+       data=obs
+        
+        
     # %% patch SV names in case of "G 7" => "G07"
     data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
     # %% other attributes
@@ -140,7 +248,7 @@ def rinexobs3(
         data.attrs["interval"] = np.nan
 
     data.attrs["rinextype"] = "obs"
-    data.attrs["fast_processing"] = 0  # bool is not allowed in NetCDF4
+    data.attrs["fast_processing"] = int(fast)  # bool is not allowed in NetCDF4
     data.attrs["time_system"] = determine_time_system(hdr)
     if isinstance(fn, Path):
         data.attrs["filename"] = fn.name
@@ -219,6 +327,7 @@ def _epoch(
     darr = np.atleast_2d(
         np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
     )
+    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
     # %% assign data for each time step
     for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
         # satellite indices "si" to extract from this time's measurements
@@ -246,6 +355,7 @@ def _epoch(
         epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
         if len(data) == 0:
             data = epoch_data
+            #data = xarray.merge((data, epoch_data))
         elif len(hdr["fields"]) == 1:  # one satellite system selected, faster to process
             data = xarray.concat((data, epoch_data), dim="time")
         else:  # general case, slower for different satellite systems all together
@@ -253,6 +363,240 @@ def _epoch(
 
     return data
 
+def rinexsystem3(
+    fn: T.TextIO | Path,
+    use: set[str] = None,
+    tlim: tuple[datetime, datetime] = None,
+    useindicators: bool = False,
+    meas: list[str] = None,
+    verbose: bool = False,
+    *,
+    fast: bool = False,
+    interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+    """
+    process RINEX 3 OBS data
+
+    fn: RINEX OBS 3 filename
+    use: 'G'  or ['G', 'R'] or similar
+
+    tlim: read between these time bounds
+    useindicators: SSI, LLI are output
+    meas:  'L1C'  or  ['L1C', 'C1C'] or similar
+
+    fast:
+          TODO: FUTURE, not yet enabled for OBS3
+          speculative preallocation based on minimum SV assumption and file size.
+          Avoids double-reading file and more complicated linked lists.
+          Believed that Numpy array should be faster than lists anyway.
+          Reduce Nsvmin if error (let us know)
+
+    interval: allows decimating file read by time e.g. every 5 seconds.
+                Useful to speed up reading of very large RINEX files
+    """
+
+    interval = check_time_interval(interval)
+
+    if isinstance(use, str):
+        use = {use}
+
+    if isinstance(meas, str):
+        meas = [meas]
+
+    if not meas or not meas[0].strip():
+        meas = None
+    # %% allocate
+    times = obstime3(fn)
+    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    if tlim is not None and not isinstance(tlim[0], datetime):
+        raise TypeError("time bounds are specified as datetime.datetime")
+
+    last_epoch = None
+    # %% loop
+    with opener(fn) as f:
+        
+        if fast:
+            hdr = obsheader3(f)
+        else:
+            hdr = obsheader3(f, use, meas)
+            
+        obl = []
+        for sk in hdr["fields"]:
+            obl=obl+hdr["fields"][sk]
+        obl = np.unique(np.array(obl))
+        obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
+        
+        
+        if meas is not None:
+            obl = obl[np.isin(obl,meas)]
+            
+        Nt = times.size
+        Npages = len(obl)
+        Nsv = int(hdr['# OF SATELLITES'])
+        
+        svl = np.tile('   ',Nsv)
+        
+        data = np.empty((Npages, Nt, Nsv))
+        data.fill(np.nan)
+        
+        if useindicators:
+            data_lli = np.full_like(data,np.nan)
+            data_ssi = np.full_like(data,np.nan)
+
+        # %% process OBS file
+        time_offset = []
+        for ln in f:
+            if not ln.startswith(">"):  # end of file
+                break
+
+            try:
+                time = _timeobs(ln)
+            except ValueError:  # garbage between header and RINEX data
+                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                continue
+
+            try:
+                time_offset.append(float(ln[41:56]))
+            except ValueError:
+                pass
+            # %% get SV indices
+            sv = []
+            raw = ""
+            # Number of visible satellites this time %i3  pg. A13
+            for _ in range(int(ln[33:35])):
+                ln = f.readline()
+                if use is None or ln[0] in use:
+                    sv.append(ln[:3])
+                    raw += ln[3:]
+
+            if tlim is not None:
+                if time < tlim[0]:
+                    continue
+                elif time > tlim[1]:
+                    break
+
+            if interval is not None:
+                if last_epoch is None:  # initialization
+                    last_epoch = time
+                else:
+                    if time - last_epoch < interval:
+                        continue
+                    else:
+                        last_epoch += interval
+
+            if verbose:
+                print(time, end="\r")
+
+ 
+            for k in sv:
+                if not k in svl:
+                    svl[np.argmax(svl=='   ')]=k
+                    
+
+
+            darr = np.atleast_2d(
+                    np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+                )
+            
+            t = time==times
+            
+            
+            for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+                # satellite indices "si" to extract from this time's measurements
+                si = [i for i, s in enumerate(sv) if s[0] in sk]
+                if len(si) == 0:  # no SV of this system "sk" at this time
+                    continue
+                
+                gsv = np.array(sv)[si]
+                isv = [i for i,s in enumerate(svl) if s in gsv]
+                #isv = np.zeros_like(gsv,dtype=int)
+                
+                #for ii,jj in enumerate(gsv):
+                 #   isv[ii] = np.nonzero(svl==jj)
+                
+                
+                for i,j in enumerate(hdr['fields'][sk]):
+                    o = obl==j
+                    #for ii,jj in enumerate(gsv):
+                    #    data[o,t,svl==jj]=darr[si[ii],i*3]
+                    data[o,t,isv]=darr[si,i*3]    
+                    if useindicators:
+                        data_lli[o,t,isv]=darr[si,i*3+1]  
+                        data_ssi[o,t,isv]=darr[si,i*3+2]  
+                    
+                
+                
+                
+    
+    if '   ' in svl:
+        svl=svl[:np.argmax(svl=='   ')]
+    
+    svl, isv = np.unique(svl,return_index=True)
+        
+    data=data[:,:,isv]
+    if useindicators:
+        data_lli=data_lli[:,:,isv]
+        data_ssi=data_ssi[:,:,isv]
+    
+    obs = xarray.Dataset(coords={"time":times,"sv": svl})
+    for i, k in enumerate(obl):
+        # FIXME: for limited time span reads, this drops unused data variables
+        # if np.isnan(data[i, ...]).all():
+        #     continue
+        if k is None:
+            continue
+        obs[k] = (("time", "sv"), data[i, :, :])
+        if useindicators:
+            if k[0] == 'L':
+                obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
+                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+            elif k[0] == 'C':
+                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+        
+        
+    obs = obs.dropna(dim="sv", how="all")
+    obs = obs.dropna(dim="time", how="all")  # when tlim specified
+    
+    data=obs
+    # %% patch SV names in case of "G 7" => "G07"
+    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+    # %% other attributes
+    data.attrs["version"] = hdr["version"]
+
+    # Get interval from header or derive it from the data
+    if "interval" in hdr.keys():
+        data.attrs["interval"] = hdr["interval"]
+    elif "time" in data.coords.keys():
+        # median is robust against gaps
+        try:
+            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+        except TypeError:
+            pass
+    else:
+        data.attrs["interval"] = np.nan
+
+    data.attrs["rinextype"] = "obs"
+    data.attrs["fast_processing"] = 0  # bool is not allowed in NetCDF4
+    data.attrs["time_system"] = determine_time_system(hdr)
+    if isinstance(fn, Path):
+        data.attrs["filename"] = fn.name
+
+    if "position" in hdr.keys():
+        data.attrs["position"] = hdr["position"]
+        if ecef2geodetic is not None:
+            data.attrs["position_geodetic"] = hdr["position_geodetic"]
+
+    if time_offset:
+        data.attrs["time_offset"] = time_offset
+
+    if "RCV CLOCK OFFS APPL" in hdr.keys():
+        try:
+            data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
+        except ValueError:
+            pass
+
+    return data
+
 
 def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
     """

From 9c65502934877f3365712725276b157f0842f7b9 Mon Sep 17 00:00:00 2001
From: breid-phys <81174535+breid-phys@users.noreply.github.com>
Date: Thu, 26 May 2022 15:18:58 -0300
Subject: [PATCH 04/19] Delete obs3.py

---
 obs3.py | 719 --------------------------------------------------------
 1 file changed, 719 deletions(-)
 delete mode 100644 obs3.py

diff --git a/obs3.py b/obs3.py
deleted file mode 100644
index d1f1637..0000000
--- a/obs3.py
+++ /dev/null
@@ -1,719 +0,0 @@
-from __future__ import annotations
-from pathlib import Path
-import numpy as np
-import logging
-from datetime import datetime, timedelta
-import io
-import xarray
-import typing as T
-
-try:
-    from pymap3d import ecef2geodetic
-except ImportError:
-    ecef2geodetic = None
-#
-from .rio import opener, rinexinfo
-from .common import determine_time_system, check_time_interval, check_unique_times
-
-"""https://github.com/mvglasow/satstat/wiki/NMEA-IDs"""
-
-SBAS = 100  # offset for ID
-GLONASS = 37
-QZSS = 192
-BEIDOU = 0
-
-
-def rinexobs3(
-    fn: T.TextIO | Path,
-    use: set[str] = None,
-    tlim: tuple[datetime, datetime] = None,
-    useindicators: bool = False,
-    meas: list[str] = None,
-    verbose: bool = False,
-    *,
-    fast: bool = False,
-    interval: float | int | timedelta = None,
-) -> xarray.Dataset:
-    """
-    process RINEX 3 OBS data
-
-    fn: RINEX OBS 3 filename
-    use: 'G'  or ['G', 'R'] or similar
-
-    tlim: read between these time bounds
-    useindicators: SSI, LLI are output
-    meas:  'L1C'  or  ['L1C', 'C1C'] or similar
-
-    fast: Still double-reading file to get times
-          Uses # OF SATELLITES from header to try and size nparray, falls back
-          to Nsvsys if missing
-          if false, uses old _epoch method
-
-    interval: allows decimating file read by time e.g. every 5 seconds.
-                Useful to speed up reading of very large RINEX files
-    """
-
-    interval = check_time_interval(interval)
-
-    if isinstance(use, str):
-        use = {use}
-
-    if isinstance(meas, str):
-        meas = [meas]
-
-    if not meas or not meas[0].strip():
-        meas = None
-   
-    """
-    Nsvsys may need updating as GNSS systems grow.
-    Let us know if you needed to change them.
-
-    Beidou is 35 max
-    Galileo is 36 max
-    """
-    Nsvsys = 36
-        
-    # %% allocate
-    if fast:
-        times = obstime3(fn)
-    else:
-        data = xarray.Dataset({}, coords={"time": [], "sv": []})
-        
-    if tlim is not None and not isinstance(tlim[0], datetime):
-        raise TypeError("time bounds are specified as datetime.datetime")
-
-    last_epoch = None
-    # %% loop
-    with opener(fn) as f:
-        
-
-        if fast:
-            hdr = obsheader3(f,use)
-            
-            obl = []
-            for sk in hdr["fields"]:
-                obl=obl+hdr["fields"][sk]
-            obl = np.unique(np.array(obl))
-            obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
-            
-            
-            if meas is not None:
-                obl = obl[np.any([np.char.find(obl,j)==0 for i, j  in enumerate(meas)],0)]
-                
-            Nt = times.size
-            Npages = len(obl)
-            if '# OF SATELLITES' in hdr.keys():
-                Nsv = int(hdr['# OF SATELLITES'])
-            else:
-                Nsv = Nsvsys * len(hdr['fields'])
-            
-            svl = np.tile('   ',Nsv)
-            
-            data = np.empty((Npages, Nt, Nsv))
-            data.fill(np.nan)
-            
-            if useindicators:
-                data_lli = np.full_like(data,np.nan)
-                data_ssi = np.full_like(data,np.nan)
-
-        else:
-            hdr = obsheader3(f, use, meas)
-
-
-        # %% process OBS file
-        time_offset = []
-        for ln in f:
-            if not ln.startswith(">"):  # end of file
-                break
-
-            try:
-                time = _timeobs(ln)
-            except ValueError:  # garbage between header and RINEX data
-                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
-                continue
-
-            try:
-                time_offset.append(float(ln[41:56]))
-            except ValueError:
-                pass
-            # %% get SV indices
-            sv = []
-            raw = ""
-            # Number of visible satellites this time %i3  pg. A13
-            for _ in range(int(ln[33:35])):
-                ln = f.readline()
-                if use is None or ln[0] in use:
-                    sv.append(ln[:3])
-                    raw += ln[3:]
-
-            if tlim is not None:
-                if time < tlim[0]:
-                    continue
-                elif time > tlim[1]:
-                    break
-
-            if interval is not None:
-                if last_epoch is None:  # initialization
-                    last_epoch = time
-                else:
-                    if time - last_epoch < interval:
-                        continue
-                    else:
-                        last_epoch += interval
-
-            if verbose:
-                print(time, end="\r")
-            
-            if fast:
-                for k in sv:
-                    # update list of satellites
-                    if not k in svl:
-                        svl[np.argmax(svl=='   ')]=k
-                
-                darr = np.atleast_2d(
-                        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
-                    )
-                
-                t = time==times
-
-                for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
-                    # satellite indices "si" to extract from this time's measurements
-                    si = [i for i, s in enumerate(sv) if s[0] in sk]
-                    if len(si) == 0:  # no SV of this system "sk" at this time
-                        continue
-                    
-                    gsv = np.array(sv)[si]
-                    isv = [i for i,s in enumerate(svl) if s in gsv]
-                    
-                    for i,j in enumerate(hdr['fields'][sk]):
-                        o = obl==j
-                        if not np.any(o):
-                            continue
-                        data[o,t,isv]=darr[si,i*3]    
-                        if useindicators:
-                            data_lli[o,t,isv]=darr[si,i*3+1]  
-                            data_ssi[o,t,isv]=darr[si,i*3+2]  
-            else:
-                # this time epoch is complete, assemble the data.
-                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
-
-    if fast:
-       if '   ' in svl:
-           # remove blank satellites (if tlim used)
-           svl=svl[:np.argmax(svl=='   ')]
-       
-       svl, isv = np.unique(svl,return_index=True)
-           
-       data=data[:,:,isv]
-       if useindicators:
-           data_lli=data_lli[:,:,isv]
-           data_ssi=data_ssi[:,:,isv]
-       
-       obs = xarray.Dataset(coords={"sv": svl,"time":times})
-       for i, k in enumerate(obl):
-           if k is None:
-               continue
-           elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect
-               continue
-           obs[k] = (("time", "sv"), data[i, :, :])
-           if useindicators:
-               if k[0] == 'L':
-                   obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
-                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-               #elif k[0] == 'C': # only for code?
-               else:
-                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-                   
-       obs = obs.dropna(dim="sv", how="all")
-       obs = obs.dropna(dim="time", how="all")  # when tlim specified
-            
-       data=obs
-        
-        
-    # %% patch SV names in case of "G 7" => "G07"
-    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
-    # %% other attributes
-    data.attrs["version"] = hdr["version"]
-
-    # Get interval from header or derive it from the data
-    if "interval" in hdr.keys():
-        data.attrs["interval"] = hdr["interval"]
-    elif "time" in data.coords.keys():
-        # median is robust against gaps
-        try:
-            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
-        except TypeError:
-            pass
-    else:
-        data.attrs["interval"] = np.nan
-
-    data.attrs["rinextype"] = "obs"
-    data.attrs["fast_processing"] = int(fast)  # bool is not allowed in NetCDF4
-    data.attrs["time_system"] = determine_time_system(hdr)
-    if isinstance(fn, Path):
-        data.attrs["filename"] = fn.name
-
-    if "position" in hdr.keys():
-        data.attrs["position"] = hdr["position"]
-        if ecef2geodetic is not None:
-            data.attrs["position_geodetic"] = hdr["position_geodetic"]
-
-    if time_offset:
-        data.attrs["time_offset"] = time_offset
-
-    if "RCV CLOCK OFFS APPL" in hdr.keys():
-        try:
-            data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
-        except ValueError:
-            pass
-
-    return data
-
-
-def _timeobs(ln: str) -> datetime:
-    """
-    convert time from RINEX 3 OBS text to datetime
-    """
-
-    if not ln.startswith("> "):  # pg. A13
-        raise ValueError('RINEX 3 line beginning "> " is not present')
-
-    return datetime(
-        int(ln[2:6]),
-        int(ln[7:9]),
-        int(ln[10:12]),
-        hour=int(ln[13:15]),
-        minute=int(ln[16:18]),
-        second=int(ln[19:21]),
-        microsecond=int(float(ln[19:29]) % 1 * 1000000),
-    )
-
-
-def obstime3(fn: T.TextIO | Path, verbose: bool = False) -> np.ndarray:
-    """
-    return all times in RINEX file
-    """
-
-    times = []
-
-    with opener(fn) as f:
-        for ln in f:
-            if ln.startswith("> "):
-                try:
-                    times.append(_timeobs(ln))
-                except (ValueError, IndexError):
-                    logging.debug(f"was not a time:\n{ln}")
-                    continue
-
-    times = np.asarray(times)
-
-    check_unique_times(times)
-
-    return times
-
-
-def _epoch(
-    data: xarray.Dataset,
-    raw: str,
-    hdr: dict[str, T.Any],
-    time: datetime,
-    sv: list[str],
-    useindicators: bool,
-    verbose: bool,
-) -> xarray.Dataset:
-    """
-    block processing of each epoch (time step)
-    """
-    darr = np.atleast_2d(
-        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
-    )
-    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
-    # %% assign data for each time step
-    for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
-        # satellite indices "si" to extract from this time's measurements
-        si = [i for i, s in enumerate(sv) if s[0] in sk]
-        if len(si) == 0:  # no SV of this system "sk" at this time
-            continue
-
-        # measurement indices "di" to extract at this time step
-        di = hdr["fields_ind"][sk]
-        garr = darr[si, :]
-        garr = garr[:, di]
-
-        gsv = np.array(sv)[si]
-
-        dsf: dict[str, tuple] = {}
-        for i, k in enumerate(hdr["fields"][sk]):
-            dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3]))
-
-            if useindicators:
-                dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3])
-
-        if verbose:
-            print(time, "\r", end="")
-
-        epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
-        if len(data) == 0:
-            data = epoch_data
-            #data = xarray.merge((data, epoch_data))
-        elif len(hdr["fields"]) == 1:  # one satellite system selected, faster to process
-            data = xarray.concat((data, epoch_data), dim="time")
-        else:  # general case, slower for different satellite systems all together
-            data = xarray.merge((data, epoch_data))
-
-    return data
-
-def rinexsystem3(
-    fn: T.TextIO | Path,
-    use: set[str] = None,
-    tlim: tuple[datetime, datetime] = None,
-    useindicators: bool = False,
-    meas: list[str] = None,
-    verbose: bool = False,
-    *,
-    fast: bool = False,
-    interval: float | int | timedelta = None,
-) -> xarray.Dataset:
-    """
-    process RINEX 3 OBS data
-
-    fn: RINEX OBS 3 filename
-    use: 'G'  or ['G', 'R'] or similar
-
-    tlim: read between these time bounds
-    useindicators: SSI, LLI are output
-    meas:  'L1C'  or  ['L1C', 'C1C'] or similar
-
-    fast:
-          TODO: FUTURE, not yet enabled for OBS3
-          speculative preallocation based on minimum SV assumption and file size.
-          Avoids double-reading file and more complicated linked lists.
-          Believed that Numpy array should be faster than lists anyway.
-          Reduce Nsvmin if error (let us know)
-
-    interval: allows decimating file read by time e.g. every 5 seconds.
-                Useful to speed up reading of very large RINEX files
-    """
-
-    interval = check_time_interval(interval)
-
-    if isinstance(use, str):
-        use = {use}
-
-    if isinstance(meas, str):
-        meas = [meas]
-
-    if not meas or not meas[0].strip():
-        meas = None
-    # %% allocate
-    times = obstime3(fn)
-    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
-    if tlim is not None and not isinstance(tlim[0], datetime):
-        raise TypeError("time bounds are specified as datetime.datetime")
-
-    last_epoch = None
-    # %% loop
-    with opener(fn) as f:
-        
-        if fast:
-            hdr = obsheader3(f)
-        else:
-            hdr = obsheader3(f, use, meas)
-            
-        obl = []
-        for sk in hdr["fields"]:
-            obl=obl+hdr["fields"][sk]
-        obl = np.unique(np.array(obl))
-        obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
-        
-        
-        if meas is not None:
-            obl = obl[np.isin(obl,meas)]
-            
-        Nt = times.size
-        Npages = len(obl)
-        Nsv = int(hdr['# OF SATELLITES'])
-        
-        svl = np.tile('   ',Nsv)
-        
-        data = np.empty((Npages, Nt, Nsv))
-        data.fill(np.nan)
-        
-        if useindicators:
-            data_lli = np.full_like(data,np.nan)
-            data_ssi = np.full_like(data,np.nan)
-
-        # %% process OBS file
-        time_offset = []
-        for ln in f:
-            if not ln.startswith(">"):  # end of file
-                break
-
-            try:
-                time = _timeobs(ln)
-            except ValueError:  # garbage between header and RINEX data
-                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
-                continue
-
-            try:
-                time_offset.append(float(ln[41:56]))
-            except ValueError:
-                pass
-            # %% get SV indices
-            sv = []
-            raw = ""
-            # Number of visible satellites this time %i3  pg. A13
-            for _ in range(int(ln[33:35])):
-                ln = f.readline()
-                if use is None or ln[0] in use:
-                    sv.append(ln[:3])
-                    raw += ln[3:]
-
-            if tlim is not None:
-                if time < tlim[0]:
-                    continue
-                elif time > tlim[1]:
-                    break
-
-            if interval is not None:
-                if last_epoch is None:  # initialization
-                    last_epoch = time
-                else:
-                    if time - last_epoch < interval:
-                        continue
-                    else:
-                        last_epoch += interval
-
-            if verbose:
-                print(time, end="\r")
-
- 
-            for k in sv:
-                if not k in svl:
-                    svl[np.argmax(svl=='   ')]=k
-                    
-
-
-            darr = np.atleast_2d(
-                    np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
-                )
-            
-            t = time==times
-            
-            
-            for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
-                # satellite indices "si" to extract from this time's measurements
-                si = [i for i, s in enumerate(sv) if s[0] in sk]
-                if len(si) == 0:  # no SV of this system "sk" at this time
-                    continue
-                
-                gsv = np.array(sv)[si]
-                isv = [i for i,s in enumerate(svl) if s in gsv]
-                #isv = np.zeros_like(gsv,dtype=int)
-                
-                #for ii,jj in enumerate(gsv):
-                 #   isv[ii] = np.nonzero(svl==jj)
-                
-                
-                for i,j in enumerate(hdr['fields'][sk]):
-                    o = obl==j
-                    #for ii,jj in enumerate(gsv):
-                    #    data[o,t,svl==jj]=darr[si[ii],i*3]
-                    data[o,t,isv]=darr[si,i*3]    
-                    if useindicators:
-                        data_lli[o,t,isv]=darr[si,i*3+1]  
-                        data_ssi[o,t,isv]=darr[si,i*3+2]  
-                    
-                
-                
-                
-    
-    if '   ' in svl:
-        svl=svl[:np.argmax(svl=='   ')]
-    
-    svl, isv = np.unique(svl,return_index=True)
-        
-    data=data[:,:,isv]
-    if useindicators:
-        data_lli=data_lli[:,:,isv]
-        data_ssi=data_ssi[:,:,isv]
-    
-    obs = xarray.Dataset(coords={"time":times,"sv": svl})
-    for i, k in enumerate(obl):
-        # FIXME: for limited time span reads, this drops unused data variables
-        # if np.isnan(data[i, ...]).all():
-        #     continue
-        if k is None:
-            continue
-        obs[k] = (("time", "sv"), data[i, :, :])
-        if useindicators:
-            if k[0] == 'L':
-                obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
-                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-            elif k[0] == 'C':
-                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-        
-        
-    obs = obs.dropna(dim="sv", how="all")
-    obs = obs.dropna(dim="time", how="all")  # when tlim specified
-    
-    data=obs
-    # %% patch SV names in case of "G 7" => "G07"
-    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
-    # %% other attributes
-    data.attrs["version"] = hdr["version"]
-
-    # Get interval from header or derive it from the data
-    if "interval" in hdr.keys():
-        data.attrs["interval"] = hdr["interval"]
-    elif "time" in data.coords.keys():
-        # median is robust against gaps
-        try:
-            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
-        except TypeError:
-            pass
-    else:
-        data.attrs["interval"] = np.nan
-
-    data.attrs["rinextype"] = "obs"
-    data.attrs["fast_processing"] = 0  # bool is not allowed in NetCDF4
-    data.attrs["time_system"] = determine_time_system(hdr)
-    if isinstance(fn, Path):
-        data.attrs["filename"] = fn.name
-
-    if "position" in hdr.keys():
-        data.attrs["position"] = hdr["position"]
-        if ecef2geodetic is not None:
-            data.attrs["position_geodetic"] = hdr["position_geodetic"]
-
-    if time_offset:
-        data.attrs["time_offset"] = time_offset
-
-    if "RCV CLOCK OFFS APPL" in hdr.keys():
-        try:
-            data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
-        except ValueError:
-            pass
-
-    return data
-
-
-def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
-    """
-    handle LLI (loss of lock) and SSI (signal strength)
-    """
-    if k.startswith(("L1", "L2")):
-        d[k + "lli"] = (("time", "sv"), np.atleast_2d(arr[:, 0]))
-
-    d[k + "ssi"] = (("time", "sv"), np.atleast_2d(arr[:, 1]))
-
-    return d
-
-
-def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dict[str, T.Any]:
-    """
-    get RINEX 3 OBS types, for each system type
-    optionally, select system type and/or measurement type to greatly
-    speed reading and save memory (RAM, disk)
-    """
-    if isinstance(f, (str, Path)):
-        with opener(f, header=True) as h:
-            return obsheader3(h, use, meas)
-
-    fields = {}
-    Fmax = 0
-
-    # %% first line
-    hdr = rinexinfo(f)
-
-    for ln in f:
-        if "END OF HEADER" in ln:
-            break
-
-        hd = ln[60:80]
-        c = ln[:60]
-        if "SYS / # / OBS TYPES" in hd:
-            k = c[0]
-            fields[k] = c[6:60].split()
-            N = int(c[3:6])
-            # %% maximum number of fields in a file, to allow fast Numpy parse.
-            Fmax = max(N, Fmax)
-
-            n = N - 13
-            while n > 0:  # Rinex 3.03, pg. A6, A7
-                ln = f.readline()
-                assert "SYS / # / OBS TYPES" in ln[60:]
-                fields[k] += ln[6:60].split()
-                n -= 13
-
-            assert len(fields[k]) == N
-
-            continue
-
-        if hd.strip() not in hdr:  # Header label
-            hdr[hd.strip()] = c  # don't strip for fixed-width parsers
-            # string with info
-        else:  # concatenate to the existing string
-            hdr[hd.strip()] += " " + c
-
-    # %% list with x,y,z cartesian (OPTIONAL)
-    # Rinex 3.03, pg. A6, Table A2
-    try:
-        # some RINEX files have bad headers with mulitple APPROX POSITION XYZ.
-        # we choose to use the first such header.
-        hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3]
-        if ecef2geodetic is not None and len(hdr["position"]) == 3:
-            hdr["position_geodetic"] = ecef2geodetic(*hdr["position"])
-    except (KeyError, ValueError):
-        pass
-    # %% time
-    try:
-        t0s = hdr["TIME OF FIRST OBS"]
-        # NOTE: must do second=int(float()) due to non-conforming files
-        hdr["t0"] = datetime(
-            year=int(t0s[:6]),
-            month=int(t0s[6:12]),
-            day=int(t0s[12:18]),
-            hour=int(t0s[18:24]),
-            minute=int(t0s[24:30]),
-            second=int(float(t0s[30:36])),
-            microsecond=int(float(t0s[30:43]) % 1 * 1000000),
-        )
-    except (KeyError, ValueError):
-        pass
-
-    try:
-        hdr["interval"] = float(hdr["INTERVAL"][:10])
-    except (KeyError, ValueError):
-        pass
-    # %% select specific satellite systems only (optional)
-    if use:
-        if not set(fields.keys()).intersection(use):
-            raise KeyError(f"system type {use} not found in RINEX file")
-
-        fields = {k: fields[k] for k in use if k in fields}
-
-    # perhaps this could be done more efficiently, but it's probably low impact on overall program.
-    # simple set and frozenset operations do NOT preserve order, which would completely mess up reading!
-    sysind: dict[str, T.Any] = {}
-    if isinstance(meas, (tuple, list, np.ndarray)):
-        for sk in fields:  # iterate over each system
-            # ind = np.isin(fields[sk], meas)  # boolean vector
-            ind = np.zeros(len(fields[sk]), dtype=bool)
-            for m in meas:
-                for i, field in enumerate(fields[sk]):
-                    if field.startswith(m):
-                        ind[i] = True
-
-            fields[sk] = np.array(fields[sk])[ind].tolist()
-            sysind[sk] = np.empty(Fmax * 3, dtype=bool)  # *3 due to LLI, SSI
-            for j, i in enumerate(ind):
-                sysind[sk][j * 3 : j * 3 + 3] = i
-    else:
-        sysind = {k: np.s_[:] for k in fields}
-
-    hdr["fields"] = fields
-    hdr["fields_ind"] = sysind
-    hdr["Fmax"] = Fmax
-
-    return hdr

From f521b9ef2d9e87aa912ab016232d034735a8cebd Mon Sep 17 00:00:00 2001
From: breid-phys <81174535+breid-phys@users.noreply.github.com>
Date: Thu, 26 May 2022 15:21:41 -0300
Subject: [PATCH 05/19] Updated rio.py .Z

.Z compressed files can also have Hatanaka compression
---
 src/georinex/rio.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/georinex/rio.py b/src/georinex/rio.py
index cb9b2cf..4787428 100644
--- a/src/georinex/rio.py
+++ b/src/georinex/rio.py
@@ -72,6 +72,13 @@ def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]:
                 raise ImportError("pip install unlzw3")
             with fn.open("rb") as zu:
                 with io.StringIO(unlzw(zu.read()).decode("ascii")) as f:
+                    _, is_crinex = rinex_version(first_nonblank_line(f))
+                    f.seek(0)
+
+                    if is_crinex and not header:
+                        # Conversion to string is necessary because of a quirk where gzip.open()
+                        # even with 'rt' doesn't decompress until read.
+                        f = io.StringIO(crx2rnx(f.read()))
                     yield f
         else:  # assume not compressed (or Hatanaka)
             with fn.open("r", encoding="ascii", errors="ignore") as f:

From 254d8e1184da9c846911645aee257cc0be42e5ec Mon Sep 17 00:00:00 2001
From: Ben <breid.phys@gmail.com>
Date: Thu, 7 Jul 2022 11:33:07 -0300
Subject: [PATCH 06/19] fixed critical indexing mistake in fast obs3

---
 src/georinex/obs3.py             | 389 +++++++++++++++++++++++++++++--
 src/georinex/rio.py              |   7 +
 src/georinex/tests/test_rinex.py |   2 +-
 3 files changed, 380 insertions(+), 18 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index 130940b..c16bea4 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -44,12 +44,10 @@ def rinexobs3(
     useindicators: SSI, LLI are output
     meas:  'L1C'  or  ['L1C', 'C1C'] or similar
 
-    fast:
-          TODO: FUTURE, not yet enabled for OBS3
-          speculative preallocation based on minimum SV assumption and file size.
-          Avoids double-reading file and more complicated linked lists.
-          Believed that Numpy array should be faster than lists anyway.
-          Reduce Nsvmin if error (let us know)
+    fast: Still double-reading file to get times
+          Uses # OF SATELLITES from header to try and size nparray, falls back
+          to Nsvsys if missing
+          if false, uses old _epoch method
 
     interval: allows decimating file read by time e.g. every 5 seconds.
                 Useful to speed up reading of very large RINEX files
@@ -65,16 +63,62 @@ def rinexobs3(
 
     if not meas or not meas[0].strip():
         meas = None
+   
+    """
+    Nsvsys may need updating as GNSS systems grow.
+    Let us know if you needed to change them.
+
+    Beidou is 35 max
+    Galileo is 36 max
+    """
+    Nsvsys = 36
+        
     # %% allocate
-    # times = obstime3(fn)
-    data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    if fast:
+        times = obstime3(fn)
+    else:
+        data = xarray.Dataset({}, coords={"time": [], "sv": []})
+        
     if tlim is not None and not isinstance(tlim[0], datetime):
         raise TypeError("time bounds are specified as datetime.datetime")
 
     last_epoch = None
     # %% loop
     with opener(fn) as f:
-        hdr = obsheader3(f, use, meas)
+        
+
+        if fast:
+            hdr = obsheader3(f,use)
+            
+            obl = []
+            for sk in hdr["fields"]:
+                obl=obl+hdr["fields"][sk]
+            obl = np.unique(np.array(obl))
+            obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
+            
+            
+            if meas is not None:
+                obl = obl[np.any([np.char.find(obl,j)==0 for i, j  in enumerate(meas)],0)]
+                
+            Nt = times.size
+            Npages = len(obl)
+            if '# OF SATELLITES' in hdr.keys():
+                Nsv = int(hdr['# OF SATELLITES'])
+            else:
+                Nsv = Nsvsys * len(hdr['fields'])
+            
+            svl = np.tile('   ',Nsv)
+            
+            data = np.empty((Npages, Nt, Nsv))
+            data.fill(np.nan)
+            
+            if useindicators:
+                data_lli = np.full_like(data,np.nan)
+                data_ssi = np.full_like(data,np.nan)
+
+        else:
+            hdr = obsheader3(f, use, meas)
+
 
         # %% process OBS file
         time_offset = []
@@ -98,8 +142,9 @@ def rinexobs3(
             # Number of visible satellites this time %i3  pg. A13
             for _ in range(int(ln[33:35])):
                 ln = f.readline()
-                sv.append(ln[:3])
-                raw += ln[3:]
+                if use is None or ln[0] in use:
+                    sv.append(ln[:3])
+                    raw += ln[3:]
 
             if tlim is not None:
                 if time < tlim[0]:
@@ -118,10 +163,84 @@ def rinexobs3(
 
             if verbose:
                 print(time, end="\r")
-
-            # this time epoch is complete, assemble the data.
-            data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
-
+            
+            if fast:
+                for k in sv:
+                    # update list of satellites
+                    if not k in svl:
+                        svl[np.argmax(svl=='   ')]=k
+                        
+                
+                darr = np.atleast_2d(
+                        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+                    )
+                
+                t = time==times
+                
+                #print(darr.shape)
+                #print(sv)
+                for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+                    # satellite indices "si" to extract from this time's measurements
+                    
+                    
+                    si = [i for i, s in enumerate(sv) if s[0] in sk]
+                    if len(si) == 0:  # no SV of this system "sk" at this time
+                        continue
+                    
+                    gsv = np.array(sv)[si]
+                    #print(gsv)
+                    #isv = [i for i,s in enumerate(svl) if s in gsv]
+                    isv = [np.where(s==gsv) for i,s in enumerate(svl)]
+                    isv,jsv = np.nonzero(np.atleast_2d(svl).T == np.atleast_2d(gsv))
+                    #jsv = [i for i,s in enumerate(sv) if s in gsv]
+                    #print(isv)
+                    #print(jsv)
+                    #print(svl[isv])
+                    for i,j in enumerate(hdr['fields'][sk]):
+                        o = obl==j
+                        if not np.any(o):
+                            continue
+                        data[o,t,isv]=darr[jsv,i*3]    
+                        if useindicators:
+                            data_lli[o,t,isv]=darr[jsv,i*3+1]  
+                            data_ssi[o,t,isv]=darr[jsv,i*3+2]  
+            else:
+                # this time epoch is complete, assemble the data.
+                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+
+    if fast:
+       if '   ' in svl:
+           # remove blank satellites (if tlim used)
+           svl=svl[:np.argmax(svl=='   ')]
+       
+       svl, isv = np.unique(svl,return_index=True)
+       #isv = range(svl.size)
+       data=data[:,:,isv]
+       if useindicators:
+           data_lli=data_lli[:,:,isv]
+           data_ssi=data_ssi[:,:,isv]
+       
+       obs = xarray.Dataset(coords={"sv": svl,"time":times})
+       for i, k in enumerate(obl):
+           if k is None:
+               continue
+           elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect
+               continue
+           obs[k] = (("time", "sv"), data[i, :, :])
+           if useindicators:
+               if k[0] == 'L':
+                   obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
+                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+               #elif k[0] == 'C': # only for code?
+               else:
+                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+                   
+       obs = obs.dropna(dim="sv", how="all")
+       obs = obs.dropna(dim="time", how="all")  # when tlim specified
+            
+       data=obs
+        
+        
     # %% patch SV names in case of "G 7" => "G07"
     data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
     # %% other attributes
@@ -140,7 +259,7 @@ def rinexobs3(
         data.attrs["interval"] = np.nan
 
     data.attrs["rinextype"] = "obs"
-    data.attrs["fast_processing"] = 0  # bool is not allowed in NetCDF4
+    data.attrs["fast_processing"] = int(fast)  # bool is not allowed in NetCDF4
     data.attrs["time_system"] = determine_time_system(hdr)
     if isinstance(fn, Path):
         data.attrs["filename"] = fn.name
@@ -152,7 +271,7 @@ def rinexobs3(
 
     if time_offset:
         data.attrs["time_offset"] = time_offset
-
+        
     if "RCV CLOCK OFFS APPL" in hdr.keys():
         try:
             data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
@@ -219,6 +338,7 @@ def _epoch(
     darr = np.atleast_2d(
         np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
     )
+    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
     # %% assign data for each time step
     for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
         # satellite indices "si" to extract from this time's measurements
@@ -246,6 +366,7 @@ def _epoch(
         epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
         if len(data) == 0:
             data = epoch_data
+            #data = xarray.merge((data, epoch_data))
         elif len(hdr["fields"]) == 1:  # one satellite system selected, faster to process
             data = xarray.concat((data, epoch_data), dim="time")
         else:  # general case, slower for different satellite systems all together
@@ -253,6 +374,240 @@ def _epoch(
 
     return data
 
+def rinexsystem3(
+    fn: T.TextIO | Path,
+    use: set[str] = None,
+    tlim: tuple[datetime, datetime] = None,
+    useindicators: bool = False,
+    meas: list[str] = None,
+    verbose: bool = False,
+    *,
+    fast: bool = False,
+    interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+    """
+    process RINEX 3 OBS data
+
+    fn: RINEX OBS 3 filename
+    use: 'G'  or ['G', 'R'] or similar
+
+    tlim: read between these time bounds
+    useindicators: SSI, LLI are output
+    meas:  'L1C'  or  ['L1C', 'C1C'] or similar
+
+    fast:
+          TODO: FUTURE, not yet enabled for OBS3
+          speculative preallocation based on minimum SV assumption and file size.
+          Avoids double-reading file and more complicated linked lists.
+          Believed that Numpy array should be faster than lists anyway.
+          Reduce Nsvmin if error (let us know)
+
+    interval: allows decimating file read by time e.g. every 5 seconds.
+                Useful to speed up reading of very large RINEX files
+    """
+
+    interval = check_time_interval(interval)
+
+    if isinstance(use, str):
+        use = {use}
+
+    if isinstance(meas, str):
+        meas = [meas]
+
+    if not meas or not meas[0].strip():
+        meas = None
+    # %% allocate
+    times = obstime3(fn)
+    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    if tlim is not None and not isinstance(tlim[0], datetime):
+        raise TypeError("time bounds are specified as datetime.datetime")
+
+    last_epoch = None
+    # %% loop
+    with opener(fn) as f:
+        
+        if fast:
+            hdr = obsheader3(f)
+        else:
+            hdr = obsheader3(f, use, meas)
+            
+        obl = []
+        for sk in hdr["fields"]:
+            obl=obl+hdr["fields"][sk]
+        obl = np.unique(np.array(obl))
+        obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
+        
+        
+        if meas is not None:
+            obl = obl[np.isin(obl,meas)]
+            
+        Nt = times.size
+        Npages = len(obl)
+        Nsv = int(hdr['# OF SATELLITES'])
+        
+        svl = np.tile('   ',Nsv)
+        
+        data = np.empty((Npages, Nt, Nsv))
+        data.fill(np.nan)
+        
+        if useindicators:
+            data_lli = np.full_like(data,np.nan)
+            data_ssi = np.full_like(data,np.nan)
+
+        # %% process OBS file
+        time_offset = []
+        for ln in f:
+            if not ln.startswith(">"):  # end of file
+                break
+
+            try:
+                time = _timeobs(ln)
+            except ValueError:  # garbage between header and RINEX data
+                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                continue
+
+            try:
+                time_offset.append(float(ln[41:56]))
+            except ValueError:
+                pass
+            # %% get SV indices
+            sv = []
+            raw = ""
+            # Number of visible satellites this time %i3  pg. A13
+            for _ in range(int(ln[33:35])):
+                ln = f.readline()
+                if use is None or ln[0] in use:
+                    sv.append(ln[:3])
+                    raw += ln[3:]
+
+            if tlim is not None:
+                if time < tlim[0]:
+                    continue
+                elif time > tlim[1]:
+                    break
+
+            if interval is not None:
+                if last_epoch is None:  # initialization
+                    last_epoch = time
+                else:
+                    if time - last_epoch < interval:
+                        continue
+                    else:
+                        last_epoch += interval
+
+            if verbose:
+                print(time, end="\r")
+
+ 
+            for k in sv:
+                if not k in svl:
+                    svl[np.argmax(svl=='   ')]=k
+                    
+
+
+            darr = np.atleast_2d(
+                    np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+                )
+            
+            t = time==times
+            
+            
+            for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+                # satellite indices "si" to extract from this time's measurements
+                si = [i for i, s in enumerate(sv) if s[0] in sk]
+                if len(si) == 0:  # no SV of this system "sk" at this time
+                    continue
+                
+                gsv = np.array(sv)[si]
+                isv = [i for i,s in enumerate(svl) if s in gsv]
+                #isv = np.zeros_like(gsv,dtype=int)
+                
+                #for ii,jj in enumerate(gsv):
+                 #   isv[ii] = np.nonzero(svl==jj)
+                
+                
+                for i,j in enumerate(hdr['fields'][sk]):
+                    o = obl==j
+                    #for ii,jj in enumerate(gsv):
+                    #    data[o,t,svl==jj]=darr[si[ii],i*3]
+                    data[o,t,isv]=darr[si,i*3]    
+                    if useindicators:
+                        data_lli[o,t,isv]=darr[si,i*3+1]  
+                        data_ssi[o,t,isv]=darr[si,i*3+2]  
+                    
+                
+                
+                
+    
+    if '   ' in svl:
+        svl=svl[:np.argmax(svl=='   ')]
+    
+    svl, isv = np.unique(svl,return_index=True)
+        
+    data=data[:,:,isv]
+    if useindicators:
+        data_lli=data_lli[:,:,isv]
+        data_ssi=data_ssi[:,:,isv]
+    
+    obs = xarray.Dataset(coords={"time":times,"sv": svl})
+    for i, k in enumerate(obl):
+        # FIXME: for limited time span reads, this drops unused data variables
+        # if np.isnan(data[i, ...]).all():
+        #     continue
+        if k is None:
+            continue
+        obs[k] = (("time", "sv"), data[i, :, :])
+        if useindicators:
+            if k[0] == 'L':
+                obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
+                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+            elif k[0] == 'C':
+                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+        
+        
+    obs = obs.dropna(dim="sv", how="all")
+    obs = obs.dropna(dim="time", how="all")  # when tlim specified
+    
+    data=obs
+    # %% patch SV names in case of "G 7" => "G07"
+    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+    # %% other attributes
+    data.attrs["version"] = hdr["version"]
+
+    # Get interval from header or derive it from the data
+    if "interval" in hdr.keys():
+        data.attrs["interval"] = hdr["interval"]
+    elif "time" in data.coords.keys():
+        # median is robust against gaps
+        try:
+            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+        except TypeError:
+            pass
+    else:
+        data.attrs["interval"] = np.nan
+
+    data.attrs["rinextype"] = "obs"
+    data.attrs["fast_processing"] = 0  # bool is not allowed in NetCDF4
+    data.attrs["time_system"] = determine_time_system(hdr)
+    if isinstance(fn, Path):
+        data.attrs["filename"] = fn.name
+
+    if "position" in hdr.keys():
+        data.attrs["position"] = hdr["position"]
+        if ecef2geodetic is not None:
+            data.attrs["position_geodetic"] = hdr["position_geodetic"]
+
+    if time_offset:
+        data.attrs["time_offset"] = time_offset
+
+    if "RCV CLOCK OFFS APPL" in hdr.keys():
+        try:
+            data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
+        except ValueError:
+            pass
+
+    return data
+
 
 def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
     """
diff --git a/src/georinex/rio.py b/src/georinex/rio.py
index cb9b2cf..4787428 100644
--- a/src/georinex/rio.py
+++ b/src/georinex/rio.py
@@ -72,6 +72,13 @@ def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]:
                 raise ImportError("pip install unlzw3")
             with fn.open("rb") as zu:
                 with io.StringIO(unlzw(zu.read()).decode("ascii")) as f:
+                    _, is_crinex = rinex_version(first_nonblank_line(f))
+                    f.seek(0)
+
+                    if is_crinex and not header:
+                        # Conversion to string is necessary because of a quirk where gzip.open()
+                        # even with 'rt' doesn't decompress until read.
+                        f = io.StringIO(crx2rnx(f.read()))
                     yield f
         else:  # assume not compressed (or Hatanaka)
             with fn.open("r", encoding="ascii", errors="ignore") as f:
diff --git a/src/georinex/tests/test_rinex.py b/src/georinex/tests/test_rinex.py
index 74fd7e5..1d8c273 100644
--- a/src/georinex/tests/test_rinex.py
+++ b/src/georinex/tests/test_rinex.py
@@ -61,7 +61,7 @@ def test_minimal(tmp_path, filename):
         if int(dat.version) == 2:
             assert dat.fast_processing
         elif int(dat.version) == 3:
-            assert not dat.fast_processing  # FIXME: update when OBS3 fast processing is added.
+            assert dat.fast_processing 
 
 
 def test_dont_care_file_extension():

From bdb26bbaccd8775f1ffa8ce6187373405b6dc758 Mon Sep 17 00:00:00 2001
From: Ben <breid.phys@gmail.com>
Date: Fri, 8 Jul 2022 09:58:12 -0300
Subject: [PATCH 07/19] fixed index when multi-constellation

---
 src/georinex/obs3.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index 8a72dfd..e48a30b 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -186,9 +186,11 @@ def rinexobs3(
                     
                     gsv = np.array(sv)[si]
 
-                    isv = [np.where(s==gsv) for i,s in enumerate(svl)]
-                    isv,jsv = np.nonzero(np.atleast_2d(svl).T == np.atleast_2d(gsv))
-
+                    #isv = [np.where(s==gsv) for i,s in enumerate(svl)]
+                    isv,jsv = np.nonzero(np.logical_and(
+                        np.atleast_2d(svl).T == np.atleast_2d(sv),
+                                      np.isin(sv,gsv))                  )
+                    
 
                     for i,j in enumerate(hdr['fields'][sk]):
                         o = obl==j

From 82f8971d9e3dc092f7d726a8d662ece1ff4ce8fb Mon Sep 17 00:00:00 2001
From: Ben <breid.phys@gmail.com>
Date: Fri, 16 Sep 2022 18:05:23 -0300
Subject: [PATCH 08/19] minor tweaks for high speed processing

---
 src/georinex/base.py | 6 +++---
 src/georinex/obs2.py | 4 ++--
 src/georinex/obs3.py | 9 ++++-----
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/georinex/base.py b/src/georinex/base.py
index becb78b..5d419d6 100644
--- a/src/georinex/base.py
+++ b/src/georinex/base.py
@@ -154,7 +154,7 @@ def rinexnav(
 
         if fn.suffix == ".nc":
             try:
-                return xarray.open_dataset(fn, group=group)
+                return xarray.load_dataset(fn, group=group)
             except OSError as e:
                 raise LookupError(f"Group {group} not found in {fn}    {e}")
 
@@ -205,7 +205,7 @@ def rinexobs(
         # %% NetCDF4
         if fn.suffix == ".nc":
             try:
-                return xarray.open_dataset(fn, group=group)
+                return xarray.load_dataset(fn, group=group)
             except OSError as e:
                 raise LookupError(f"Group {group} not found in {fn}   {e}")
 
@@ -259,7 +259,7 @@ def _groupexists(fn: Path, group: str, overwrite: bool) -> str:
 
     # be sure there isn't already NAV in it
     try:
-        xarray.open_dataset(fn, group=group)
+        xarray.load_dataset(fn, group=group)
         raise ValueError(f"{group} already in {fn}")
     except OSError:
         pass
diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py
index 4608bab..5dee6db 100644
--- a/src/georinex/obs2.py
+++ b/src/georinex/obs2.py
@@ -36,7 +36,7 @@ def rinexobs2(
     if not use:
         use = {"C", "E", "G", "J", "R", "S"}
 
-    obs = xarray.Dataset({}, coords={"time": [], "sv": []})
+    obs = xarray.Dataset({}, coords={"time": np.array([],dtype=np.datetime64), "sv": []})
     attrs: dict[str, T.Any] = {}
     for u in use:
         o = rinexsystem2(
@@ -388,7 +388,7 @@ def obsheader2(
     hdr["Nl_sv"] = ceil(hdr["Nobs"] / 5)
     # %% list with receiver location in x,y,z cartesian ECEF (OPTIONAL)
     try:
-        hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()]
+        hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3]
         if ecef2geodetic is not None:
             hdr["position_geodetic"] = ecef2geodetic(*hdr["position"])
     except (KeyError, ValueError):
diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index e48a30b..77ac5fc 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -88,7 +88,7 @@ def rinexobs3(
         
 
         if fast:
-            hdr = obsheader3(f,use)
+            hdr = obsheader3(f,use)    
             
             obl = []
             for sk in hdr["fields"]:
@@ -96,7 +96,6 @@ def rinexobs3(
             obl = np.unique(np.array(obl))
             obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
             
-            
             if meas is not None:
                 obl = obl[np.any([np.char.find(obl,j)==0 for i, j  in enumerate(meas)],0)]
                 
@@ -186,7 +185,6 @@ def rinexobs3(
                     
                     gsv = np.array(sv)[si]
 
-                    #isv = [np.where(s==gsv) for i,s in enumerate(svl)]
                     isv,jsv = np.nonzero(np.logical_and(
                         np.atleast_2d(svl).T == np.atleast_2d(sv),
                                       np.isin(sv,gsv))                  )
@@ -234,7 +232,7 @@ def rinexobs3(
                    obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
                    
        obs = obs.dropna(dim="sv", how="all")
-       obs = obs.dropna(dim="time", how="all")  # when tlim specified
+       #obs = obs.dropna(dim="time", how="all")  # when tlim specified
             
        data=obs
         
@@ -266,7 +264,8 @@ def rinexobs3(
         data.attrs["position"] = hdr["position"]
         if ecef2geodetic is not None:
             data.attrs["position_geodetic"] = hdr["position_geodetic"]
-
+    if "rxmodel" in hdr.keys():
+        obs.attrs["rxmodel"] = hdr["rxmodel"]
     if time_offset:
         data.attrs["time_offset"] = time_offset
         

From 2ea8aca5616e25fcaa53b92ea298a7bcd5647ace Mon Sep 17 00:00:00 2001
From: Ben <breid.phys@gmail.com>
Date: Fri, 4 Nov 2022 10:12:56 -0300
Subject: [PATCH 09/19] force unique times

---
 src/georinex/obs3.py | 117 +++++++++++++++++++++----------------------
 1 file changed, 58 insertions(+), 59 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index 77ac5fc..b3717ec 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -63,7 +63,7 @@ def rinexobs3(
 
     if not meas or not meas[0].strip():
         meas = None
-   
+
     """
     Nsvsys may need updating as GNSS systems grow.
     Let us know if you needed to change them.
@@ -72,45 +72,45 @@ def rinexobs3(
     Galileo is 36 max
     """
     Nsvsys = 36
-        
+
     # %% allocate
     if fast:
-        times = obstime3(fn)
+        times = np.unique(obstime3(fn))
     else:
         data = xarray.Dataset({}, coords={"time": [], "sv": []})
-        
+
     if tlim is not None and not isinstance(tlim[0], datetime):
         raise TypeError("time bounds are specified as datetime.datetime")
 
     last_epoch = None
     # %% loop
     with opener(fn) as f:
-        
+
 
         if fast:
-            hdr = obsheader3(f,use)    
-            
+            hdr = obsheader3(f,use)
+
             obl = []
             for sk in hdr["fields"]:
                 obl=obl+hdr["fields"][sk]
             obl = np.unique(np.array(obl))
             obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
-            
+
             if meas is not None:
                 obl = obl[np.any([np.char.find(obl,j)==0 for i, j  in enumerate(meas)],0)]
-                
+
             Nt = times.size
             Npages = len(obl)
             if '# OF SATELLITES' in hdr.keys():
                 Nsv = int(hdr['# OF SATELLITES'])
             else:
                 Nsv = Nsvsys * len(hdr['fields'])
-            
+
             svl = np.tile('   ',Nsv)
-            
+
             data = np.empty((Npages, Nt, Nsv))
             data.fill(np.nan)
-            
+
             if useindicators:
                 data_lli = np.full_like(data,np.nan)
                 data_ssi = np.full_like(data,np.nan)
@@ -162,18 +162,18 @@ def rinexobs3(
 
             if verbose:
                 print(time, end="\r")
-            
+
             if fast:
                 for k in sv:
                     # update list of satellites
                     if not k in svl:
                         svl[np.argmax(svl=='   ')]=k
 
-                
+
                 darr = np.atleast_2d(
                         np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
                     )
-                
+
                 t = time==times
 
                 for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
@@ -182,23 +182,22 @@ def rinexobs3(
                     si = [i for i, s in enumerate(sv) if s[0] in sk]
                     if len(si) == 0:  # no SV of this system "sk" at this time
                         continue
-                    
+
                     gsv = np.array(sv)[si]
 
                     isv,jsv = np.nonzero(np.logical_and(
                         np.atleast_2d(svl).T == np.atleast_2d(sv),
                                       np.isin(sv,gsv))                  )
-                    
+
 
                     for i,j in enumerate(hdr['fields'][sk]):
                         o = obl==j
                         if not np.any(o):
                             continue
-
-                        data[o,t,isv]=darr[jsv,i*3]    
+                            data[o,t,isv]=darr[jsv,i*3]
                         if useindicators:
-                            data_lli[o,t,isv]=darr[jsv,i*3+1]  
-                            data_ssi[o,t,isv]=darr[jsv,i*3+2]  
+                            data_lli[o,t,isv]=darr[jsv,i*3+1]
+                            data_ssi[o,t,isv]=darr[jsv,i*3+2]
 
             else:
                 # this time epoch is complete, assemble the data.
@@ -208,14 +207,14 @@ def rinexobs3(
        if '   ' in svl:
            # remove blank satellites (if tlim used)
            svl=svl[:np.argmax(svl=='   ')]
-       
+
        svl, isv = np.unique(svl,return_index=True)
 
        data=data[:,:,isv]
        if useindicators:
            data_lli=data_lli[:,:,isv]
            data_ssi=data_ssi[:,:,isv]
-       
+
        obs = xarray.Dataset(coords={"sv": svl,"time":times})
        for i, k in enumerate(obl):
            if k is None:
@@ -230,13 +229,13 @@ def rinexobs3(
                #elif k[0] == 'C': # only for code?
                else:
                    obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-                   
+
        obs = obs.dropna(dim="sv", how="all")
        #obs = obs.dropna(dim="time", how="all")  # when tlim specified
-            
+
        data=obs
-        
-        
+
+
     # %% patch SV names in case of "G 7" => "G07"
     data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
     # %% other attributes
@@ -268,7 +267,7 @@ def rinexobs3(
         obs.attrs["rxmodel"] = hdr["rxmodel"]
     if time_offset:
         data.attrs["time_offset"] = time_offset
-        
+
     if "RCV CLOCK OFFS APPL" in hdr.keys():
         try:
             data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
@@ -422,31 +421,31 @@ def rinexsystem3(
     last_epoch = None
     # %% loop
     with opener(fn) as f:
-        
+
         if fast:
             hdr = obsheader3(f)
         else:
             hdr = obsheader3(f, use, meas)
-            
+
         obl = []
         for sk in hdr["fields"]:
             obl=obl+hdr["fields"][sk]
         obl = np.unique(np.array(obl))
         obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
-        
-        
+
+
         if meas is not None:
             obl = obl[np.isin(obl,meas)]
-            
+
         Nt = times.size
         Npages = len(obl)
         Nsv = int(hdr['# OF SATELLITES'])
-        
+
         svl = np.tile('   ',Nsv)
-        
+
         data = np.empty((Npages, Nt, Nsv))
         data.fill(np.nan)
-        
+
         if useindicators:
             data_lli = np.full_like(data,np.nan)
             data_ssi = np.full_like(data,np.nan)
@@ -495,57 +494,57 @@ def rinexsystem3(
             if verbose:
                 print(time, end="\r")
 
- 
+
             for k in sv:
                 if not k in svl:
                     svl[np.argmax(svl=='   ')]=k
-                    
+
 
 
             darr = np.atleast_2d(
                     np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
                 )
-            
+
             t = time==times
-            
-            
+
+
             for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
                 # satellite indices "si" to extract from this time's measurements
                 si = [i for i, s in enumerate(sv) if s[0] in sk]
                 if len(si) == 0:  # no SV of this system "sk" at this time
                     continue
-                
+
                 gsv = np.array(sv)[si]
                 isv = [i for i,s in enumerate(svl) if s in gsv]
                 #isv = np.zeros_like(gsv,dtype=int)
-                
+
                 #for ii,jj in enumerate(gsv):
                  #   isv[ii] = np.nonzero(svl==jj)
-                
-                
+
+
                 for i,j in enumerate(hdr['fields'][sk]):
                     o = obl==j
                     #for ii,jj in enumerate(gsv):
                     #    data[o,t,svl==jj]=darr[si[ii],i*3]
-                    data[o,t,isv]=darr[si,i*3]    
+                    data[o,t,isv]=darr[si,i*3]
                     if useindicators:
-                        data_lli[o,t,isv]=darr[si,i*3+1]  
-                        data_ssi[o,t,isv]=darr[si,i*3+2]  
-                    
-                
-                
-                
-    
+                        data_lli[o,t,isv]=darr[si,i*3+1]
+                        data_ssi[o,t,isv]=darr[si,i*3+2]
+
+
+
+
+
     if '   ' in svl:
         svl=svl[:np.argmax(svl=='   ')]
-    
+
     svl, isv = np.unique(svl,return_index=True)
-        
+
     data=data[:,:,isv]
     if useindicators:
         data_lli=data_lli[:,:,isv]
         data_ssi=data_ssi[:,:,isv]
-    
+
     obs = xarray.Dataset(coords={"time":times,"sv": svl})
     for i, k in enumerate(obl):
         # FIXME: for limited time span reads, this drops unused data variables
@@ -560,11 +559,11 @@ def rinexsystem3(
                 obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
             elif k[0] == 'C':
                 obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-        
-        
+
+
     obs = obs.dropna(dim="sv", how="all")
     obs = obs.dropna(dim="time", how="all")  # when tlim specified
-    
+
     data=obs
     # %% patch SV names in case of "G 7" => "G07"
     data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])

From 0c6b3d2e8f6acac92645fef5615877c69a04f5bf Mon Sep 17 00:00:00 2001
From: Ben <breid.phys@gmail.com>
Date: Fri, 4 Nov 2022 10:30:03 -0300
Subject: [PATCH 10/19] actual bugfix

---
 src/georinex/obs3.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index b3717ec..45151b5 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -75,7 +75,7 @@ def rinexobs3(
 
     # %% allocate
     if fast:
-        times = np.unique(obstime3(fn))
+        times = obstime3(fn)
     else:
         data = xarray.Dataset({}, coords={"time": [], "sv": []})
 
@@ -194,10 +194,11 @@ def rinexobs3(
                         o = obl==j
                         if not np.any(o):
                             continue
-                            data[o,t,isv]=darr[jsv,i*3]
+                        data[np.ix_(o,t,isv)]=darr[jsv,i*3]
+
                         if useindicators:
-                            data_lli[o,t,isv]=darr[jsv,i*3+1]
-                            data_ssi[o,t,isv]=darr[jsv,i*3+2]
+                            data_lli[np.ix_(o,t,isv)]=darr[jsv,i*3+1]
+                            data_ssi[np.ix_(o,t,isv)]=darr[jsv,i*3+2]
 
             else:
                 # this time epoch is complete, assemble the data.

From ef75b05798b8ab5371488dfc37d6ad5f82d295ee Mon Sep 17 00:00:00 2001
From: Ben <breid.phys@gmail.com>
Date: Thu, 8 Dec 2022 15:20:29 -0400
Subject: [PATCH 11/19] code formatting cleanup

---
 src/georinex/obs3.py | 187 ++++++++++++++++++++-----------------------
 1 file changed, 85 insertions(+), 102 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index 45151b5..1e98e4d 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -86,18 +86,18 @@ def rinexobs3(
     # %% loop
     with opener(fn) as f:
 
-
         if fast:
-            hdr = obsheader3(f,use)
+            hdr = obsheader3(f, use)
 
             obl = []
             for sk in hdr["fields"]:
-                obl=obl+hdr["fields"][sk]
+                obl = obl + hdr["fields"][sk]
             obl = np.unique(np.array(obl))
             obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
 
             if meas is not None:
-                obl = obl[np.any([np.char.find(obl,j)==0 for i, j  in enumerate(meas)],0)]
+                obl = obl[np.any([np.char.find(obl, j) == 0
+                                  for i, j in enumerate(meas)], 0)]
 
             Nt = times.size
             Npages = len(obl)
@@ -106,20 +106,19 @@ def rinexobs3(
             else:
                 Nsv = Nsvsys * len(hdr['fields'])
 
-            svl = np.tile('   ',Nsv)
+            svl = np.tile('   ', Nsv)
 
             data = np.empty((Npages, Nt, Nsv))
             data.fill(np.nan)
 
             if useindicators:
-                data_lli = np.full_like(data,np.nan)
-                data_ssi = np.full_like(data,np.nan)
+                data_lli = np.full_like(data, np.nan)
+                data_ssi = np.full_like(data, np.nan)
 
         else:
             hdr = obsheader3(f, use, meas)
 
-
-        # %% process OBS file
+        # process OBS file
         time_offset = []
         for ln in f:
             if not ln.startswith(">"):  # end of file
@@ -166,15 +165,14 @@ def rinexobs3(
             if fast:
                 for k in sv:
                     # update list of satellites
-                    if not k in svl:
-                        svl[np.argmax(svl=='   ')]=k
-
+                    if k not in svl:
+                        svl[np.argmax(svl == '   ')] = k
 
                 darr = np.atleast_2d(
-                        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
-                    )
+                        np.genfromtxt(io.BytesIO(raw.encode("ascii")),
+                                      delimiter=(14, 1, 1) * hdr["Fmax"]))
 
-                t = time==times
+                t = time == times
 
                 for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
                     # satellite indices "si" to extract from this time's measurements
@@ -185,60 +183,60 @@ def rinexobs3(
 
                     gsv = np.array(sv)[si]
 
-                    isv,jsv = np.nonzero(np.logical_and(
+                    isv, jsv = np.nonzero(np.logical_and(
                         np.atleast_2d(svl).T == np.atleast_2d(sv),
-                                      np.isin(sv,gsv))                  )
+                                      np.isin(sv, gsv))                  )
 
-
-                    for i,j in enumerate(hdr['fields'][sk]):
-                        o = obl==j
+                    for i, j in enumerate(hdr['fields'][sk]):
+                        o = obl == j
                         if not np.any(o):
                             continue
-                        data[np.ix_(o,t,isv)]=darr[jsv,i*3]
+                        data[np.ix_(o, t, isv)] = darr[jsv, i*3]
 
                         if useindicators:
-                            data_lli[np.ix_(o,t,isv)]=darr[jsv,i*3+1]
-                            data_ssi[np.ix_(o,t,isv)]=darr[jsv,i*3+2]
+                            data_lli[np.ix_(o, t, isv)] = darr[jsv, i*3+1]
+                            data_ssi[np.ix_(o, t, isv)] = darr[jsv, i*3+2]
 
             else:
                 # this time epoch is complete, assemble the data.
                 data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
 
     if fast:
-       if '   ' in svl:
-           # remove blank satellites (if tlim used)
-           svl=svl[:np.argmax(svl=='   ')]
-
-       svl, isv = np.unique(svl,return_index=True)
-
-       data=data[:,:,isv]
-       if useindicators:
-           data_lli=data_lli[:,:,isv]
-           data_ssi=data_ssi[:,:,isv]
-
-       obs = xarray.Dataset(coords={"sv": svl,"time":times})
-       for i, k in enumerate(obl):
-           if k is None:
-               continue
-           elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect
-               continue
-           obs[k] = (("time", "sv"), data[i, :, :])
-           if useindicators:
-               if k[0] == 'L':
-                   obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
-                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-               #elif k[0] == 'C': # only for code?
-               else:
-                   obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-
-       obs = obs.dropna(dim="sv", how="all")
-       #obs = obs.dropna(dim="time", how="all")  # when tlim specified
-
-       data=obs
+        if '   ' in svl:
+            # remove blank satellites (if tlim used)
+            svl = svl[:np.argmax(svl == '   ')]
+
+        svl, isv = np.unique(svl, return_index=True)
 
+        data = data[:, :, isv]
+        if useindicators:
+            data_lli = data_lli[:, :, isv]
+            data_ssi = data_ssi[:, :, isv]
+
+        obs = xarray.Dataset(coords={"sv": svl, "time": times})
+        for i, k in enumerate(obl):
+            if k is None:
+                continue
+            elif np.all(np.isnan(data[i, :, :])):
+                # drop all nan datasets like tests expect
+                continue
+            obs[k] = (("time", "sv"), data[i, :, :])
+            if useindicators:
+                if k[0] == 'L':
+                    obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
+                    obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+                # elif k[0] == 'C': # only for code?
+                else:
+                    obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+
+        obs = obs.dropna(dim="sv", how="all")
+         # obs = obs.dropna(dim="time", how="all")  # when tlim specified
+
+        data = obs
 
     # %% patch SV names in case of "G 7" => "G07"
-    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+    data = data.assign_coords(sv=[s.replace(" ", "0")
+                                  for s in data.sv.values.tolist()])
     # %% other attributes
     data.attrs["version"] = hdr["version"]
 
@@ -248,7 +246,8 @@ def rinexobs3(
     elif "time" in data.coords.keys():
         # median is robust against gaps
         try:
-            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+            data.attrs["interval"] = np.median(np.diff(data.time)
+                                               / np.timedelta64(1, "s"))
         except TypeError:
             pass
     else:
@@ -333,9 +332,9 @@ def _epoch(
     block processing of each epoch (time step)
     """
     darr = np.atleast_2d(
-        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
-    )
-    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
+        np.genfromtxt(io.BytesIO(raw.encode("ascii")),
+                      delimiter=(14, 1, 1) * hdr["Fmax"]))
+    # data = xarray.Dataset({}, coords={"time": [], "sv": []})
     # %% assign data for each time step
     for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
         # satellite indices "si" to extract from this time's measurements
@@ -355,7 +354,7 @@ def _epoch(
             dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3]))
 
             if useindicators:
-                dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3])
+                dsf = _indicators(dsf, k, garr[:, i * 3 + 1: i * 3 + 3])
 
         if verbose:
             print(time, "\r", end="")
@@ -363,7 +362,7 @@ def _epoch(
         epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
         if len(data) == 0:
             data = epoch_data
-            #data = xarray.merge((data, epoch_data))
+            # data = xarray.merge((data, epoch_data))
         elif len(hdr["fields"]) == 1:  # one satellite system selected, faster to process
             data = xarray.concat((data, epoch_data), dim="time")
         else:  # general case, slower for different satellite systems all together
@@ -371,6 +370,7 @@ def _epoch(
 
     return data
 
+
 def rinexsystem3(
     fn: T.TextIO | Path,
     use: set[str] = None,
@@ -415,7 +415,7 @@ def rinexsystem3(
         meas = None
     # %% allocate
     times = obstime3(fn)
-    #data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    # data = xarray.Dataset({}, coords={"time": [], "sv": []})
     if tlim is not None and not isinstance(tlim[0], datetime):
         raise TypeError("time bounds are specified as datetime.datetime")
 
@@ -430,26 +430,25 @@ def rinexsystem3(
 
         obl = []
         for sk in hdr["fields"]:
-            obl=obl+hdr["fields"][sk]
+            obl = obl + hdr["fields"][sk]
         obl = np.unique(np.array(obl))
         obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
 
-
         if meas is not None:
-            obl = obl[np.isin(obl,meas)]
+            obl = obl[np.isin(obl, meas)]
 
         Nt = times.size
         Npages = len(obl)
         Nsv = int(hdr['# OF SATELLITES'])
 
-        svl = np.tile('   ',Nsv)
+        svl = np.tile('   ', Nsv)
 
         data = np.empty((Npages, Nt, Nsv))
         data.fill(np.nan)
 
         if useindicators:
-            data_lli = np.full_like(data,np.nan)
-            data_ssi = np.full_like(data,np.nan)
+            data_lli = np.full_like(data, np.nan)
+            data_ssi = np.full_like(data, np.nan)
 
         # %% process OBS file
         time_offset = []
@@ -495,19 +494,15 @@ def rinexsystem3(
             if verbose:
                 print(time, end="\r")
 
-
             for k in sv:
-                if not k in svl:
-                    svl[np.argmax(svl=='   ')]=k
-
-
+                if k not in svl:
+                    svl[np.argmax(svl == '   ')] = k
 
             darr = np.atleast_2d(
-                    np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
-                )
-
-            t = time==times
+                    np.genfromtxt(io.BytesIO(raw.encode("ascii")),
+                                  delimiter=(14, 1, 1) * hdr["Fmax"]))
 
+            t = time == times
 
             for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
                 # satellite indices "si" to extract from this time's measurements
@@ -516,37 +511,26 @@ def rinexsystem3(
                     continue
 
                 gsv = np.array(sv)[si]
-                isv = [i for i,s in enumerate(svl) if s in gsv]
-                #isv = np.zeros_like(gsv,dtype=int)
+                isv = [i for i, s in enumerate(svl) if s in gsv]
 
-                #for ii,jj in enumerate(gsv):
-                 #   isv[ii] = np.nonzero(svl==jj)
-
-
-                for i,j in enumerate(hdr['fields'][sk]):
-                    o = obl==j
-                    #for ii,jj in enumerate(gsv):
-                    #    data[o,t,svl==jj]=darr[si[ii],i*3]
-                    data[o,t,isv]=darr[si,i*3]
+                for i, j in enumerate(hdr['fields'][sk]):
+                    o = obl == j
+                    data[o, t, isv]=darr[si,i*3]
                     if useindicators:
-                        data_lli[o,t,isv]=darr[si,i*3+1]
-                        data_ssi[o,t,isv]=darr[si,i*3+2]
-
-
-
-
+                        data_lli[o, t, isv]=darr[si, i*3+1]
+                        data_ssi[o, t, isv]=darr[si, i*3+2]
 
     if '   ' in svl:
-        svl=svl[:np.argmax(svl=='   ')]
+        svl = svl[:np.argmax(svl == '   ')]
 
-    svl, isv = np.unique(svl,return_index=True)
+    svl, isv = np.unique(svl, return_index=True)
 
-    data=data[:,:,isv]
+    data=data[:, :, isv]
     if useindicators:
-        data_lli=data_lli[:,:,isv]
-        data_ssi=data_ssi[:,:,isv]
+        data_lli = data_lli[:, :, isv]
+        data_ssi = data_ssi[:, :, isv]
 
-    obs = xarray.Dataset(coords={"time":times,"sv": svl})
+    obs = xarray.Dataset(coords={"time": times, "sv": svl})
     for i, k in enumerate(obl):
         # FIXME: for limited time span reads, this drops unused data variables
         # if np.isnan(data[i, ...]).all():
@@ -561,11 +545,10 @@ def rinexsystem3(
             elif k[0] == 'C':
                 obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
 
-
     obs = obs.dropna(dim="sv", how="all")
     obs = obs.dropna(dim="time", how="all")  # when tlim specified
 
-    data=obs
+    data = obs
     # %% patch SV names in case of "G 7" => "G07"
     data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
     # %% other attributes
@@ -716,7 +699,7 @@ def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dic
             fields[sk] = np.array(fields[sk])[ind].tolist()
             sysind[sk] = np.empty(Fmax * 3, dtype=bool)  # *3 due to LLI, SSI
             for j, i in enumerate(ind):
-                sysind[sk][j * 3 : j * 3 + 3] = i
+                sysind[sk][j * 3: j * 3 + 3] = i
     else:
         sysind = {k: np.s_[:] for k in fields}
 

From e7fd313fb713109c3c05781296fed06b088deadc Mon Sep 17 00:00:00 2001
From: Ben <breid.phys@gmail.com>
Date: Thu, 8 Dec 2022 17:35:18 -0400
Subject: [PATCH 12/19] fast v3 loads to memory

---
 src/georinex/obs3.py | 13 ++++++++++---
 src/georinex/rio.py  |  6 +++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index 1e98e4d..0563753 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -44,7 +44,7 @@ def rinexobs3(
     useindicators: SSI, LLI are output
     meas:  'L1C'  or  ['L1C', 'C1C'] or similar
 
-    fast: Still double-reading file to get times
+    fast: Loads entire file into memory as a string
           Uses # OF SATELLITES from header to try and size nparray, falls back
           to Nsvsys if missing
           if false, uses old _epoch method
@@ -73,8 +73,15 @@ def rinexobs3(
     """
     Nsvsys = 36
 
+    if isinstance(fn, Path):
+        file_name = fn
+    else:
+        file_name = None
+
     # %% allocate
     if fast:
+        with opener(fn) as f:
+            fn = [ln for ln in f]
         times = obstime3(fn)
     else:
         data = xarray.Dataset({}, coords={"time": [], "sv": []})
@@ -256,8 +263,8 @@ def rinexobs3(
     data.attrs["rinextype"] = "obs"
     data.attrs["fast_processing"] = int(fast)  # bool is not allowed in NetCDF4
     data.attrs["time_system"] = determine_time_system(hdr)
-    if isinstance(fn, Path):
-        data.attrs["filename"] = fn.name
+    if isinstance(file_name, Path):
+        data.attrs["filename"] = file_name.name
 
     if "position" in hdr.keys():
         data.attrs["position"] = hdr["position"]
diff --git a/src/georinex/rio.py b/src/georinex/rio.py
index 4787428..5bcedc6 100644
--- a/src/georinex/rio.py
+++ b/src/georinex/rio.py
@@ -20,7 +20,7 @@
 
 
 @contextmanager
-def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]:
+def opener(fn: T.TextIO | Path | list, header: bool = False) -> T.Iterator[T.TextIO]:
     """provides file handle for regular ASCII or gzip files transparently"""
     if isinstance(fn, str):
         fn = Path(fn).expanduser()
@@ -28,6 +28,10 @@ def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]:
     if isinstance(fn, io.StringIO):
         fn.seek(0)
         yield fn
+    elif isinstance(fn, list):
+        f = ''.join(fn)
+        fn = io.StringIO(f)
+        yield fn
     elif isinstance(fn, Path):
         # need to have this check for Windows
         if not fn.is_file():

From c35576f59568a9dd22bafc318242eabe789671b6 Mon Sep 17 00:00:00 2001
From: Ben <breid.phys@gmail.com>
Date: Wed, 18 Jan 2023 19:13:38 -0400
Subject: [PATCH 13/19] limited calls to genfromtxt for 2x speed

---
 src/georinex/obs3.py | 209 +++++++++++++++++++------------------------
 1 file changed, 93 insertions(+), 116 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index 0563753..eb0b614 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -45,8 +45,7 @@ def rinexobs3(
     meas:  'L1C'  or  ['L1C', 'C1C'] or similar
 
     fast: Loads entire file into memory as a string
-          Uses # OF SATELLITES from header to try and size nparray, falls back
-          to Nsvsys if missing
+            TODO: time interval, time offset, etc.
           if false, uses old _epoch method
 
     interval: allows decimating file read by time e.g. every 5 seconds.
@@ -64,15 +63,6 @@ def rinexobs3(
     if not meas or not meas[0].strip():
         meas = None
 
-    """
-    Nsvsys may need updating as GNSS systems grow.
-    Let us know if you needed to change them.
-
-    Beidou is 35 max
-    Galileo is 36 max
-    """
-    Nsvsys = 36
-
     if isinstance(fn, Path):
         file_name = fn
     else:
@@ -108,138 +98,125 @@ def rinexobs3(
 
             Nt = times.size
             Npages = len(obl)
-            if '# OF SATELLITES' in hdr.keys():
-                Nsv = int(hdr['# OF SATELLITES'])
-            else:
-                Nsv = Nsvsys * len(hdr['fields'])
 
-            svl = np.tile('   ', Nsv)
+            f = np.array([f for f in f])  # don't need opener anymore
+            sv = np.array([ln[:3] for ln in f])
+            (usv, isv) = np.unique(sv, return_inverse=True)  # get all sv lines
+
+            # list of epoch numbers per datum
+            timeIndex = np.cumsum(isv ==
+                                  np.flatnonzero(np.char.rfind(usv, '>') == 0)
+                                  )-1
 
-            data = np.empty((Npages, Nt, Nsv))
-            data.fill(np.nan)
+            # usv contains epoch entries too, get only sv
+            svl = usv[np.logical_not(np.char.startswith(usv, '>'))]
+            Nsv = svl.size
+
+            data = np.full((Npages, Nt, Nsv), np.nan)
 
             if useindicators:
                 data_lli = np.full_like(data, np.nan)
                 data_ssi = np.full_like(data, np.nan)
 
-        else:
-            hdr = obsheader3(f, use, meas)
-
-        # process OBS file
-        time_offset = []
-        for ln in f:
-            if not ln.startswith(">"):  # end of file
-                break
-
-            try:
-                time = _timeobs(ln)
-            except ValueError:  # garbage between header and RINEX data
-                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
-                continue
-
-            try:
-                time_offset.append(float(ln[41:56]))
-            except ValueError:
-                pass
-            # %% get SV indices
-            sv = []
-            raw = ""
-            # Number of visible satellites this time %i3  pg. A13
-            for _ in range(int(ln[33:35])):
-                ln = f.readline()
-                if use is None or ln[0] in use:
-                    sv.append(ln[:3])
-                    raw += ln[3:]
+            time_offset = []  # TODO: fill this list
 
-            if tlim is not None:
-                if time < tlim[0]:
+            for i, sv in enumerate(usv):
+                if sv[0] == '>':
+                    # time offset processing here
+                    continue
+                elif use is not None and sv[0] not in use:
                     continue
-                elif time > tlim[1]:
-                    break
-
-            if interval is not None:
-                if last_epoch is None:  # initialization
-                    last_epoch = time
-                else:
-                    if time - last_epoch < interval:
-                        continue
-                    else:
-                        last_epoch += interval
-
-            if verbose:
-                print(time, end="\r")
-
-            if fast:
-                for k in sv:
-                    # update list of satellites
-                    if k not in svl:
-                        svl[np.argmax(svl == '   ')] = k
 
+                # genfromtext is bottleneck, limit calls this way
                 darr = np.atleast_2d(
-                        np.genfromtxt(io.BytesIO(raw.encode("ascii")),
+                        np.genfromtxt(np.char.lstrip(f[isv == i], sv),
                                       delimiter=(14, 1, 1) * hdr["Fmax"]))
 
-                t = time == times
+                t_i = timeIndex[isv == i]
 
-                for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
-                    # satellite indices "si" to extract from this time's measurements
+                # jsv == i-1 except for files spanning 1999-2000
+                jsv = svl == sv
 
-                    si = [i for i, s in enumerate(sv) if s[0] in sk]
-                    if len(si) == 0:  # no SV of this system "sk" at this time
+                for j, jobl in enumerate(hdr['fields'][sv[0]]):
+                    o = obl == jobl
+                    if not np.any(o):
                         continue
 
-                    gsv = np.array(sv)[si]
+                    data[o, t_i, jsv] = darr[:, j*3]
 
-                    isv, jsv = np.nonzero(np.logical_and(
-                        np.atleast_2d(svl).T == np.atleast_2d(sv),
-                                      np.isin(sv, gsv))                  )
+                    if useindicators:
+                        data_lli[np.ix_(o, t_i, jsv)] = darr[:, j*3+1]
+                        data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j*3+2]
 
-                    for i, j in enumerate(hdr['fields'][sk]):
-                        o = obl == j
-                        if not np.any(o):
-                            continue
-                        data[np.ix_(o, t, isv)] = darr[jsv, i*3]
+            obs = xarray.Dataset(coords={"sv": svl, "time": times})
+            for i, k in enumerate(obl):
+                if k is None:
+                    continue
+                elif np.all(np.isnan(data[i, :, :])):
+                    # drop all nan datasets like tests expect
+                    continue
+                obs[k] = (("time", "sv"), data[i, :, :])
+                if useindicators:
+                    if k[0] == 'L':
+                        obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
+                        obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+                    # elif k[0] == 'C': # only for code?
+                    else:
+                        obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
 
-                        if useindicators:
-                            data_lli[np.ix_(o, t, isv)] = darr[jsv, i*3+1]
-                            data_ssi[np.ix_(o, t, isv)] = darr[jsv, i*3+2]
+            obs = obs.dropna(dim="sv", how="all")
+             # obs = obs.dropna(dim="time", how="all")  # when tlim specified
 
-            else:
-                # this time epoch is complete, assemble the data.
-                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+            data = obs
 
-    if fast:
-        if '   ' in svl:
-            # remove blank satellites (if tlim used)
-            svl = svl[:np.argmax(svl == '   ')]
+        else:
+            hdr = obsheader3(f, use, meas)
+            # process OBS file
+            time_offset = []
+            for ln in f:
+                if not ln.startswith(">"):  # end of file
+                    break
 
-        svl, isv = np.unique(svl, return_index=True)
+                try:
+                    time = _timeobs(ln)
+                except ValueError:  # garbage between header and RINEX data
+                    logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                    continue
 
-        data = data[:, :, isv]
-        if useindicators:
-            data_lli = data_lli[:, :, isv]
-            data_ssi = data_ssi[:, :, isv]
+                try:
+                    time_offset.append(float(ln[41:56]))
+                except ValueError:
+                    pass
+                # %% get SV indices
+                sv = []
+                raw = ""
+                # Number of visible satellites this time %i3  pg. A13
+                for _ in range(int(ln[33:35])):
+                    ln = f.readline()
+                    if use is None or ln[0] in use:
+                        sv.append(ln[:3])
+                        raw += ln[3:]
+
+                if tlim is not None:
+                    if time < tlim[0]:
+                        continue
+                    elif time > tlim[1]:
+                        break
 
-        obs = xarray.Dataset(coords={"sv": svl, "time": times})
-        for i, k in enumerate(obl):
-            if k is None:
-                continue
-            elif np.all(np.isnan(data[i, :, :])):
-                # drop all nan datasets like tests expect
-                continue
-            obs[k] = (("time", "sv"), data[i, :, :])
-            if useindicators:
-                if k[0] == 'L':
-                    obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
-                    obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-                # elif k[0] == 'C': # only for code?
-                else:
-                    obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+                if interval is not None:
+                    if last_epoch is None:  # initialization
+                        last_epoch = time
+                    else:
+                        if time - last_epoch < interval:
+                            continue
+                        else:
+                            last_epoch += interval
 
-        obs = obs.dropna(dim="sv", how="all")
-         # obs = obs.dropna(dim="time", how="all")  # when tlim specified
+                if verbose:
+                    print(time, end="\r")
 
-        data = obs
+                # this time epoch is complete, assemble the data.
+                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
 
     # %% patch SV names in case of "G 7" => "G07"
     data = data.assign_coords(sv=[s.replace(" ", "0")

From 5f50d9db1fc702d321ad2c8c2bb6c2be2ed30b78 Mon Sep 17 00:00:00 2001
From: breid-phys <breid.phys@gmail.com>
Date: Tue, 27 Jun 2023 12:41:34 -0300
Subject: [PATCH 14/19]  fixed datetime64 issue

---
 src/georinex/obs2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py
index 5dee6db..e468d92 100644
--- a/src/georinex/obs2.py
+++ b/src/georinex/obs2.py
@@ -36,7 +36,7 @@ def rinexobs2(
     if not use:
         use = {"C", "E", "G", "J", "R", "S"}
 
-    obs = xarray.Dataset({}, coords={"time": np.array([],dtype=np.datetime64), "sv": []})
+    obs = xarray.Dataset({}, coords={"time": np.array([],dtype='datetime64[ns]'), "sv": []})
     attrs: dict[str, T.Any] = {}
     for u in use:
         o = rinexsystem2(

From 0d2f69ba6f41a2e68746077dea43114fed35a698 Mon Sep 17 00:00:00 2001
From: breid-phys <breid.phys@gmail.com>
Date: Thu, 29 Jun 2023 10:35:04 -0300
Subject: [PATCH 15/19]  fixed bug with nav errors

---
 channeldata.json                       |   7 +++
 index.html                             |  83 +++++++++++++++++++++++++
 noarch/current_repodata.json           |   9 +++
 noarch/current_repodata.json.bz2       | Bin 0 -> 126 bytes
 noarch/index.html                      |  82 ++++++++++++++++++++++++
 noarch/repodata.json                   |   9 +++
 noarch/repodata.json.bz2               | Bin 0 -> 126 bytes
 noarch/repodata_from_packages.json     |   9 +++
 noarch/repodata_from_packages.json.bz2 | Bin 0 -> 126 bytes
 9 files changed, 199 insertions(+)
 create mode 100644 channeldata.json
 create mode 100644 index.html
 create mode 100644 noarch/current_repodata.json
 create mode 100644 noarch/current_repodata.json.bz2
 create mode 100644 noarch/index.html
 create mode 100644 noarch/repodata.json
 create mode 100644 noarch/repodata.json.bz2
 create mode 100644 noarch/repodata_from_packages.json
 create mode 100644 noarch/repodata_from_packages.json.bz2

diff --git a/channeldata.json b/channeldata.json
new file mode 100644
index 0000000..fdd2409
--- /dev/null
+++ b/channeldata.json
@@ -0,0 +1,7 @@
+{
+  "channeldata_version": 1,
+  "packages": {},
+  "subdirs": [
+    "noarch"
+  ]
+}
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..3eda6ed
--- /dev/null
+++ b/index.html
@@ -0,0 +1,83 @@
+<html>
+<head>
+  <title>georinex</title>
+  <style type="text/css">
+    a, a:active {
+      text-decoration: none; color: blue;
+    }
+    a:visited {
+      color: #48468F;
+    }
+    a:hover, a:focus {
+      text-decoration: underline; color: red;
+    }
+    body {
+      background-color: #F5F5F5;
+    }
+    h2 {
+      margin-bottom: 12px;
+    }
+    th, td {
+      font: 100% monospace; text-align: left;
+    }
+    th {
+      font-weight: bold; padding-right: 14px; padding-bottom: 3px;
+    }
+    th.tight {
+        padding-right: 6px;
+    }
+    td {
+      padding-right: 14px;
+    }
+    td.tight {
+        padding-right: 8px;
+    }
+    td.s, th.s {
+      text-align: right;
+    }
+    td.summary {
+      white-space: nowrap;
+      overflow: hidden;
+    }
+    td.packagename {
+      white-space: nowrap;
+      text-overflow: ellipsis;
+      overflow: hidden;
+      max-width: 180px;
+      padding-right: 8px;
+    }
+    td.version {
+      //white-space: nowrap;
+      overflow: hidden;
+      max-width: 90px;
+      padding-right: 8px;
+    }
+    table {
+      background-color: white;
+      border-top: 1px solid #646464;
+      border-bottom: 1px solid #646464;
+      padding-top: 10px;
+      padding-bottom: 14px;
+    }
+    address {
+      color: #787878;
+      padding-top: 10px;
+    }
+  </style>
+</head>
+<body>
+  <h2>georinex</h2>
+  <h3><a href="rss.xml">RSS Feed</a>&nbsp;&nbsp;&nbsp;<a href="channeldata.json">channeldata.json</a></h3>
+<a href="noarch">noarch</a>&nbsp;&nbsp;&nbsp;  <table>
+    <tr>
+      <th style="padding-right:18px;">Package</th>
+      <th>Latest Version</th>
+      <th>Doc</th>
+      <th>Dev</th>
+      <th>License</th>
+<th class="tight">noarch</th>      <th>Summary</th>
+    </tr>
+  </table>
+  <address>Updated: 2023-06-08 14:39:19 +0000 - Files: 0</address>
+</body>
+</html>
\ No newline at end of file
diff --git a/noarch/current_repodata.json b/noarch/current_repodata.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/current_repodata.json
@@ -0,0 +1,9 @@
+{
+  "info": {
+    "subdir": "noarch"
+  },
+  "packages": {},
+  "packages.conda": {},
+  "removed": [],
+  "repodata_version": 1
+}
diff --git a/noarch/current_repodata.json.bz2 b/noarch/current_repodata.json.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..34499176b4ca978123fa4d2b1d85533774f5f867
GIT binary patch
literal 126
zcmV-^0D=EPT4*^jL0KkKSsDu+S^xk%TYx|iPz4|m01ChB-wGfAbQ+22Hj(KDgFwch
z$wt)m10m`FG$`~a3eYtYm7oYhXV(wUZotdp>j?sm)=p!=7h6LU_DABFZAI`Dme~+Q
g7Jaajjb0$+AWY8UiY)tU6j$+gBvXY60idzfpkos=Q~&?~

literal 0
HcmV?d00001

diff --git a/noarch/index.html b/noarch/index.html
new file mode 100644
index 0000000..3610ed4
--- /dev/null
+++ b/noarch/index.html
@@ -0,0 +1,82 @@
+<html>
+<head>
+  <title>georinex/noarch</title>
+  <style type="text/css">
+    a, a:active {
+      text-decoration: none; color: blue;
+    }
+    a:visited {
+      color: #48468F;
+    }
+    a:hover, a:focus {
+      text-decoration: underline; color: red;
+    }
+    body {
+      background-color: #F5F5F5;
+    }
+    h2 {
+      margin-bottom: 12px;
+    }
+    th, td {
+      font: 100% monospace; text-align: left;
+    }
+    th {
+      font-weight: bold; padding-right: 14px; padding-bottom: 3px;
+    }
+    td {
+      padding-right: 20px;
+    }
+    td.s, th.s {
+      text-align: right;
+    }
+    table {
+      background-color: white;
+      border-top: 1px solid #646464;
+      border-bottom: 1px solid #646464;
+      padding-top: 10px;
+      padding-bottom: 14px;
+    }
+    address {
+      color: #787878;
+      padding-top: 10px;
+    }
+  </style>
+</head>
+<body>
+  <h2>georinex/noarch</h2>
+  <table>
+    <tr>
+      <th>Filename</th>
+      <th>Size</th>
+      <th>Last Modified</th>
+      <th>SHA256</th>
+      <th>MD5</th>
+    </tr>
+    <tr>
+      <td><a href="repodata.json" alt="repodata.json">repodata.json</a></td>
+      <td class="s">127 B</td>
+      <td>2023-06-08 14:39:19 +0000</td>
+      <td>b546412dc20b790c5f9c223e394ff0e39a27ba12a99631ca4d1bb6c0ca3bd05c</td>
+      <td>917338f97423c09c911618661fef3056</td>
+    </tr>    <tr>
+      <td><a href="repodata.json.bz2" alt="repodata.json.bz2">repodata.json.bz2</a></td>
+      <td class="s">126 B</td>
+      <td>2023-06-08 14:39:19 +0000</td>
+      <td>32d48c11b6d5ee61a54a9076f8c06a62ba795f69b671db3e44b1301fa0efae0e</td>
+      <td>1ecb5b301b9a2ba831f0a68a7ace5f00</td>
+    </tr>    <tr>
+      <td><a href="repodata_from_packages.json" alt="repodata_from_packages.json">repodata_from_packages.json</a></td>
+      <td class="s">127 B</td>
+      <td>2023-06-08 14:39:19 +0000</td>
+      <td>b546412dc20b790c5f9c223e394ff0e39a27ba12a99631ca4d1bb6c0ca3bd05c</td>
+      <td>917338f97423c09c911618661fef3056</td>
+    </tr>    <tr>
+      <td><a href="repodata_from_packages.json.bz2" alt="repodata_from_packages.json.bz2">repodata_from_packages.json.bz2</a></td>
+      <td class="s">126 B</td>
+      <td>2023-06-08 14:39:19 +0000</td>
+      <td>32d48c11b6d5ee61a54a9076f8c06a62ba795f69b671db3e44b1301fa0efae0e</td>
+      <td>1ecb5b301b9a2ba831f0a68a7ace5f00</td>
+    </tr>  </table>
+  <address>Updated: 2023-06-08 14:39:19 +0000 - Files: 0</address>
+</body>
+</html>
\ No newline at end of file
diff --git a/noarch/repodata.json b/noarch/repodata.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/repodata.json
@@ -0,0 +1,9 @@
+{
+  "info": {
+    "subdir": "noarch"
+  },
+  "packages": {},
+  "packages.conda": {},
+  "removed": [],
+  "repodata_version": 1
+}
diff --git a/noarch/repodata.json.bz2 b/noarch/repodata.json.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..34499176b4ca978123fa4d2b1d85533774f5f867
GIT binary patch
literal 126
zcmV-^0D=EPT4*^jL0KkKSsDu+S^xk%TYx|iPz4|m01ChB-wGfAbQ+22Hj(KDgFwch
z$wt)m10m`FG$`~a3eYtYm7oYhXV(wUZotdp>j?sm)=p!=7h6LU_DABFZAI`Dme~+Q
g7Jaajjb0$+AWY8UiY)tU6j$+gBvXY60idzfpkos=Q~&?~

literal 0
HcmV?d00001

diff --git a/noarch/repodata_from_packages.json b/noarch/repodata_from_packages.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/repodata_from_packages.json
@@ -0,0 +1,9 @@
+{
+  "info": {
+    "subdir": "noarch"
+  },
+  "packages": {},
+  "packages.conda": {},
+  "removed": [],
+  "repodata_version": 1
+}
diff --git a/noarch/repodata_from_packages.json.bz2 b/noarch/repodata_from_packages.json.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..34499176b4ca978123fa4d2b1d85533774f5f867
GIT binary patch
literal 126
zcmV-^0D=EPT4*^jL0KkKSsDu+S^xk%TYx|iPz4|m01ChB-wGfAbQ+22Hj(KDgFwch
z$wt)m10m`FG$`~a3eYtYm7oYhXV(wUZotdp>j?sm)=p!=7h6LU_DABFZAI`Dme~+Q
g7Jaajjb0$+AWY8UiY)tU6j$+gBvXY60idzfpkos=Q~&?~

literal 0
HcmV?d00001


From 9d283613567cb0e20df690fa85c510cce7ae584f Mon Sep 17 00:00:00 2001
From: breid-phys <breid.phys@gmail.com>
Date: Thu, 29 Jun 2023 16:21:12 -0300
Subject: [PATCH 16/19]  fixes for file reading edge cases

---
 src/georinex/obs3.py |   5 +-
 src/georinex/rio.py  | 121 ++++++++++++++++++++++++++-----------------
 2 files changed, 76 insertions(+), 50 deletions(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index eb0b614..0669331 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -99,7 +99,8 @@ def rinexobs3(
             Nt = times.size
             Npages = len(obl)
 
-            f = np.array([f for f in f])  # don't need opener anymore
+            f = np.array([f for f in f if f[1:3] != '  '])  # don't need opener anymore
+
             sv = np.array([ln[:3] for ln in f])
             (usv, isv) = np.unique(sv, return_inverse=True)  # get all sv lines
 
@@ -121,7 +122,7 @@ def rinexobs3(
             time_offset = []  # TODO: fill this list
 
             for i, sv in enumerate(usv):
-                if sv[0] == '>':
+                if sv[0] == '>' or sv[2] == ' ':
                     # time offset processing here
                     continue
                 elif use is not None and sv[0] not in use:
diff --git a/src/georinex/rio.py b/src/georinex/rio.py
index 5bcedc6..cbd6d25 100644
--- a/src/georinex/rio.py
+++ b/src/georinex/rio.py
@@ -20,7 +20,7 @@
 
 
 @contextmanager
-def opener(fn: T.TextIO | Path | list, header: bool = False) -> T.Iterator[T.TextIO]:
+def opener(fn: T.TextIO | Path | list, header: bool = False, encoding="ascii") -> T.Iterator[T.TextIO]:
     """provides file handle for regular ASCII or gzip files transparently"""
     if isinstance(fn, str):
         fn = Path(fn).expanduser()
@@ -37,45 +37,15 @@ def opener(fn: T.TextIO | Path | list, header: bool = False) -> T.Iterator[T.Tex
         if not fn.is_file():
             raise FileNotFoundError(fn)
 
-        finf = fn.stat()
-        if finf.st_size > 100e6:
-            logging.info(f"opening {finf.st_size/1e6} MByte {fn.name}")
-
-        suffix = fn.suffix.lower()
-
-        if suffix == ".gz":
-            with gzip.open(fn, "rt") as f:
-                _, is_crinex = rinex_version(first_nonblank_line(f))
-                f.seek(0)
-
-                if is_crinex and not header:
-                    # Conversion to string is necessary because of a quirk where gzip.open()
-                    # even with 'rt' doesn't decompress until read.
-                    f = io.StringIO(crx2rnx(f.read()))
-                yield f
-        elif suffix == ".bz2":
-            # this is for plain bzip2 files, NOT tar.bz2, which requires f.seek(512)
-            with bz2.open(fn, "rt") as f:
-                _, is_crinex = rinex_version(first_nonblank_line(f))
-                f.seek(0)
-
-                if is_crinex and not header:
-                    f = io.StringIO(crx2rnx(f.read()))
-                yield f
-        elif suffix == ".zip":
-            with zipfile.ZipFile(fn, "r") as z:
-                flist = z.namelist()
-                for rinexfn in flist:
-                    with z.open(rinexfn, "r") as bf:
-                        f = io.StringIO(
-                            io.TextIOWrapper(bf, encoding="ascii", errors="ignore").read()  # type: ignore
-                        )
-                        yield f
-        elif suffix == ".z":
-            if unlzw is None:
-                raise ImportError("pip install unlzw3")
-            with fn.open("rb") as zu:
-                with io.StringIO(unlzw(zu.read()).decode("ascii")) as f:
+        try:
+            finf = fn.stat()
+            if finf.st_size > 100e6:
+                logging.info(f"opening {finf.st_size/1e6} MByte {fn.name}")
+
+            suffix = fn.suffix.lower()
+
+            if suffix == ".gz":
+                with gzip.open(fn, "rt", encoding=encoding) as f:
                     _, is_crinex = rinex_version(first_nonblank_line(f))
                     f.seek(0)
 
@@ -84,14 +54,69 @@ def opener(fn: T.TextIO | Path | list, header: bool = False) -> T.Iterator[T.Tex
                         # even with 'rt' doesn't decompress until read.
                         f = io.StringIO(crx2rnx(f.read()))
                     yield f
-        else:  # assume not compressed (or Hatanaka)
-            with fn.open("r", encoding="ascii", errors="ignore") as f:
-                _, is_crinex = rinex_version(first_nonblank_line(f))
-                f.seek(0)
-
-                if is_crinex and not header:
-                    f = io.StringIO(crx2rnx(f))
-                yield f
+            elif suffix == ".bz2":
+                # this is for plain bzip2 files, NOT tar.bz2, which requires f.seek(512)
+                with bz2.open(fn, "rt", encoding=encoding) as f:
+                    _, is_crinex = rinex_version(first_nonblank_line(f))
+                    f.seek(0)
+
+                    if is_crinex and not header:
+                        f = io.StringIO(crx2rnx(f.read()))
+                    yield f
+            elif suffix == ".zip":
+                with zipfile.ZipFile(fn, "r") as z:
+                    flist = z.namelist()
+                    for rinexfn in flist:
+                        with z.open(rinexfn, "r") as bf:
+                            f = io.StringIO(
+                                io.TextIOWrapper(bf, encoding=encoding, errors="ignore").read()  # type: ignore
+                            )
+                            yield f
+            elif suffix == ".z":
+                if unlzw is None:
+                    raise ImportError("pip install unlzw3")
+                try:
+                    with fn.open("rb") as zu:
+                        with io.StringIO(unlzw(zu.read()).decode(encoding)) as f:
+                            _, is_crinex = rinex_version(first_nonblank_line(f))
+                            f.seek(0)
+
+                            if is_crinex and not header:
+                                # Conversion to string is necessary because of a quirk where gzip.open()
+                                # even with 'rt' doesn't decompress until read.
+                                f = io.StringIO(crx2rnx(f.read()))
+                            yield f
+                except ValueError as e:
+                    if 'magic bytes' in e.args[0]:
+                        # sometimes .Z files are actually gzipped
+                        with gzip.open(fn, "rt", encoding=encoding) as f:
+                            _, is_crinex = rinex_version(first_nonblank_line(f))
+                            f.seek(0)
+
+                            if is_crinex and not header:
+                                # Conversion to string is necessary because of a quirk where gzip.open()
+                                # even with 'rt' doesn't decompress until read.
+                                f = io.StringIO(crx2rnx(f.read()))
+                            yield f
+
+            else:  # assume not compressed (or Hatanaka)
+                with fn.open("r", encoding=encoding, errors="ignore") as f:
+                    _, is_crinex = rinex_version(first_nonblank_line(f))
+                    f.seek(0)
+
+                    if is_crinex and not header:
+                        f = io.StringIO(crx2rnx(f))
+                    yield f
+        except UnicodeDecodeError as e:
+            # some RINEX files have awkward encoding
+            if encoding == "ascii":
+                with opener(fn, header, encoding="latin-1") as f:
+                    yield f
+            elif encoding == "latin-1":
+                with opener(fn, header, encoding="utf-8") as f:
+                    yield f
+            else:
+                raise(e)
     else:
         raise OSError(f"Unsure what to do with input of type: {type(fn)}")
 

From 66551247ebe4a32ff549c9df50f2e62bf92d545d Mon Sep 17 00:00:00 2001
From: breid-phys <breid.phys@gmail.com>
Date: Wed, 5 Jul 2023 18:17:32 -0300
Subject: [PATCH 17/19]  added regex to reject bad potential sv

---
 src/georinex/obs3.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index 0669331..c0f5aa4 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -5,6 +5,7 @@
 from datetime import datetime, timedelta
 import io
 import xarray
+import re
 import typing as T
 
 try:
@@ -122,7 +123,7 @@ def rinexobs3(
             time_offset = []  # TODO: fill this list
 
             for i, sv in enumerate(usv):
-                if sv[0] == '>' or sv[2] == ' ':
+                if re.match(r'[A-Z][ \d]\d', sv) is None:
                     # time offset processing here
                     continue
                 elif use is not None and sv[0] not in use:

From a8579a2c36d14474895cf921101c5ddc3b17510c Mon Sep 17 00:00:00 2001
From: breid-phys <breid.phys@gmail.com>
Date: Thu, 5 Oct 2023 18:22:14 -0300
Subject: [PATCH 18/19] fixed SBAS error for v2 fast code

---
 src/georinex/obs2.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py
index e468d92..0f53448 100644
--- a/src/georinex/obs2.py
+++ b/src/georinex/obs2.py
@@ -29,14 +29,13 @@ def rinexobs2(
     fast: bool = True,
     interval: float | int | timedelta = None,
 ) -> xarray.Dataset:
-
     if isinstance(use, str):
         use = {use}
 
     if not use:
         use = {"C", "E", "G", "J", "R", "S"}
 
-    obs = xarray.Dataset({}, coords={"time": np.array([],dtype='datetime64[ns]'), "sv": []})
+    obs = xarray.Dataset({}, coords={"time": np.array([], dtype="datetime64[ns]"), "sv": []})
     attrs: dict[str, T.Any] = {}
     for u in use:
         o = rinexsystem2(
@@ -233,7 +232,10 @@ def rinexsystem2(
             assert darr.shape[0] == gsv.size
 
             # %% select only "used" satellites
-            isv = [int(s[1:]) - 1 for s in gsv]
+            if system == "S":
+                isv = range(len(gsv))
+            else:
+                isv = [int(s[1:]) - 1 for s in gsv]
 
             for i, k in enumerate(hdr["fields_ind"]):
                 if useindicators:
@@ -263,9 +265,12 @@ def rinexsystem2(
             else:
                 fields.extend([None, None])
 
-    obs = xarray.Dataset(
-        coords={"time": times, "sv": [f"{system}{i:02d}" for i in range(1, Nsvsys + 1)]}
-    )
+    if system == "S":
+        obs = xarray.Dataset(coords={"time": times, "sv": gsv})
+    else:
+        obs = xarray.Dataset(
+            coords={"time": times, "sv": [f"{system}{i:02d}" for i in range(1, Nsvsys + 1)]}
+        )
 
     for i, k in enumerate(fields):
         # FIXME: for limited time span reads, this drops unused data variables
@@ -273,7 +278,7 @@ def rinexsystem2(
         #     continue
         if k is None:
             continue
-        obs[k] = (("time", "sv"), data[i, :, :])
+        obs[k] = (("time", "sv"), data[i, :, : len(obs["sv"])])
 
     obs = obs.dropna(dim="sv", how="all")
     obs = obs.dropna(dim="time", how="all")  # when tlim specified
@@ -557,7 +562,6 @@ def _timehdr(ln: str) -> datetime:
 
 
 def _timeobs(ln: str) -> datetime:
-
     year = int(ln[1:3])
     if year < 80:
         year += 2000

From bb1310d8331136e8cfe24b93ec6f33725ec44f16 Mon Sep 17 00:00:00 2001
From: breid-phys <breid.phys@gmail.com>
Date: Tue, 17 Oct 2023 11:35:56 -0300
Subject: [PATCH 19/19] added support for rinex v4

---
 src/georinex/base.py |  12 ++
 src/georinex/obs3.py |  93 ++++-----
 src/georinex/obs4.py | 474 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 530 insertions(+), 49 deletions(-)
 create mode 100644 src/georinex/obs4.py

diff --git a/src/georinex/base.py b/src/georinex/base.py
index 5d419d6..fc26e56 100644
--- a/src/georinex/base.py
+++ b/src/georinex/base.py
@@ -8,6 +8,7 @@
 from .rio import rinexinfo
 from .obs2 import rinexobs2
 from .obs3 import rinexobs3
+from .obs4 import rinexobs4
 from .nav2 import rinexnav2
 from .nav3 import rinexnav3
 from .sp3 import load_sp3
@@ -235,6 +236,17 @@ def rinexobs(
             fast=fast,
             interval=interval,
         )
+    elif int(info["version"]) == 4:
+        obs = rinexobs4(
+            fn,
+            use,
+            tlim=tlim,
+            useindicators=useindicators,
+            meas=meas,
+            verbose=verbose,
+            fast=fast,
+            interval=interval,
+        )
     else:
         raise ValueError(f"unknown RINEX {info}  {fn}")
 
diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index c0f5aa4..77ed12a 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -83,7 +83,6 @@ def rinexobs3(
     last_epoch = None
     # %% loop
     with opener(fn) as f:
-
         if fast:
             hdr = obsheader3(f, use)
 
@@ -91,27 +90,24 @@ def rinexobs3(
             for sk in hdr["fields"]:
                 obl = obl + hdr["fields"][sk]
             obl = np.unique(np.array(obl))
-            obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
+            obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
 
             if meas is not None:
-                obl = obl[np.any([np.char.find(obl, j) == 0
-                                  for i, j in enumerate(meas)], 0)]
+                obl = obl[np.any([np.char.find(obl, j) == 0 for i, j in enumerate(meas)], 0)]
 
             Nt = times.size
             Npages = len(obl)
 
-            f = np.array([f for f in f if f[1:3] != '  '])  # don't need opener anymore
+            f = np.array([f for f in f if f[1:3] != "  "])  # don't need opener anymore
 
             sv = np.array([ln[:3] for ln in f])
             (usv, isv) = np.unique(sv, return_inverse=True)  # get all sv lines
 
             # list of epoch numbers per datum
-            timeIndex = np.cumsum(isv ==
-                                  np.flatnonzero(np.char.rfind(usv, '>') == 0)
-                                  )-1
+            timeIndex = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1
 
             # usv contains epoch entries too, get only sv
-            svl = usv[np.logical_not(np.char.startswith(usv, '>'))]
+            svl = usv[np.logical_not(np.char.startswith(usv, ">"))]
             Nsv = svl.size
 
             data = np.full((Npages, Nt, Nsv), np.nan)
@@ -123,7 +119,7 @@ def rinexobs3(
             time_offset = []  # TODO: fill this list
 
             for i, sv in enumerate(usv):
-                if re.match(r'[A-Z][ \d]\d', sv) is None:
+                if re.match(r"[A-Z][ \d]\d", sv) is None:
                     # time offset processing here
                     continue
                 elif use is not None and sv[0] not in use:
@@ -131,24 +127,26 @@ def rinexobs3(
 
                 # genfromtext is bottleneck, limit calls this way
                 darr = np.atleast_2d(
-                        np.genfromtxt(np.char.lstrip(f[isv == i], sv),
-                                      delimiter=(14, 1, 1) * hdr["Fmax"]))
+                    np.genfromtxt(
+                        np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"]
+                    )
+                )
 
                 t_i = timeIndex[isv == i]
 
                 # jsv == i-1 except for files spanning 1999-2000
                 jsv = svl == sv
 
-                for j, jobl in enumerate(hdr['fields'][sv[0]]):
+                for j, jobl in enumerate(hdr["fields"][sv[0]]):
                     o = obl == jobl
                     if not np.any(o):
                         continue
 
-                    data[o, t_i, jsv] = darr[:, j*3]
+                    data[o, t_i, jsv] = darr[:, j * 3]
 
                     if useindicators:
-                        data_lli[np.ix_(o, t_i, jsv)] = darr[:, j*3+1]
-                        data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j*3+2]
+                        data_lli[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 1]
+                        data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 2]
 
             obs = xarray.Dataset(coords={"sv": svl, "time": times})
             for i, k in enumerate(obl):
@@ -159,15 +157,15 @@ def rinexobs3(
                     continue
                 obs[k] = (("time", "sv"), data[i, :, :])
                 if useindicators:
-                    if k[0] == 'L':
-                        obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
-                        obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+                    if k[0] == "L":
+                        obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :])
+                        obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
                     # elif k[0] == 'C': # only for code?
                     else:
-                        obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+                        obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
 
             obs = obs.dropna(dim="sv", how="all")
-             # obs = obs.dropna(dim="time", how="all")  # when tlim specified
+            # obs = obs.dropna(dim="time", how="all")  # when tlim specified
 
             data = obs
 
@@ -221,8 +219,7 @@ def rinexobs3(
                 data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
 
     # %% patch SV names in case of "G 7" => "G07"
-    data = data.assign_coords(sv=[s.replace(" ", "0")
-                                  for s in data.sv.values.tolist()])
+    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
     # %% other attributes
     data.attrs["version"] = hdr["version"]
 
@@ -232,8 +229,7 @@ def rinexobs3(
     elif "time" in data.coords.keys():
         # median is robust against gaps
         try:
-            data.attrs["interval"] = np.median(np.diff(data.time)
-                                               / np.timedelta64(1, "s"))
+            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
         except TypeError:
             pass
     else:
@@ -318,8 +314,8 @@ def _epoch(
     block processing of each epoch (time step)
     """
     darr = np.atleast_2d(
-        np.genfromtxt(io.BytesIO(raw.encode("ascii")),
-                      delimiter=(14, 1, 1) * hdr["Fmax"]))
+        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+    )
     # data = xarray.Dataset({}, coords={"time": [], "sv": []})
     # %% assign data for each time step
     for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
@@ -340,7 +336,7 @@ def _epoch(
             dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3]))
 
             if useindicators:
-                dsf = _indicators(dsf, k, garr[:, i * 3 + 1: i * 3 + 3])
+                dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3])
 
         if verbose:
             print(time, "\r", end="")
@@ -408,7 +404,6 @@ def rinexsystem3(
     last_epoch = None
     # %% loop
     with opener(fn) as f:
-
         if fast:
             hdr = obsheader3(f)
         else:
@@ -418,16 +413,16 @@ def rinexsystem3(
         for sk in hdr["fields"]:
             obl = obl + hdr["fields"][sk]
         obl = np.unique(np.array(obl))
-        obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])]
+        obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
 
         if meas is not None:
             obl = obl[np.isin(obl, meas)]
 
         Nt = times.size
         Npages = len(obl)
-        Nsv = int(hdr['# OF SATELLITES'])
+        Nsv = int(hdr["# OF SATELLITES"])
 
-        svl = np.tile('   ', Nsv)
+        svl = np.tile("   ", Nsv)
 
         data = np.empty((Npages, Nt, Nsv))
         data.fill(np.nan)
@@ -482,11 +477,11 @@ def rinexsystem3(
 
             for k in sv:
                 if k not in svl:
-                    svl[np.argmax(svl == '   ')] = k
+                    svl[np.argmax(svl == "   ")] = k
 
             darr = np.atleast_2d(
-                    np.genfromtxt(io.BytesIO(raw.encode("ascii")),
-                                  delimiter=(14, 1, 1) * hdr["Fmax"]))
+                np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+            )
 
             t = time == times
 
@@ -499,19 +494,19 @@ def rinexsystem3(
                 gsv = np.array(sv)[si]
                 isv = [i for i, s in enumerate(svl) if s in gsv]
 
-                for i, j in enumerate(hdr['fields'][sk]):
+                for i, j in enumerate(hdr["fields"][sk]):
                     o = obl == j
-                    data[o, t, isv]=darr[si,i*3]
+                    data[o, t, isv] = darr[si, i * 3]
                     if useindicators:
-                        data_lli[o, t, isv]=darr[si, i*3+1]
-                        data_ssi[o, t, isv]=darr[si, i*3+2]
+                        data_lli[o, t, isv] = darr[si, i * 3 + 1]
+                        data_ssi[o, t, isv] = darr[si, i * 3 + 2]
 
-    if '   ' in svl:
-        svl = svl[:np.argmax(svl == '   ')]
+    if "   " in svl:
+        svl = svl[: np.argmax(svl == "   ")]
 
     svl, isv = np.unique(svl, return_index=True)
 
-    data=data[:, :, isv]
+    data = data[:, :, isv]
     if useindicators:
         data_lli = data_lli[:, :, isv]
         data_ssi = data_ssi[:, :, isv]
@@ -525,11 +520,11 @@ def rinexsystem3(
             continue
         obs[k] = (("time", "sv"), data[i, :, :])
         if useindicators:
-            if k[0] == 'L':
-                obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :])
-                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
-            elif k[0] == 'C':
-                obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :])
+            if k[0] == "L":
+                obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :])
+                obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+            elif k[0] == "C":
+                obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
 
     obs = obs.dropna(dim="sv", how="all")
     obs = obs.dropna(dim="time", how="all")  # when tlim specified
@@ -671,7 +666,7 @@ def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dic
         fields = {k: fields[k] for k in use if k in fields}
 
     # perhaps this could be done more efficiently, but it's probably low impact on overall program.
-    # simple set and frozenset operations do NOT preserve order, which would completely mess up reading!
+    # simple set and frozenset operations do NOT preserve order, which would mess up reading!
     sysind: dict[str, T.Any] = {}
     if isinstance(meas, (tuple, list, np.ndarray)):
         for sk in fields:  # iterate over each system
@@ -685,7 +680,7 @@ def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dic
             fields[sk] = np.array(fields[sk])[ind].tolist()
             sysind[sk] = np.empty(Fmax * 3, dtype=bool)  # *3 due to LLI, SSI
             for j, i in enumerate(ind):
-                sysind[sk][j * 3: j * 3 + 3] = i
+                sysind[sk][j * 3 : j * 3 + 3] = i
     else:
         sysind = {k: np.s_[:] for k in fields}
 
diff --git a/src/georinex/obs4.py b/src/georinex/obs4.py
new file mode 100644
index 0000000..5113e85
--- /dev/null
+++ b/src/georinex/obs4.py
@@ -0,0 +1,474 @@
+from __future__ import annotations
+from pathlib import Path
+import numpy as np
+import logging
+from datetime import datetime, timedelta
+import io
+import xarray
+import re
+import typing as T
+
+try:
+    from pymap3d import ecef2geodetic
+except ImportError:
+    ecef2geodetic = None
+#
+from .rio import opener, rinexinfo
+from .common import determine_time_system, check_time_interval, check_unique_times
+
+"""https://github.com/mvglasow/satstat/wiki/NMEA-IDs"""
+
+SBAS = 100  # offset for ID
+GLONASS = 37
+QZSS = 192
+BEIDOU = 0
+
+
+def rinexobs4(
+    fn: T.TextIO | Path,
+    use: set[str] = None,
+    tlim: tuple[datetime, datetime] = None,
+    useindicators: bool = False,
+    meas: list[str] = None,
+    verbose: bool = False,
+    *,
+    fast: bool = False,
+    interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+    """
+    process RINEX 4 OBS data
+
+    fn: RINEX OBS 3 filename
+    use: 'G'  or ['G', 'R'] or similar
+
+    tlim: read between these time bounds
+    useindicators: SSI, LLI are output
+    meas:  'L1C'  or  ['L1C', 'C1C'] or similar
+
+    fast: Loads entire file into memory as a string
+            TODO: time interval, time offset, etc.
+          if false, uses old _epoch method
+
+    interval: allows decimating file read by time e.g. every 5 seconds.
+                Useful to speed up reading of very large RINEX files
+    """
+
+    interval = check_time_interval(interval)
+
+    if isinstance(use, str):
+        use = {use}
+
+    if isinstance(meas, str):
+        meas = [meas]
+
+    if not meas or not meas[0].strip():
+        meas = None
+
+    if isinstance(fn, Path):
+        file_name = fn
+    else:
+        file_name = None
+
+    # %% allocate
+    if fast:
+        with opener(fn) as f:
+            fn = [ln for ln in f]
+        times = obstime4(fn)
+    else:
+        data = xarray.Dataset({}, coords={"time": [], "sv": []})
+
+    if tlim is not None and not isinstance(tlim[0], datetime):
+        raise TypeError("time bounds are specified as datetime.datetime")
+
+    last_epoch = None
+    # %% loop
+    with opener(fn) as f:
+        if fast:
+            hdr = obsheader4(f, use)
+
+            obl = []
+            for sk in hdr["fields"]:
+                obl = obl + hdr["fields"][sk]
+            obl = np.unique(np.array(obl))
+            obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
+
+            if meas is not None:
+                obl = obl[np.any([np.char.find(obl, j) == 0 for i, j in enumerate(meas)], 0)]
+
+            Nt = times.size
+            Npages = len(obl)
+
+            f = np.array([f for f in f if f[1:3] != "  "])  # don't need opener anymore
+
+            sv = np.array([ln[:3] for ln in f])
+            (usv, isv) = np.unique(sv, return_inverse=True)  # get all sv lines
+
+            # list of epoch numbers per datum
+            timeIndex = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1
+
+            # usv contains epoch entries too, get only sv
+            svl = usv[np.logical_not(np.char.startswith(usv, ">"))]
+            Nsv = svl.size
+
+            data = np.full((Npages, Nt, Nsv), np.nan)
+
+            if useindicators:
+                data_lli = np.full_like(data, np.nan)
+                data_ssi = np.full_like(data, np.nan)
+
+            time_offset = []  # TODO: fill this list
+
+            for i, sv in enumerate(usv):
+                if re.match(r"[A-Z][ \d]\d", sv) is None:
+                    # time offset processing here
+                    continue
+                elif use is not None and sv[0] not in use:
+                    continue
+
+                # genfromtext is bottleneck, limit calls this way
+                darr = np.atleast_2d(
+                    np.genfromtxt(
+                        np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"]
+                    )
+                )
+
+                t_i = timeIndex[isv == i]
+
+                # jsv == i-1 except for files spanning 1999-2000
+                jsv = svl == sv
+
+                for j, jobl in enumerate(hdr["fields"][sv[0]]):
+                    o = obl == jobl
+                    if not np.any(o):
+                        continue
+
+                    data[o, t_i, jsv] = darr[:, j * 3]
+
+                    if useindicators:
+                        data_lli[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 1]
+                        data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 2]
+
+            obs = xarray.Dataset(coords={"sv": svl, "time": times})
+            for i, k in enumerate(obl):
+                if k is None:
+                    continue
+                elif np.all(np.isnan(data[i, :, :])):
+                    # drop all nan datasets like tests expect
+                    continue
+                obs[k] = (("time", "sv"), data[i, :, :])
+                if useindicators:
+                    if k[0] == "L":
+                        obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :])
+                        obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+                    # elif k[0] == 'C': # only for code?
+                    else:
+                        obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+
+            obs = obs.dropna(dim="sv", how="all")
+            # obs = obs.dropna(dim="time", how="all")  # when tlim specified
+
+            data = obs
+
+        else:
+            hdr = obsheader4(f, use, meas)
+            # process OBS file
+            time_offset = []
+            for ln in f:
+                if not ln.startswith(">"):  # end of file
+                    break
+
+                try:
+                    time = _timeobs(ln)
+                except ValueError:  # garbage between header and RINEX data
+                    logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                    continue
+
+                try:
+                    time_offset.append(float(ln[41:56]))
+                except ValueError:
+                    pass
+                # %% get SV indices
+                sv = []
+                raw = ""
+                # Number of visible satellites this time %i3  pg. A13
+                for _ in range(int(ln[33:35])):
+                    ln = f.readline()
+                    if use is None or ln[0] in use:
+                        sv.append(ln[:3])
+                        raw += ln[3:]
+
+                if tlim is not None:
+                    if time < tlim[0]:
+                        continue
+                    elif time > tlim[1]:
+                        break
+
+                if interval is not None:
+                    if last_epoch is None:  # initialization
+                        last_epoch = time
+                    else:
+                        if time - last_epoch < interval:
+                            continue
+                        else:
+                            last_epoch += interval
+
+                if verbose:
+                    print(time, end="\r")
+
+                # this time epoch is complete, assemble the data.
+                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+
+    # %% patch SV names in case of "G 7" => "G07"
+    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+    # %% other attributes
+    data.attrs["version"] = hdr["version"]
+
+    # Get interval from header or derive it from the data
+    if "interval" in hdr.keys():
+        data.attrs["interval"] = hdr["interval"]
+    elif "time" in data.coords.keys():
+        # median is robust against gaps
+        try:
+            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+        except TypeError:
+            pass
+    else:
+        data.attrs["interval"] = np.nan
+
+    data.attrs["rinextype"] = "obs"
+    data.attrs["fast_processing"] = int(fast)  # bool is not allowed in NetCDF4
+    data.attrs["time_system"] = determine_time_system(hdr)
+    if isinstance(file_name, Path):
+        data.attrs["filename"] = file_name.name
+
+    if "position" in hdr.keys():
+        data.attrs["position"] = hdr["position"]
+        if ecef2geodetic is not None:
+            data.attrs["position_geodetic"] = hdr["position_geodetic"]
+    if "rxmodel" in hdr.keys():
+        obs.attrs["rxmodel"] = hdr["rxmodel"]
+    if time_offset:
+        data.attrs["time_offset"] = time_offset
+
+    if "RCV CLOCK OFFS APPL" in hdr.keys():
+        try:
+            data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
+        except ValueError:
+            pass
+
+    return data
+
+
+def _timeobs(ln: str) -> datetime:
+    """
+    convert time from RINEX 3 OBS text to datetime
+    """
+
+    if not ln.startswith("> "):  # pg. A13
+        raise ValueError('RINEX 3 line beginning "> " is not present')
+
+    return datetime(
+        int(ln[2:6]),
+        int(ln[7:9]),
+        int(ln[10:12]),
+        hour=int(ln[13:15]),
+        minute=int(ln[16:18]),
+        second=int(ln[19:21]),
+        microsecond=int(float(ln[19:29]) % 1 * 1000000),
+    )
+
+
+def obstime4(fn: T.TextIO | Path, verbose: bool = False) -> np.ndarray:
+    """
+    return all times in RINEX file
+    """
+
+    times = []
+
+    with opener(fn) as f:
+        for ln in f:
+            if ln.startswith("> "):
+                try:
+                    times.append(_timeobs(ln))
+                except (ValueError, IndexError):
+                    logging.debug(f"was not a time:\n{ln}")
+                    continue
+
+    times = np.asarray(times)
+
+    check_unique_times(times)
+
+    return times
+
+
+def _epoch(
+    data: xarray.Dataset,
+    raw: str,
+    hdr: dict[str, T.Any],
+    time: datetime,
+    sv: list[str],
+    useindicators: bool,
+    verbose: bool,
+) -> xarray.Dataset:
+    """
+    block processing of each epoch (time step)
+    """
+    darr = np.atleast_2d(
+        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+    )
+    # data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    # %% assign data for each time step
+    for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+        # satellite indices "si" to extract from this time's measurements
+        si = [i for i, s in enumerate(sv) if s[0] in sk]
+        if len(si) == 0:  # no SV of this system "sk" at this time
+            continue
+
+        # measurement indices "di" to extract at this time step
+        di = hdr["fields_ind"][sk]
+        garr = darr[si, :]
+        garr = garr[:, di]
+
+        gsv = np.array(sv)[si]
+
+        dsf: dict[str, tuple] = {}
+        for i, k in enumerate(hdr["fields"][sk]):
+            dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3]))
+
+            if useindicators:
+                dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3])
+
+        if verbose:
+            print(time, "\r", end="")
+
+        epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
+        if len(data) == 0:
+            data = epoch_data
+            # data = xarray.merge((data, epoch_data))
+        elif len(hdr["fields"]) == 1:  # one satellite system selected, faster to process
+            data = xarray.concat((data, epoch_data), dim="time")
+        else:  # general case, slower for different satellite systems all together
+            data = xarray.merge((data, epoch_data))
+
+    return data
+
+
+def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
+    """
+    handle LLI (loss of lock) and SSI (signal strength)
+    """
+    if k.startswith(("L1", "L2")):
+        d[k + "lli"] = (("time", "sv"), np.atleast_2d(arr[:, 0]))
+
+    d[k + "ssi"] = (("time", "sv"), np.atleast_2d(arr[:, 1]))
+
+    return d
+
+
+def obsheader4(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dict[str, T.Any]:
+    """
+    get RINEX 4 OBS types, for each system type
+    optionally, select system type and/or measurement type to greatly
+    speed reading and save memory (RAM, disk)
+    """
+    if isinstance(f, (str, Path)):
+        with opener(f, header=True) as h:
+            return obsheader4(h, use, meas)
+
+    fields = {}
+    Fmax = 0
+
+    # %% first line
+    hdr = rinexinfo(f)
+
+    for ln in f:
+        if "END OF HEADER" in ln:
+            break
+
+        hd = ln[60:80]
+        c = ln[:60]
+        if "SYS / # / OBS TYPES" in hd:
+            k = c[0]
+            fields[k] = c[6:60].split()
+            N = int(c[3:6])
+            # %% maximum number of fields in a file, to allow fast Numpy parse.
+            Fmax = max(N, Fmax)
+
+            n = N - 13
+            while n > 0:  # Rinex 3.03, pg. A6, A7
+                ln = f.readline()
+                assert "SYS / # / OBS TYPES" in ln[60:]
+                fields[k] += ln[6:60].split()
+                n -= 13
+
+            assert len(fields[k]) == N
+
+            continue
+
+        if hd.strip() not in hdr:  # Header label
+            hdr[hd.strip()] = c  # don't strip for fixed-width parsers
+            # string with info
+        else:  # concatenate to the existing string
+            hdr[hd.strip()] += " " + c
+
+    # %% list with x,y,z cartesian (OPTIONAL)
+    # Rinex 3.03, pg. A6, Table A2
+    try:
+        # some RINEX files have bad headers with mulitple APPROX POSITION XYZ.
+        # we choose to use the first such header.
+        hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3]
+        if ecef2geodetic is not None and len(hdr["position"]) == 3:
+            hdr["position_geodetic"] = ecef2geodetic(*hdr["position"])
+    except (KeyError, ValueError):
+        pass
+    # %% time
+    try:
+        t0s = hdr["TIME OF FIRST OBS"]
+        # NOTE: must do second=int(float()) due to non-conforming files
+        hdr["t0"] = datetime(
+            year=int(t0s[:6]),
+            month=int(t0s[6:12]),
+            day=int(t0s[12:18]),
+            hour=int(t0s[18:24]),
+            minute=int(t0s[24:30]),
+            second=int(float(t0s[30:36])),
+            microsecond=int(float(t0s[30:43]) % 1 * 1000000),
+        )
+    except (KeyError, ValueError):
+        pass
+
+    try:
+        hdr["interval"] = float(hdr["INTERVAL"][:10])
+    except (KeyError, ValueError):
+        pass
+    # %% select specific satellite systems only (optional)
+    if use:
+        if not set(fields.keys()).intersection(use):
+            raise KeyError(f"system type {use} not found in RINEX file")
+
+        fields = {k: fields[k] for k in use if k in fields}
+
+    # perhaps this could be done more efficiently, but it's probably low impact on overall program.
+    # simple set and frozenset operations do NOT preserve order, which would mess up reading!
+    sysind: dict[str, T.Any] = {}
+    if isinstance(meas, (tuple, list, np.ndarray)):
+        for sk in fields:  # iterate over each system
+            # ind = np.isin(fields[sk], meas)  # boolean vector
+            ind = np.zeros(len(fields[sk]), dtype=bool)
+            for m in meas:
+                for i, field in enumerate(fields[sk]):
+                    if field.startswith(m):
+                        ind[i] = True
+
+            fields[sk] = np.array(fields[sk])[ind].tolist()
+            sysind[sk] = np.empty(Fmax * 3, dtype=bool)  # *3 due to LLI, SSI
+            for j, i in enumerate(ind):
+                sysind[sk][j * 3 : j * 3 + 3] = i
+    else:
+        sysind = {k: np.s_[:] for k in fields}
+
+    hdr["fields"] = fields
+    hdr["fields_ind"] = sysind
+    hdr["Fmax"] = Fmax
+
+    return hdr