From 57b74e0100f3056a3101ed6c0f817be34b099364 Mon Sep 17 00:00:00 2001 From: breid-phys <81174535+breid-phys@users.noreply.github.com> Date: Thu, 19 May 2022 16:41:40 -0300 Subject: [PATCH 01/19] added fast processing to rinex v3 --- obs3.py | 719 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 719 insertions(+) create mode 100644 obs3.py diff --git a/obs3.py b/obs3.py new file mode 100644 index 0000000..d1f1637 --- /dev/null +++ b/obs3.py @@ -0,0 +1,719 @@ +from __future__ import annotations +from pathlib import Path +import numpy as np +import logging +from datetime import datetime, timedelta +import io +import xarray +import typing as T + +try: + from pymap3d import ecef2geodetic +except ImportError: + ecef2geodetic = None +# +from .rio import opener, rinexinfo +from .common import determine_time_system, check_time_interval, check_unique_times + +"""https://github.com/mvglasow/satstat/wiki/NMEA-IDs""" + +SBAS = 100 # offset for ID +GLONASS = 37 +QZSS = 192 +BEIDOU = 0 + + +def rinexobs3( + fn: T.TextIO | Path, + use: set[str] = None, + tlim: tuple[datetime, datetime] = None, + useindicators: bool = False, + meas: list[str] = None, + verbose: bool = False, + *, + fast: bool = False, + interval: float | int | timedelta = None, +) -> xarray.Dataset: + """ + process RINEX 3 OBS data + + fn: RINEX OBS 3 filename + use: 'G' or ['G', 'R'] or similar + + tlim: read between these time bounds + useindicators: SSI, LLI are output + meas: 'L1C' or ['L1C', 'C1C'] or similar + + fast: Still double-reading file to get times + Uses # OF SATELLITES from header to try and size nparray, falls back + to Nsvsys if missing + if false, uses old _epoch method + + interval: allows decimating file read by time e.g. every 5 seconds. + Useful to speed up reading of very large RINEX files + """ + + interval = check_time_interval(interval) + + if isinstance(use, str): + use = {use} + + if isinstance(meas, str): + meas = [meas] + + if not meas or not meas[0].strip(): + meas = None + + """ + Nsvsys may need updating as GNSS systems grow. + Let us know if you needed to change them. + + Beidou is 35 max + Galileo is 36 max + """ + Nsvsys = 36 + + # %% allocate + if fast: + times = obstime3(fn) + else: + data = xarray.Dataset({}, coords={"time": [], "sv": []}) + + if tlim is not None and not isinstance(tlim[0], datetime): + raise TypeError("time bounds are specified as datetime.datetime") + + last_epoch = None + # %% loop + with opener(fn) as f: + + + if fast: + hdr = obsheader3(f,use) + + obl = [] + for sk in hdr["fields"]: + obl=obl+hdr["fields"][sk] + obl = np.unique(np.array(obl)) + obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] + + + if meas is not None: + obl = obl[np.any([np.char.find(obl,j)==0 for i, j in enumerate(meas)],0)] + + Nt = times.size + Npages = len(obl) + if '# OF SATELLITES' in hdr.keys(): + Nsv = int(hdr['# OF SATELLITES']) + else: + Nsv = Nsvsys * len(hdr['fields']) + + svl = np.tile(' ',Nsv) + + data = np.empty((Npages, Nt, Nsv)) + data.fill(np.nan) + + if useindicators: + data_lli = np.full_like(data,np.nan) + data_ssi = np.full_like(data,np.nan) + + else: + hdr = obsheader3(f, use, meas) + + + # %% process OBS file + time_offset = [] + for ln in f: + if not ln.startswith(">"): # end of file + break + + try: + time = _timeobs(ln) + except ValueError: # garbage between header and RINEX data + logging.debug(f"garbage detected in {fn}, trying to parse at next time step") + continue + + try: + time_offset.append(float(ln[41:56])) + except ValueError: + pass + # %% get SV indices + sv = [] + raw = "" + # Number of visible satellites this time %i3 pg. A13 + for _ in range(int(ln[33:35])): + ln = f.readline() + if use is None or ln[0] in use: + sv.append(ln[:3]) + raw += ln[3:] + + if tlim is not None: + if time < tlim[0]: + continue + elif time > tlim[1]: + break + + if interval is not None: + if last_epoch is None: # initialization + last_epoch = time + else: + if time - last_epoch < interval: + continue + else: + last_epoch += interval + + if verbose: + print(time, end="\r") + + if fast: + for k in sv: + # update list of satellites + if not k in svl: + svl[np.argmax(svl==' ')]=k + + darr = np.atleast_2d( + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) + + t = time==times + + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) + # satellite indices "si" to extract from this time's measurements + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + gsv = np.array(sv)[si] + isv = [i for i,s in enumerate(svl) if s in gsv] + + for i,j in enumerate(hdr['fields'][sk]): + o = obl==j + if not np.any(o): + continue + data[o,t,isv]=darr[si,i*3] + if useindicators: + data_lli[o,t,isv]=darr[si,i*3+1] + data_ssi[o,t,isv]=darr[si,i*3+2] + else: + # this time epoch is complete, assemble the data. + data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) + + if fast: + if ' ' in svl: + # remove blank satellites (if tlim used) + svl=svl[:np.argmax(svl==' ')] + + svl, isv = np.unique(svl,return_index=True) + + data=data[:,:,isv] + if useindicators: + data_lli=data_lli[:,:,isv] + data_ssi=data_ssi[:,:,isv] + + obs = xarray.Dataset(coords={"sv": svl,"time":times}) + for i, k in enumerate(obl): + if k is None: + continue + elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == 'L': + obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + #elif k[0] == 'C': # only for code? + else: + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + + obs = obs.dropna(dim="sv", how="all") + obs = obs.dropna(dim="time", how="all") # when tlim specified + + data=obs + + + # %% patch SV names in case of "G 7" => "G07" + data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) + # %% other attributes + data.attrs["version"] = hdr["version"] + + # Get interval from header or derive it from the data + if "interval" in hdr.keys(): + data.attrs["interval"] = hdr["interval"] + elif "time" in data.coords.keys(): + # median is robust against gaps + try: + data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) + except TypeError: + pass + else: + data.attrs["interval"] = np.nan + + data.attrs["rinextype"] = "obs" + data.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4 + data.attrs["time_system"] = determine_time_system(hdr) + if isinstance(fn, Path): + data.attrs["filename"] = fn.name + + if "position" in hdr.keys(): + data.attrs["position"] = hdr["position"] + if ecef2geodetic is not None: + data.attrs["position_geodetic"] = hdr["position_geodetic"] + + if time_offset: + data.attrs["time_offset"] = time_offset + + if "RCV CLOCK OFFS APPL" in hdr.keys(): + try: + data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) + except ValueError: + pass + + return data + + +def _timeobs(ln: str) -> datetime: + """ + convert time from RINEX 3 OBS text to datetime + """ + + if not ln.startswith("> "): # pg. A13 + raise ValueError('RINEX 3 line beginning "> " is not present') + + return datetime( + int(ln[2:6]), + int(ln[7:9]), + int(ln[10:12]), + hour=int(ln[13:15]), + minute=int(ln[16:18]), + second=int(ln[19:21]), + microsecond=int(float(ln[19:29]) % 1 * 1000000), + ) + + +def obstime3(fn: T.TextIO | Path, verbose: bool = False) -> np.ndarray: + """ + return all times in RINEX file + """ + + times = [] + + with opener(fn) as f: + for ln in f: + if ln.startswith("> "): + try: + times.append(_timeobs(ln)) + except (ValueError, IndexError): + logging.debug(f"was not a time:\n{ln}") + continue + + times = np.asarray(times) + + check_unique_times(times) + + return times + + +def _epoch( + data: xarray.Dataset, + raw: str, + hdr: dict[str, T.Any], + time: datetime, + sv: list[str], + useindicators: bool, + verbose: bool, +) -> xarray.Dataset: + """ + block processing of each epoch (time step) + """ + darr = np.atleast_2d( + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) + #data = xarray.Dataset({}, coords={"time": [], "sv": []}) + # %% assign data for each time step + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) + # satellite indices "si" to extract from this time's measurements + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + # measurement indices "di" to extract at this time step + di = hdr["fields_ind"][sk] + garr = darr[si, :] + garr = garr[:, di] + + gsv = np.array(sv)[si] + + dsf: dict[str, tuple] = {} + for i, k in enumerate(hdr["fields"][sk]): + dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3])) + + if useindicators: + dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3]) + + if verbose: + print(time, "\r", end="") + + epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv}) + if len(data) == 0: + data = epoch_data + #data = xarray.merge((data, epoch_data)) + elif len(hdr["fields"]) == 1: # one satellite system selected, faster to process + data = xarray.concat((data, epoch_data), dim="time") + else: # general case, slower for different satellite systems all together + data = xarray.merge((data, epoch_data)) + + return data + +def rinexsystem3( + fn: T.TextIO | Path, + use: set[str] = None, + tlim: tuple[datetime, datetime] = None, + useindicators: bool = False, + meas: list[str] = None, + verbose: bool = False, + *, + fast: bool = False, + interval: float | int | timedelta = None, +) -> xarray.Dataset: + """ + process RINEX 3 OBS data + + fn: RINEX OBS 3 filename + use: 'G' or ['G', 'R'] or similar + + tlim: read between these time bounds + useindicators: SSI, LLI are output + meas: 'L1C' or ['L1C', 'C1C'] or similar + + fast: + TODO: FUTURE, not yet enabled for OBS3 + speculative preallocation based on minimum SV assumption and file size. + Avoids double-reading file and more complicated linked lists. + Believed that Numpy array should be faster than lists anyway. + Reduce Nsvmin if error (let us know) + + interval: allows decimating file read by time e.g. every 5 seconds. + Useful to speed up reading of very large RINEX files + """ + + interval = check_time_interval(interval) + + if isinstance(use, str): + use = {use} + + if isinstance(meas, str): + meas = [meas] + + if not meas or not meas[0].strip(): + meas = None + # %% allocate + times = obstime3(fn) + #data = xarray.Dataset({}, coords={"time": [], "sv": []}) + if tlim is not None and not isinstance(tlim[0], datetime): + raise TypeError("time bounds are specified as datetime.datetime") + + last_epoch = None + # %% loop + with opener(fn) as f: + + if fast: + hdr = obsheader3(f) + else: + hdr = obsheader3(f, use, meas) + + obl = [] + for sk in hdr["fields"]: + obl=obl+hdr["fields"][sk] + obl = np.unique(np.array(obl)) + obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] + + + if meas is not None: + obl = obl[np.isin(obl,meas)] + + Nt = times.size + Npages = len(obl) + Nsv = int(hdr['# OF SATELLITES']) + + svl = np.tile(' ',Nsv) + + data = np.empty((Npages, Nt, Nsv)) + data.fill(np.nan) + + if useindicators: + data_lli = np.full_like(data,np.nan) + data_ssi = np.full_like(data,np.nan) + + # %% process OBS file + time_offset = [] + for ln in f: + if not ln.startswith(">"): # end of file + break + + try: + time = _timeobs(ln) + except ValueError: # garbage between header and RINEX data + logging.debug(f"garbage detected in {fn}, trying to parse at next time step") + continue + + try: + time_offset.append(float(ln[41:56])) + except ValueError: + pass + # %% get SV indices + sv = [] + raw = "" + # Number of visible satellites this time %i3 pg. A13 + for _ in range(int(ln[33:35])): + ln = f.readline() + if use is None or ln[0] in use: + sv.append(ln[:3]) + raw += ln[3:] + + if tlim is not None: + if time < tlim[0]: + continue + elif time > tlim[1]: + break + + if interval is not None: + if last_epoch is None: # initialization + last_epoch = time + else: + if time - last_epoch < interval: + continue + else: + last_epoch += interval + + if verbose: + print(time, end="\r") + + + for k in sv: + if not k in svl: + svl[np.argmax(svl==' ')]=k + + + + darr = np.atleast_2d( + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) + + t = time==times + + + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) + # satellite indices "si" to extract from this time's measurements + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + gsv = np.array(sv)[si] + isv = [i for i,s in enumerate(svl) if s in gsv] + #isv = np.zeros_like(gsv,dtype=int) + + #for ii,jj in enumerate(gsv): + # isv[ii] = np.nonzero(svl==jj) + + + for i,j in enumerate(hdr['fields'][sk]): + o = obl==j + #for ii,jj in enumerate(gsv): + # data[o,t,svl==jj]=darr[si[ii],i*3] + data[o,t,isv]=darr[si,i*3] + if useindicators: + data_lli[o,t,isv]=darr[si,i*3+1] + data_ssi[o,t,isv]=darr[si,i*3+2] + + + + + + if ' ' in svl: + svl=svl[:np.argmax(svl==' ')] + + svl, isv = np.unique(svl,return_index=True) + + data=data[:,:,isv] + if useindicators: + data_lli=data_lli[:,:,isv] + data_ssi=data_ssi[:,:,isv] + + obs = xarray.Dataset(coords={"time":times,"sv": svl}) + for i, k in enumerate(obl): + # FIXME: for limited time span reads, this drops unused data variables + # if np.isnan(data[i, ...]).all(): + # continue + if k is None: + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == 'L': + obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + elif k[0] == 'C': + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + + + obs = obs.dropna(dim="sv", how="all") + obs = obs.dropna(dim="time", how="all") # when tlim specified + + data=obs + # %% patch SV names in case of "G 7" => "G07" + data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) + # %% other attributes + data.attrs["version"] = hdr["version"] + + # Get interval from header or derive it from the data + if "interval" in hdr.keys(): + data.attrs["interval"] = hdr["interval"] + elif "time" in data.coords.keys(): + # median is robust against gaps + try: + data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) + except TypeError: + pass + else: + data.attrs["interval"] = np.nan + + data.attrs["rinextype"] = "obs" + data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 + data.attrs["time_system"] = determine_time_system(hdr) + if isinstance(fn, Path): + data.attrs["filename"] = fn.name + + if "position" in hdr.keys(): + data.attrs["position"] = hdr["position"] + if ecef2geodetic is not None: + data.attrs["position_geodetic"] = hdr["position_geodetic"] + + if time_offset: + data.attrs["time_offset"] = time_offset + + if "RCV CLOCK OFFS APPL" in hdr.keys(): + try: + data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) + except ValueError: + pass + + return data + + +def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]: + """ + handle LLI (loss of lock) and SSI (signal strength) + """ + if k.startswith(("L1", "L2")): + d[k + "lli"] = (("time", "sv"), np.atleast_2d(arr[:, 0])) + + d[k + "ssi"] = (("time", "sv"), np.atleast_2d(arr[:, 1])) + + return d + + +def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dict[str, T.Any]: + """ + get RINEX 3 OBS types, for each system type + optionally, select system type and/or measurement type to greatly + speed reading and save memory (RAM, disk) + """ + if isinstance(f, (str, Path)): + with opener(f, header=True) as h: + return obsheader3(h, use, meas) + + fields = {} + Fmax = 0 + + # %% first line + hdr = rinexinfo(f) + + for ln in f: + if "END OF HEADER" in ln: + break + + hd = ln[60:80] + c = ln[:60] + if "SYS / # / OBS TYPES" in hd: + k = c[0] + fields[k] = c[6:60].split() + N = int(c[3:6]) + # %% maximum number of fields in a file, to allow fast Numpy parse. + Fmax = max(N, Fmax) + + n = N - 13 + while n > 0: # Rinex 3.03, pg. A6, A7 + ln = f.readline() + assert "SYS / # / OBS TYPES" in ln[60:] + fields[k] += ln[6:60].split() + n -= 13 + + assert len(fields[k]) == N + + continue + + if hd.strip() not in hdr: # Header label + hdr[hd.strip()] = c # don't strip for fixed-width parsers + # string with info + else: # concatenate to the existing string + hdr[hd.strip()] += " " + c + + # %% list with x,y,z cartesian (OPTIONAL) + # Rinex 3.03, pg. A6, Table A2 + try: + # some RINEX files have bad headers with mulitple APPROX POSITION XYZ. + # we choose to use the first such header. + hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3] + if ecef2geodetic is not None and len(hdr["position"]) == 3: + hdr["position_geodetic"] = ecef2geodetic(*hdr["position"]) + except (KeyError, ValueError): + pass + # %% time + try: + t0s = hdr["TIME OF FIRST OBS"] + # NOTE: must do second=int(float()) due to non-conforming files + hdr["t0"] = datetime( + year=int(t0s[:6]), + month=int(t0s[6:12]), + day=int(t0s[12:18]), + hour=int(t0s[18:24]), + minute=int(t0s[24:30]), + second=int(float(t0s[30:36])), + microsecond=int(float(t0s[30:43]) % 1 * 1000000), + ) + except (KeyError, ValueError): + pass + + try: + hdr["interval"] = float(hdr["INTERVAL"][:10]) + except (KeyError, ValueError): + pass + # %% select specific satellite systems only (optional) + if use: + if not set(fields.keys()).intersection(use): + raise KeyError(f"system type {use} not found in RINEX file") + + fields = {k: fields[k] for k in use if k in fields} + + # perhaps this could be done more efficiently, but it's probably low impact on overall program. + # simple set and frozenset operations do NOT preserve order, which would completely mess up reading! + sysind: dict[str, T.Any] = {} + if isinstance(meas, (tuple, list, np.ndarray)): + for sk in fields: # iterate over each system + # ind = np.isin(fields[sk], meas) # boolean vector + ind = np.zeros(len(fields[sk]), dtype=bool) + for m in meas: + for i, field in enumerate(fields[sk]): + if field.startswith(m): + ind[i] = True + + fields[sk] = np.array(fields[sk])[ind].tolist() + sysind[sk] = np.empty(Fmax * 3, dtype=bool) # *3 due to LLI, SSI + for j, i in enumerate(ind): + sysind[sk][j * 3 : j * 3 + 3] = i + else: + sysind = {k: np.s_[:] for k in fields} + + hdr["fields"] = fields + hdr["fields_ind"] = sysind + hdr["Fmax"] = Fmax + + return hdr From a429c12b346f10ee89477f2f8918313e8f23c8a2 Mon Sep 17 00:00:00 2001 From: breid-phys <81174535+breid-phys@users.noreply.github.com> Date: Thu, 19 May 2022 16:43:36 -0300 Subject: [PATCH 02/19] Update test_rinex.py Now sets flag correctly --- src/georinex/tests/test_rinex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/georinex/tests/test_rinex.py b/src/georinex/tests/test_rinex.py index 74fd7e5..1d8c273 100644 --- a/src/georinex/tests/test_rinex.py +++ b/src/georinex/tests/test_rinex.py @@ -61,7 +61,7 @@ def test_minimal(tmp_path, filename): if int(dat.version) == 2: assert dat.fast_processing elif int(dat.version) == 3: - assert not dat.fast_processing # FIXME: update when OBS3 fast processing is added. + assert dat.fast_processing def test_dont_care_file_extension(): From 1e025c15069a373708a35cf711fb3d64999de3d0 Mon Sep 17 00:00:00 2001 From: breid-phys <81174535+breid-phys@users.noreply.github.com> Date: Thu, 26 May 2022 15:18:35 -0300 Subject: [PATCH 03/19] moved file to correct place --- src/georinex/obs3.py | 376 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 360 insertions(+), 16 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 130940b..d1f1637 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -44,12 +44,10 @@ def rinexobs3( useindicators: SSI, LLI are output meas: 'L1C' or ['L1C', 'C1C'] or similar - fast: - TODO: FUTURE, not yet enabled for OBS3 - speculative preallocation based on minimum SV assumption and file size. - Avoids double-reading file and more complicated linked lists. - Believed that Numpy array should be faster than lists anyway. - Reduce Nsvmin if error (let us know) + fast: Still double-reading file to get times + Uses # OF SATELLITES from header to try and size nparray, falls back + to Nsvsys if missing + if false, uses old _epoch method interval: allows decimating file read by time e.g. every 5 seconds. Useful to speed up reading of very large RINEX files @@ -65,16 +63,62 @@ def rinexobs3( if not meas or not meas[0].strip(): meas = None + + """ + Nsvsys may need updating as GNSS systems grow. + Let us know if you needed to change them. + + Beidou is 35 max + Galileo is 36 max + """ + Nsvsys = 36 + # %% allocate - # times = obstime3(fn) - data = xarray.Dataset({}, coords={"time": [], "sv": []}) + if fast: + times = obstime3(fn) + else: + data = xarray.Dataset({}, coords={"time": [], "sv": []}) + if tlim is not None and not isinstance(tlim[0], datetime): raise TypeError("time bounds are specified as datetime.datetime") last_epoch = None # %% loop with opener(fn) as f: - hdr = obsheader3(f, use, meas) + + + if fast: + hdr = obsheader3(f,use) + + obl = [] + for sk in hdr["fields"]: + obl=obl+hdr["fields"][sk] + obl = np.unique(np.array(obl)) + obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] + + + if meas is not None: + obl = obl[np.any([np.char.find(obl,j)==0 for i, j in enumerate(meas)],0)] + + Nt = times.size + Npages = len(obl) + if '# OF SATELLITES' in hdr.keys(): + Nsv = int(hdr['# OF SATELLITES']) + else: + Nsv = Nsvsys * len(hdr['fields']) + + svl = np.tile(' ',Nsv) + + data = np.empty((Npages, Nt, Nsv)) + data.fill(np.nan) + + if useindicators: + data_lli = np.full_like(data,np.nan) + data_ssi = np.full_like(data,np.nan) + + else: + hdr = obsheader3(f, use, meas) + # %% process OBS file time_offset = [] @@ -98,8 +142,9 @@ def rinexobs3( # Number of visible satellites this time %i3 pg. A13 for _ in range(int(ln[33:35])): ln = f.readline() - sv.append(ln[:3]) - raw += ln[3:] + if use is None or ln[0] in use: + sv.append(ln[:3]) + raw += ln[3:] if tlim is not None: if time < tlim[0]: @@ -118,10 +163,73 @@ def rinexobs3( if verbose: print(time, end="\r") - - # this time epoch is complete, assemble the data. - data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) - + + if fast: + for k in sv: + # update list of satellites + if not k in svl: + svl[np.argmax(svl==' ')]=k + + darr = np.atleast_2d( + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) + + t = time==times + + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) + # satellite indices "si" to extract from this time's measurements + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + gsv = np.array(sv)[si] + isv = [i for i,s in enumerate(svl) if s in gsv] + + for i,j in enumerate(hdr['fields'][sk]): + o = obl==j + if not np.any(o): + continue + data[o,t,isv]=darr[si,i*3] + if useindicators: + data_lli[o,t,isv]=darr[si,i*3+1] + data_ssi[o,t,isv]=darr[si,i*3+2] + else: + # this time epoch is complete, assemble the data. + data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) + + if fast: + if ' ' in svl: + # remove blank satellites (if tlim used) + svl=svl[:np.argmax(svl==' ')] + + svl, isv = np.unique(svl,return_index=True) + + data=data[:,:,isv] + if useindicators: + data_lli=data_lli[:,:,isv] + data_ssi=data_ssi[:,:,isv] + + obs = xarray.Dataset(coords={"sv": svl,"time":times}) + for i, k in enumerate(obl): + if k is None: + continue + elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == 'L': + obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + #elif k[0] == 'C': # only for code? + else: + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + + obs = obs.dropna(dim="sv", how="all") + obs = obs.dropna(dim="time", how="all") # when tlim specified + + data=obs + + # %% patch SV names in case of "G 7" => "G07" data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) # %% other attributes @@ -140,7 +248,7 @@ def rinexobs3( data.attrs["interval"] = np.nan data.attrs["rinextype"] = "obs" - data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 + data.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4 data.attrs["time_system"] = determine_time_system(hdr) if isinstance(fn, Path): data.attrs["filename"] = fn.name @@ -219,6 +327,7 @@ def _epoch( darr = np.atleast_2d( np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) ) + #data = xarray.Dataset({}, coords={"time": [], "sv": []}) # %% assign data for each time step for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) # satellite indices "si" to extract from this time's measurements @@ -246,6 +355,7 @@ def _epoch( epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv}) if len(data) == 0: data = epoch_data + #data = xarray.merge((data, epoch_data)) elif len(hdr["fields"]) == 1: # one satellite system selected, faster to process data = xarray.concat((data, epoch_data), dim="time") else: # general case, slower for different satellite systems all together @@ -253,6 +363,240 @@ def _epoch( return data +def rinexsystem3( + fn: T.TextIO | Path, + use: set[str] = None, + tlim: tuple[datetime, datetime] = None, + useindicators: bool = False, + meas: list[str] = None, + verbose: bool = False, + *, + fast: bool = False, + interval: float | int | timedelta = None, +) -> xarray.Dataset: + """ + process RINEX 3 OBS data + + fn: RINEX OBS 3 filename + use: 'G' or ['G', 'R'] or similar + + tlim: read between these time bounds + useindicators: SSI, LLI are output + meas: 'L1C' or ['L1C', 'C1C'] or similar + + fast: + TODO: FUTURE, not yet enabled for OBS3 + speculative preallocation based on minimum SV assumption and file size. + Avoids double-reading file and more complicated linked lists. + Believed that Numpy array should be faster than lists anyway. + Reduce Nsvmin if error (let us know) + + interval: allows decimating file read by time e.g. every 5 seconds. + Useful to speed up reading of very large RINEX files + """ + + interval = check_time_interval(interval) + + if isinstance(use, str): + use = {use} + + if isinstance(meas, str): + meas = [meas] + + if not meas or not meas[0].strip(): + meas = None + # %% allocate + times = obstime3(fn) + #data = xarray.Dataset({}, coords={"time": [], "sv": []}) + if tlim is not None and not isinstance(tlim[0], datetime): + raise TypeError("time bounds are specified as datetime.datetime") + + last_epoch = None + # %% loop + with opener(fn) as f: + + if fast: + hdr = obsheader3(f) + else: + hdr = obsheader3(f, use, meas) + + obl = [] + for sk in hdr["fields"]: + obl=obl+hdr["fields"][sk] + obl = np.unique(np.array(obl)) + obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] + + + if meas is not None: + obl = obl[np.isin(obl,meas)] + + Nt = times.size + Npages = len(obl) + Nsv = int(hdr['# OF SATELLITES']) + + svl = np.tile(' ',Nsv) + + data = np.empty((Npages, Nt, Nsv)) + data.fill(np.nan) + + if useindicators: + data_lli = np.full_like(data,np.nan) + data_ssi = np.full_like(data,np.nan) + + # %% process OBS file + time_offset = [] + for ln in f: + if not ln.startswith(">"): # end of file + break + + try: + time = _timeobs(ln) + except ValueError: # garbage between header and RINEX data + logging.debug(f"garbage detected in {fn}, trying to parse at next time step") + continue + + try: + time_offset.append(float(ln[41:56])) + except ValueError: + pass + # %% get SV indices + sv = [] + raw = "" + # Number of visible satellites this time %i3 pg. A13 + for _ in range(int(ln[33:35])): + ln = f.readline() + if use is None or ln[0] in use: + sv.append(ln[:3]) + raw += ln[3:] + + if tlim is not None: + if time < tlim[0]: + continue + elif time > tlim[1]: + break + + if interval is not None: + if last_epoch is None: # initialization + last_epoch = time + else: + if time - last_epoch < interval: + continue + else: + last_epoch += interval + + if verbose: + print(time, end="\r") + + + for k in sv: + if not k in svl: + svl[np.argmax(svl==' ')]=k + + + + darr = np.atleast_2d( + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) + + t = time==times + + + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) + # satellite indices "si" to extract from this time's measurements + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + gsv = np.array(sv)[si] + isv = [i for i,s in enumerate(svl) if s in gsv] + #isv = np.zeros_like(gsv,dtype=int) + + #for ii,jj in enumerate(gsv): + # isv[ii] = np.nonzero(svl==jj) + + + for i,j in enumerate(hdr['fields'][sk]): + o = obl==j + #for ii,jj in enumerate(gsv): + # data[o,t,svl==jj]=darr[si[ii],i*3] + data[o,t,isv]=darr[si,i*3] + if useindicators: + data_lli[o,t,isv]=darr[si,i*3+1] + data_ssi[o,t,isv]=darr[si,i*3+2] + + + + + + if ' ' in svl: + svl=svl[:np.argmax(svl==' ')] + + svl, isv = np.unique(svl,return_index=True) + + data=data[:,:,isv] + if useindicators: + data_lli=data_lli[:,:,isv] + data_ssi=data_ssi[:,:,isv] + + obs = xarray.Dataset(coords={"time":times,"sv": svl}) + for i, k in enumerate(obl): + # FIXME: for limited time span reads, this drops unused data variables + # if np.isnan(data[i, ...]).all(): + # continue + if k is None: + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == 'L': + obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + elif k[0] == 'C': + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + + + obs = obs.dropna(dim="sv", how="all") + obs = obs.dropna(dim="time", how="all") # when tlim specified + + data=obs + # %% patch SV names in case of "G 7" => "G07" + data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) + # %% other attributes + data.attrs["version"] = hdr["version"] + + # Get interval from header or derive it from the data + if "interval" in hdr.keys(): + data.attrs["interval"] = hdr["interval"] + elif "time" in data.coords.keys(): + # median is robust against gaps + try: + data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) + except TypeError: + pass + else: + data.attrs["interval"] = np.nan + + data.attrs["rinextype"] = "obs" + data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 + data.attrs["time_system"] = determine_time_system(hdr) + if isinstance(fn, Path): + data.attrs["filename"] = fn.name + + if "position" in hdr.keys(): + data.attrs["position"] = hdr["position"] + if ecef2geodetic is not None: + data.attrs["position_geodetic"] = hdr["position_geodetic"] + + if time_offset: + data.attrs["time_offset"] = time_offset + + if "RCV CLOCK OFFS APPL" in hdr.keys(): + try: + data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) + except ValueError: + pass + + return data + def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]: """ From 9c65502934877f3365712725276b157f0842f7b9 Mon Sep 17 00:00:00 2001 From: breid-phys <81174535+breid-phys@users.noreply.github.com> Date: Thu, 26 May 2022 15:18:58 -0300 Subject: [PATCH 04/19] Delete obs3.py --- obs3.py | 719 -------------------------------------------------------- 1 file changed, 719 deletions(-) delete mode 100644 obs3.py diff --git a/obs3.py b/obs3.py deleted file mode 100644 index d1f1637..0000000 --- a/obs3.py +++ /dev/null @@ -1,719 +0,0 @@ -from __future__ import annotations -from pathlib import Path -import numpy as np -import logging -from datetime import datetime, timedelta -import io -import xarray -import typing as T - -try: - from pymap3d import ecef2geodetic -except ImportError: - ecef2geodetic = None -# -from .rio import opener, rinexinfo -from .common import determine_time_system, check_time_interval, check_unique_times - -"""https://github.com/mvglasow/satstat/wiki/NMEA-IDs""" - -SBAS = 100 # offset for ID -GLONASS = 37 -QZSS = 192 -BEIDOU = 0 - - -def rinexobs3( - fn: T.TextIO | Path, - use: set[str] = None, - tlim: tuple[datetime, datetime] = None, - useindicators: bool = False, - meas: list[str] = None, - verbose: bool = False, - *, - fast: bool = False, - interval: float | int | timedelta = None, -) -> xarray.Dataset: - """ - process RINEX 3 OBS data - - fn: RINEX OBS 3 filename - use: 'G' or ['G', 'R'] or similar - - tlim: read between these time bounds - useindicators: SSI, LLI are output - meas: 'L1C' or ['L1C', 'C1C'] or similar - - fast: Still double-reading file to get times - Uses # OF SATELLITES from header to try and size nparray, falls back - to Nsvsys if missing - if false, uses old _epoch method - - interval: allows decimating file read by time e.g. every 5 seconds. - Useful to speed up reading of very large RINEX files - """ - - interval = check_time_interval(interval) - - if isinstance(use, str): - use = {use} - - if isinstance(meas, str): - meas = [meas] - - if not meas or not meas[0].strip(): - meas = None - - """ - Nsvsys may need updating as GNSS systems grow. - Let us know if you needed to change them. - - Beidou is 35 max - Galileo is 36 max - """ - Nsvsys = 36 - - # %% allocate - if fast: - times = obstime3(fn) - else: - data = xarray.Dataset({}, coords={"time": [], "sv": []}) - - if tlim is not None and not isinstance(tlim[0], datetime): - raise TypeError("time bounds are specified as datetime.datetime") - - last_epoch = None - # %% loop - with opener(fn) as f: - - - if fast: - hdr = obsheader3(f,use) - - obl = [] - for sk in hdr["fields"]: - obl=obl+hdr["fields"][sk] - obl = np.unique(np.array(obl)) - obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] - - - if meas is not None: - obl = obl[np.any([np.char.find(obl,j)==0 for i, j in enumerate(meas)],0)] - - Nt = times.size - Npages = len(obl) - if '# OF SATELLITES' in hdr.keys(): - Nsv = int(hdr['# OF SATELLITES']) - else: - Nsv = Nsvsys * len(hdr['fields']) - - svl = np.tile(' ',Nsv) - - data = np.empty((Npages, Nt, Nsv)) - data.fill(np.nan) - - if useindicators: - data_lli = np.full_like(data,np.nan) - data_ssi = np.full_like(data,np.nan) - - else: - hdr = obsheader3(f, use, meas) - - - # %% process OBS file - time_offset = [] - for ln in f: - if not ln.startswith(">"): # end of file - break - - try: - time = _timeobs(ln) - except ValueError: # garbage between header and RINEX data - logging.debug(f"garbage detected in {fn}, trying to parse at next time step") - continue - - try: - time_offset.append(float(ln[41:56])) - except ValueError: - pass - # %% get SV indices - sv = [] - raw = "" - # Number of visible satellites this time %i3 pg. A13 - for _ in range(int(ln[33:35])): - ln = f.readline() - if use is None or ln[0] in use: - sv.append(ln[:3]) - raw += ln[3:] - - if tlim is not None: - if time < tlim[0]: - continue - elif time > tlim[1]: - break - - if interval is not None: - if last_epoch is None: # initialization - last_epoch = time - else: - if time - last_epoch < interval: - continue - else: - last_epoch += interval - - if verbose: - print(time, end="\r") - - if fast: - for k in sv: - # update list of satellites - if not k in svl: - svl[np.argmax(svl==' ')]=k - - darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) - ) - - t = time==times - - for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) - # satellite indices "si" to extract from this time's measurements - si = [i for i, s in enumerate(sv) if s[0] in sk] - if len(si) == 0: # no SV of this system "sk" at this time - continue - - gsv = np.array(sv)[si] - isv = [i for i,s in enumerate(svl) if s in gsv] - - for i,j in enumerate(hdr['fields'][sk]): - o = obl==j - if not np.any(o): - continue - data[o,t,isv]=darr[si,i*3] - if useindicators: - data_lli[o,t,isv]=darr[si,i*3+1] - data_ssi[o,t,isv]=darr[si,i*3+2] - else: - # this time epoch is complete, assemble the data. - data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) - - if fast: - if ' ' in svl: - # remove blank satellites (if tlim used) - svl=svl[:np.argmax(svl==' ')] - - svl, isv = np.unique(svl,return_index=True) - - data=data[:,:,isv] - if useindicators: - data_lli=data_lli[:,:,isv] - data_ssi=data_ssi[:,:,isv] - - obs = xarray.Dataset(coords={"sv": svl,"time":times}) - for i, k in enumerate(obl): - if k is None: - continue - elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect - continue - obs[k] = (("time", "sv"), data[i, :, :]) - if useindicators: - if k[0] == 'L': - obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - #elif k[0] == 'C': # only for code? - else: - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - - obs = obs.dropna(dim="sv", how="all") - obs = obs.dropna(dim="time", how="all") # when tlim specified - - data=obs - - - # %% patch SV names in case of "G 7" => "G07" - data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) - # %% other attributes - data.attrs["version"] = hdr["version"] - - # Get interval from header or derive it from the data - if "interval" in hdr.keys(): - data.attrs["interval"] = hdr["interval"] - elif "time" in data.coords.keys(): - # median is robust against gaps - try: - data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) - except TypeError: - pass - else: - data.attrs["interval"] = np.nan - - data.attrs["rinextype"] = "obs" - data.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4 - data.attrs["time_system"] = determine_time_system(hdr) - if isinstance(fn, Path): - data.attrs["filename"] = fn.name - - if "position" in hdr.keys(): - data.attrs["position"] = hdr["position"] - if ecef2geodetic is not None: - data.attrs["position_geodetic"] = hdr["position_geodetic"] - - if time_offset: - data.attrs["time_offset"] = time_offset - - if "RCV CLOCK OFFS APPL" in hdr.keys(): - try: - data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) - except ValueError: - pass - - return data - - -def _timeobs(ln: str) -> datetime: - """ - convert time from RINEX 3 OBS text to datetime - """ - - if not ln.startswith("> "): # pg. A13 - raise ValueError('RINEX 3 line beginning "> " is not present') - - return datetime( - int(ln[2:6]), - int(ln[7:9]), - int(ln[10:12]), - hour=int(ln[13:15]), - minute=int(ln[16:18]), - second=int(ln[19:21]), - microsecond=int(float(ln[19:29]) % 1 * 1000000), - ) - - -def obstime3(fn: T.TextIO | Path, verbose: bool = False) -> np.ndarray: - """ - return all times in RINEX file - """ - - times = [] - - with opener(fn) as f: - for ln in f: - if ln.startswith("> "): - try: - times.append(_timeobs(ln)) - except (ValueError, IndexError): - logging.debug(f"was not a time:\n{ln}") - continue - - times = np.asarray(times) - - check_unique_times(times) - - return times - - -def _epoch( - data: xarray.Dataset, - raw: str, - hdr: dict[str, T.Any], - time: datetime, - sv: list[str], - useindicators: bool, - verbose: bool, -) -> xarray.Dataset: - """ - block processing of each epoch (time step) - """ - darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) - ) - #data = xarray.Dataset({}, coords={"time": [], "sv": []}) - # %% assign data for each time step - for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) - # satellite indices "si" to extract from this time's measurements - si = [i for i, s in enumerate(sv) if s[0] in sk] - if len(si) == 0: # no SV of this system "sk" at this time - continue - - # measurement indices "di" to extract at this time step - di = hdr["fields_ind"][sk] - garr = darr[si, :] - garr = garr[:, di] - - gsv = np.array(sv)[si] - - dsf: dict[str, tuple] = {} - for i, k in enumerate(hdr["fields"][sk]): - dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3])) - - if useindicators: - dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3]) - - if verbose: - print(time, "\r", end="") - - epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv}) - if len(data) == 0: - data = epoch_data - #data = xarray.merge((data, epoch_data)) - elif len(hdr["fields"]) == 1: # one satellite system selected, faster to process - data = xarray.concat((data, epoch_data), dim="time") - else: # general case, slower for different satellite systems all together - data = xarray.merge((data, epoch_data)) - - return data - -def rinexsystem3( - fn: T.TextIO | Path, - use: set[str] = None, - tlim: tuple[datetime, datetime] = None, - useindicators: bool = False, - meas: list[str] = None, - verbose: bool = False, - *, - fast: bool = False, - interval: float | int | timedelta = None, -) -> xarray.Dataset: - """ - process RINEX 3 OBS data - - fn: RINEX OBS 3 filename - use: 'G' or ['G', 'R'] or similar - - tlim: read between these time bounds - useindicators: SSI, LLI are output - meas: 'L1C' or ['L1C', 'C1C'] or similar - - fast: - TODO: FUTURE, not yet enabled for OBS3 - speculative preallocation based on minimum SV assumption and file size. - Avoids double-reading file and more complicated linked lists. - Believed that Numpy array should be faster than lists anyway. - Reduce Nsvmin if error (let us know) - - interval: allows decimating file read by time e.g. every 5 seconds. - Useful to speed up reading of very large RINEX files - """ - - interval = check_time_interval(interval) - - if isinstance(use, str): - use = {use} - - if isinstance(meas, str): - meas = [meas] - - if not meas or not meas[0].strip(): - meas = None - # %% allocate - times = obstime3(fn) - #data = xarray.Dataset({}, coords={"time": [], "sv": []}) - if tlim is not None and not isinstance(tlim[0], datetime): - raise TypeError("time bounds are specified as datetime.datetime") - - last_epoch = None - # %% loop - with opener(fn) as f: - - if fast: - hdr = obsheader3(f) - else: - hdr = obsheader3(f, use, meas) - - obl = [] - for sk in hdr["fields"]: - obl=obl+hdr["fields"][sk] - obl = np.unique(np.array(obl)) - obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] - - - if meas is not None: - obl = obl[np.isin(obl,meas)] - - Nt = times.size - Npages = len(obl) - Nsv = int(hdr['# OF SATELLITES']) - - svl = np.tile(' ',Nsv) - - data = np.empty((Npages, Nt, Nsv)) - data.fill(np.nan) - - if useindicators: - data_lli = np.full_like(data,np.nan) - data_ssi = np.full_like(data,np.nan) - - # %% process OBS file - time_offset = [] - for ln in f: - if not ln.startswith(">"): # end of file - break - - try: - time = _timeobs(ln) - except ValueError: # garbage between header and RINEX data - logging.debug(f"garbage detected in {fn}, trying to parse at next time step") - continue - - try: - time_offset.append(float(ln[41:56])) - except ValueError: - pass - # %% get SV indices - sv = [] - raw = "" - # Number of visible satellites this time %i3 pg. A13 - for _ in range(int(ln[33:35])): - ln = f.readline() - if use is None or ln[0] in use: - sv.append(ln[:3]) - raw += ln[3:] - - if tlim is not None: - if time < tlim[0]: - continue - elif time > tlim[1]: - break - - if interval is not None: - if last_epoch is None: # initialization - last_epoch = time - else: - if time - last_epoch < interval: - continue - else: - last_epoch += interval - - if verbose: - print(time, end="\r") - - - for k in sv: - if not k in svl: - svl[np.argmax(svl==' ')]=k - - - - darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) - ) - - t = time==times - - - for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) - # satellite indices "si" to extract from this time's measurements - si = [i for i, s in enumerate(sv) if s[0] in sk] - if len(si) == 0: # no SV of this system "sk" at this time - continue - - gsv = np.array(sv)[si] - isv = [i for i,s in enumerate(svl) if s in gsv] - #isv = np.zeros_like(gsv,dtype=int) - - #for ii,jj in enumerate(gsv): - # isv[ii] = np.nonzero(svl==jj) - - - for i,j in enumerate(hdr['fields'][sk]): - o = obl==j - #for ii,jj in enumerate(gsv): - # data[o,t,svl==jj]=darr[si[ii],i*3] - data[o,t,isv]=darr[si,i*3] - if useindicators: - data_lli[o,t,isv]=darr[si,i*3+1] - data_ssi[o,t,isv]=darr[si,i*3+2] - - - - - - if ' ' in svl: - svl=svl[:np.argmax(svl==' ')] - - svl, isv = np.unique(svl,return_index=True) - - data=data[:,:,isv] - if useindicators: - data_lli=data_lli[:,:,isv] - data_ssi=data_ssi[:,:,isv] - - obs = xarray.Dataset(coords={"time":times,"sv": svl}) - for i, k in enumerate(obl): - # FIXME: for limited time span reads, this drops unused data variables - # if np.isnan(data[i, ...]).all(): - # continue - if k is None: - continue - obs[k] = (("time", "sv"), data[i, :, :]) - if useindicators: - if k[0] == 'L': - obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - elif k[0] == 'C': - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - - - obs = obs.dropna(dim="sv", how="all") - obs = obs.dropna(dim="time", how="all") # when tlim specified - - data=obs - # %% patch SV names in case of "G 7" => "G07" - data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) - # %% other attributes - data.attrs["version"] = hdr["version"] - - # Get interval from header or derive it from the data - if "interval" in hdr.keys(): - data.attrs["interval"] = hdr["interval"] - elif "time" in data.coords.keys(): - # median is robust against gaps - try: - data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) - except TypeError: - pass - else: - data.attrs["interval"] = np.nan - - data.attrs["rinextype"] = "obs" - data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 - data.attrs["time_system"] = determine_time_system(hdr) - if isinstance(fn, Path): - data.attrs["filename"] = fn.name - - if "position" in hdr.keys(): - data.attrs["position"] = hdr["position"] - if ecef2geodetic is not None: - data.attrs["position_geodetic"] = hdr["position_geodetic"] - - if time_offset: - data.attrs["time_offset"] = time_offset - - if "RCV CLOCK OFFS APPL" in hdr.keys(): - try: - data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) - except ValueError: - pass - - return data - - -def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]: - """ - handle LLI (loss of lock) and SSI (signal strength) - """ - if k.startswith(("L1", "L2")): - d[k + "lli"] = (("time", "sv"), np.atleast_2d(arr[:, 0])) - - d[k + "ssi"] = (("time", "sv"), np.atleast_2d(arr[:, 1])) - - return d - - -def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dict[str, T.Any]: - """ - get RINEX 3 OBS types, for each system type - optionally, select system type and/or measurement type to greatly - speed reading and save memory (RAM, disk) - """ - if isinstance(f, (str, Path)): - with opener(f, header=True) as h: - return obsheader3(h, use, meas) - - fields = {} - Fmax = 0 - - # %% first line - hdr = rinexinfo(f) - - for ln in f: - if "END OF HEADER" in ln: - break - - hd = ln[60:80] - c = ln[:60] - if "SYS / # / OBS TYPES" in hd: - k = c[0] - fields[k] = c[6:60].split() - N = int(c[3:6]) - # %% maximum number of fields in a file, to allow fast Numpy parse. - Fmax = max(N, Fmax) - - n = N - 13 - while n > 0: # Rinex 3.03, pg. A6, A7 - ln = f.readline() - assert "SYS / # / OBS TYPES" in ln[60:] - fields[k] += ln[6:60].split() - n -= 13 - - assert len(fields[k]) == N - - continue - - if hd.strip() not in hdr: # Header label - hdr[hd.strip()] = c # don't strip for fixed-width parsers - # string with info - else: # concatenate to the existing string - hdr[hd.strip()] += " " + c - - # %% list with x,y,z cartesian (OPTIONAL) - # Rinex 3.03, pg. A6, Table A2 - try: - # some RINEX files have bad headers with mulitple APPROX POSITION XYZ. - # we choose to use the first such header. - hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3] - if ecef2geodetic is not None and len(hdr["position"]) == 3: - hdr["position_geodetic"] = ecef2geodetic(*hdr["position"]) - except (KeyError, ValueError): - pass - # %% time - try: - t0s = hdr["TIME OF FIRST OBS"] - # NOTE: must do second=int(float()) due to non-conforming files - hdr["t0"] = datetime( - year=int(t0s[:6]), - month=int(t0s[6:12]), - day=int(t0s[12:18]), - hour=int(t0s[18:24]), - minute=int(t0s[24:30]), - second=int(float(t0s[30:36])), - microsecond=int(float(t0s[30:43]) % 1 * 1000000), - ) - except (KeyError, ValueError): - pass - - try: - hdr["interval"] = float(hdr["INTERVAL"][:10]) - except (KeyError, ValueError): - pass - # %% select specific satellite systems only (optional) - if use: - if not set(fields.keys()).intersection(use): - raise KeyError(f"system type {use} not found in RINEX file") - - fields = {k: fields[k] for k in use if k in fields} - - # perhaps this could be done more efficiently, but it's probably low impact on overall program. - # simple set and frozenset operations do NOT preserve order, which would completely mess up reading! - sysind: dict[str, T.Any] = {} - if isinstance(meas, (tuple, list, np.ndarray)): - for sk in fields: # iterate over each system - # ind = np.isin(fields[sk], meas) # boolean vector - ind = np.zeros(len(fields[sk]), dtype=bool) - for m in meas: - for i, field in enumerate(fields[sk]): - if field.startswith(m): - ind[i] = True - - fields[sk] = np.array(fields[sk])[ind].tolist() - sysind[sk] = np.empty(Fmax * 3, dtype=bool) # *3 due to LLI, SSI - for j, i in enumerate(ind): - sysind[sk][j * 3 : j * 3 + 3] = i - else: - sysind = {k: np.s_[:] for k in fields} - - hdr["fields"] = fields - hdr["fields_ind"] = sysind - hdr["Fmax"] = Fmax - - return hdr From f521b9ef2d9e87aa912ab016232d034735a8cebd Mon Sep 17 00:00:00 2001 From: breid-phys <81174535+breid-phys@users.noreply.github.com> Date: Thu, 26 May 2022 15:21:41 -0300 Subject: [PATCH 05/19] Updated rio.py .Z .Z compressed files can also have Hatanaka compression --- src/georinex/rio.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/georinex/rio.py b/src/georinex/rio.py index cb9b2cf..4787428 100644 --- a/src/georinex/rio.py +++ b/src/georinex/rio.py @@ -72,6 +72,13 @@ def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]: raise ImportError("pip install unlzw3") with fn.open("rb") as zu: with io.StringIO(unlzw(zu.read()).decode("ascii")) as f: + _, is_crinex = rinex_version(first_nonblank_line(f)) + f.seek(0) + + if is_crinex and not header: + # Conversion to string is necessary because of a quirk where gzip.open() + # even with 'rt' doesn't decompress until read. + f = io.StringIO(crx2rnx(f.read())) yield f else: # assume not compressed (or Hatanaka) with fn.open("r", encoding="ascii", errors="ignore") as f: From 254d8e1184da9c846911645aee257cc0be42e5ec Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 Jul 2022 11:33:07 -0300 Subject: [PATCH 06/19] fixed critical indexing mistake in fast obs3 --- src/georinex/obs3.py | 389 +++++++++++++++++++++++++++++-- src/georinex/rio.py | 7 + src/georinex/tests/test_rinex.py | 2 +- 3 files changed, 380 insertions(+), 18 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 130940b..c16bea4 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -44,12 +44,10 @@ def rinexobs3( useindicators: SSI, LLI are output meas: 'L1C' or ['L1C', 'C1C'] or similar - fast: - TODO: FUTURE, not yet enabled for OBS3 - speculative preallocation based on minimum SV assumption and file size. - Avoids double-reading file and more complicated linked lists. - Believed that Numpy array should be faster than lists anyway. - Reduce Nsvmin if error (let us know) + fast: Still double-reading file to get times + Uses # OF SATELLITES from header to try and size nparray, falls back + to Nsvsys if missing + if false, uses old _epoch method interval: allows decimating file read by time e.g. every 5 seconds. Useful to speed up reading of very large RINEX files @@ -65,16 +63,62 @@ def rinexobs3( if not meas or not meas[0].strip(): meas = None + + """ + Nsvsys may need updating as GNSS systems grow. + Let us know if you needed to change them. + + Beidou is 35 max + Galileo is 36 max + """ + Nsvsys = 36 + # %% allocate - # times = obstime3(fn) - data = xarray.Dataset({}, coords={"time": [], "sv": []}) + if fast: + times = obstime3(fn) + else: + data = xarray.Dataset({}, coords={"time": [], "sv": []}) + if tlim is not None and not isinstance(tlim[0], datetime): raise TypeError("time bounds are specified as datetime.datetime") last_epoch = None # %% loop with opener(fn) as f: - hdr = obsheader3(f, use, meas) + + + if fast: + hdr = obsheader3(f,use) + + obl = [] + for sk in hdr["fields"]: + obl=obl+hdr["fields"][sk] + obl = np.unique(np.array(obl)) + obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] + + + if meas is not None: + obl = obl[np.any([np.char.find(obl,j)==0 for i, j in enumerate(meas)],0)] + + Nt = times.size + Npages = len(obl) + if '# OF SATELLITES' in hdr.keys(): + Nsv = int(hdr['# OF SATELLITES']) + else: + Nsv = Nsvsys * len(hdr['fields']) + + svl = np.tile(' ',Nsv) + + data = np.empty((Npages, Nt, Nsv)) + data.fill(np.nan) + + if useindicators: + data_lli = np.full_like(data,np.nan) + data_ssi = np.full_like(data,np.nan) + + else: + hdr = obsheader3(f, use, meas) + # %% process OBS file time_offset = [] @@ -98,8 +142,9 @@ def rinexobs3( # Number of visible satellites this time %i3 pg. A13 for _ in range(int(ln[33:35])): ln = f.readline() - sv.append(ln[:3]) - raw += ln[3:] + if use is None or ln[0] in use: + sv.append(ln[:3]) + raw += ln[3:] if tlim is not None: if time < tlim[0]: @@ -118,10 +163,84 @@ def rinexobs3( if verbose: print(time, end="\r") - - # this time epoch is complete, assemble the data. - data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) - + + if fast: + for k in sv: + # update list of satellites + if not k in svl: + svl[np.argmax(svl==' ')]=k + + + darr = np.atleast_2d( + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) + + t = time==times + + #print(darr.shape) + #print(sv) + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) + # satellite indices "si" to extract from this time's measurements + + + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + gsv = np.array(sv)[si] + #print(gsv) + #isv = [i for i,s in enumerate(svl) if s in gsv] + isv = [np.where(s==gsv) for i,s in enumerate(svl)] + isv,jsv = np.nonzero(np.atleast_2d(svl).T == np.atleast_2d(gsv)) + #jsv = [i for i,s in enumerate(sv) if s in gsv] + #print(isv) + #print(jsv) + #print(svl[isv]) + for i,j in enumerate(hdr['fields'][sk]): + o = obl==j + if not np.any(o): + continue + data[o,t,isv]=darr[jsv,i*3] + if useindicators: + data_lli[o,t,isv]=darr[jsv,i*3+1] + data_ssi[o,t,isv]=darr[jsv,i*3+2] + else: + # this time epoch is complete, assemble the data. + data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) + + if fast: + if ' ' in svl: + # remove blank satellites (if tlim used) + svl=svl[:np.argmax(svl==' ')] + + svl, isv = np.unique(svl,return_index=True) + #isv = range(svl.size) + data=data[:,:,isv] + if useindicators: + data_lli=data_lli[:,:,isv] + data_ssi=data_ssi[:,:,isv] + + obs = xarray.Dataset(coords={"sv": svl,"time":times}) + for i, k in enumerate(obl): + if k is None: + continue + elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == 'L': + obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + #elif k[0] == 'C': # only for code? + else: + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + + obs = obs.dropna(dim="sv", how="all") + obs = obs.dropna(dim="time", how="all") # when tlim specified + + data=obs + + # %% patch SV names in case of "G 7" => "G07" data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) # %% other attributes @@ -140,7 +259,7 @@ def rinexobs3( data.attrs["interval"] = np.nan data.attrs["rinextype"] = "obs" - data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 + data.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4 data.attrs["time_system"] = determine_time_system(hdr) if isinstance(fn, Path): data.attrs["filename"] = fn.name @@ -152,7 +271,7 @@ def rinexobs3( if time_offset: data.attrs["time_offset"] = time_offset - + if "RCV CLOCK OFFS APPL" in hdr.keys(): try: data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) @@ -219,6 +338,7 @@ def _epoch( darr = np.atleast_2d( np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) ) + #data = xarray.Dataset({}, coords={"time": [], "sv": []}) # %% assign data for each time step for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) # satellite indices "si" to extract from this time's measurements @@ -246,6 +366,7 @@ def _epoch( epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv}) if len(data) == 0: data = epoch_data + #data = xarray.merge((data, epoch_data)) elif len(hdr["fields"]) == 1: # one satellite system selected, faster to process data = xarray.concat((data, epoch_data), dim="time") else: # general case, slower for different satellite systems all together @@ -253,6 +374,240 @@ def _epoch( return data +def rinexsystem3( + fn: T.TextIO | Path, + use: set[str] = None, + tlim: tuple[datetime, datetime] = None, + useindicators: bool = False, + meas: list[str] = None, + verbose: bool = False, + *, + fast: bool = False, + interval: float | int | timedelta = None, +) -> xarray.Dataset: + """ + process RINEX 3 OBS data + + fn: RINEX OBS 3 filename + use: 'G' or ['G', 'R'] or similar + + tlim: read between these time bounds + useindicators: SSI, LLI are output + meas: 'L1C' or ['L1C', 'C1C'] or similar + + fast: + TODO: FUTURE, not yet enabled for OBS3 + speculative preallocation based on minimum SV assumption and file size. + Avoids double-reading file and more complicated linked lists. + Believed that Numpy array should be faster than lists anyway. + Reduce Nsvmin if error (let us know) + + interval: allows decimating file read by time e.g. every 5 seconds. + Useful to speed up reading of very large RINEX files + """ + + interval = check_time_interval(interval) + + if isinstance(use, str): + use = {use} + + if isinstance(meas, str): + meas = [meas] + + if not meas or not meas[0].strip(): + meas = None + # %% allocate + times = obstime3(fn) + #data = xarray.Dataset({}, coords={"time": [], "sv": []}) + if tlim is not None and not isinstance(tlim[0], datetime): + raise TypeError("time bounds are specified as datetime.datetime") + + last_epoch = None + # %% loop + with opener(fn) as f: + + if fast: + hdr = obsheader3(f) + else: + hdr = obsheader3(f, use, meas) + + obl = [] + for sk in hdr["fields"]: + obl=obl+hdr["fields"][sk] + obl = np.unique(np.array(obl)) + obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] + + + if meas is not None: + obl = obl[np.isin(obl,meas)] + + Nt = times.size + Npages = len(obl) + Nsv = int(hdr['# OF SATELLITES']) + + svl = np.tile(' ',Nsv) + + data = np.empty((Npages, Nt, Nsv)) + data.fill(np.nan) + + if useindicators: + data_lli = np.full_like(data,np.nan) + data_ssi = np.full_like(data,np.nan) + + # %% process OBS file + time_offset = [] + for ln in f: + if not ln.startswith(">"): # end of file + break + + try: + time = _timeobs(ln) + except ValueError: # garbage between header and RINEX data + logging.debug(f"garbage detected in {fn}, trying to parse at next time step") + continue + + try: + time_offset.append(float(ln[41:56])) + except ValueError: + pass + # %% get SV indices + sv = [] + raw = "" + # Number of visible satellites this time %i3 pg. A13 + for _ in range(int(ln[33:35])): + ln = f.readline() + if use is None or ln[0] in use: + sv.append(ln[:3]) + raw += ln[3:] + + if tlim is not None: + if time < tlim[0]: + continue + elif time > tlim[1]: + break + + if interval is not None: + if last_epoch is None: # initialization + last_epoch = time + else: + if time - last_epoch < interval: + continue + else: + last_epoch += interval + + if verbose: + print(time, end="\r") + + + for k in sv: + if not k in svl: + svl[np.argmax(svl==' ')]=k + + + + darr = np.atleast_2d( + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) + + t = time==times + + + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) + # satellite indices "si" to extract from this time's measurements + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + gsv = np.array(sv)[si] + isv = [i for i,s in enumerate(svl) if s in gsv] + #isv = np.zeros_like(gsv,dtype=int) + + #for ii,jj in enumerate(gsv): + # isv[ii] = np.nonzero(svl==jj) + + + for i,j in enumerate(hdr['fields'][sk]): + o = obl==j + #for ii,jj in enumerate(gsv): + # data[o,t,svl==jj]=darr[si[ii],i*3] + data[o,t,isv]=darr[si,i*3] + if useindicators: + data_lli[o,t,isv]=darr[si,i*3+1] + data_ssi[o,t,isv]=darr[si,i*3+2] + + + + + + if ' ' in svl: + svl=svl[:np.argmax(svl==' ')] + + svl, isv = np.unique(svl,return_index=True) + + data=data[:,:,isv] + if useindicators: + data_lli=data_lli[:,:,isv] + data_ssi=data_ssi[:,:,isv] + + obs = xarray.Dataset(coords={"time":times,"sv": svl}) + for i, k in enumerate(obl): + # FIXME: for limited time span reads, this drops unused data variables + # if np.isnan(data[i, ...]).all(): + # continue + if k is None: + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == 'L': + obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + elif k[0] == 'C': + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + + + obs = obs.dropna(dim="sv", how="all") + obs = obs.dropna(dim="time", how="all") # when tlim specified + + data=obs + # %% patch SV names in case of "G 7" => "G07" + data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) + # %% other attributes + data.attrs["version"] = hdr["version"] + + # Get interval from header or derive it from the data + if "interval" in hdr.keys(): + data.attrs["interval"] = hdr["interval"] + elif "time" in data.coords.keys(): + # median is robust against gaps + try: + data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) + except TypeError: + pass + else: + data.attrs["interval"] = np.nan + + data.attrs["rinextype"] = "obs" + data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 + data.attrs["time_system"] = determine_time_system(hdr) + if isinstance(fn, Path): + data.attrs["filename"] = fn.name + + if "position" in hdr.keys(): + data.attrs["position"] = hdr["position"] + if ecef2geodetic is not None: + data.attrs["position_geodetic"] = hdr["position_geodetic"] + + if time_offset: + data.attrs["time_offset"] = time_offset + + if "RCV CLOCK OFFS APPL" in hdr.keys(): + try: + data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) + except ValueError: + pass + + return data + def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]: """ diff --git a/src/georinex/rio.py b/src/georinex/rio.py index cb9b2cf..4787428 100644 --- a/src/georinex/rio.py +++ b/src/georinex/rio.py @@ -72,6 +72,13 @@ def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]: raise ImportError("pip install unlzw3") with fn.open("rb") as zu: with io.StringIO(unlzw(zu.read()).decode("ascii")) as f: + _, is_crinex = rinex_version(first_nonblank_line(f)) + f.seek(0) + + if is_crinex and not header: + # Conversion to string is necessary because of a quirk where gzip.open() + # even with 'rt' doesn't decompress until read. + f = io.StringIO(crx2rnx(f.read())) yield f else: # assume not compressed (or Hatanaka) with fn.open("r", encoding="ascii", errors="ignore") as f: diff --git a/src/georinex/tests/test_rinex.py b/src/georinex/tests/test_rinex.py index 74fd7e5..1d8c273 100644 --- a/src/georinex/tests/test_rinex.py +++ b/src/georinex/tests/test_rinex.py @@ -61,7 +61,7 @@ def test_minimal(tmp_path, filename): if int(dat.version) == 2: assert dat.fast_processing elif int(dat.version) == 3: - assert not dat.fast_processing # FIXME: update when OBS3 fast processing is added. + assert dat.fast_processing def test_dont_care_file_extension(): From bdb26bbaccd8775f1ffa8ce6187373405b6dc758 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 8 Jul 2022 09:58:12 -0300 Subject: [PATCH 07/19] fixed index when multi-constellation --- src/georinex/obs3.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 8a72dfd..e48a30b 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -186,9 +186,11 @@ def rinexobs3( gsv = np.array(sv)[si] - isv = [np.where(s==gsv) for i,s in enumerate(svl)] - isv,jsv = np.nonzero(np.atleast_2d(svl).T == np.atleast_2d(gsv)) - + #isv = [np.where(s==gsv) for i,s in enumerate(svl)] + isv,jsv = np.nonzero(np.logical_and( + np.atleast_2d(svl).T == np.atleast_2d(sv), + np.isin(sv,gsv)) ) + for i,j in enumerate(hdr['fields'][sk]): o = obl==j From 82f8971d9e3dc092f7d726a8d662ece1ff4ce8fb Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 16 Sep 2022 18:05:23 -0300 Subject: [PATCH 08/19] minor tweaks for high speed processing --- src/georinex/base.py | 6 +++--- src/georinex/obs2.py | 4 ++-- src/georinex/obs3.py | 9 ++++----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/georinex/base.py b/src/georinex/base.py index becb78b..5d419d6 100644 --- a/src/georinex/base.py +++ b/src/georinex/base.py @@ -154,7 +154,7 @@ def rinexnav( if fn.suffix == ".nc": try: - return xarray.open_dataset(fn, group=group) + return xarray.load_dataset(fn, group=group) except OSError as e: raise LookupError(f"Group {group} not found in {fn} {e}") @@ -205,7 +205,7 @@ def rinexobs( # %% NetCDF4 if fn.suffix == ".nc": try: - return xarray.open_dataset(fn, group=group) + return xarray.load_dataset(fn, group=group) except OSError as e: raise LookupError(f"Group {group} not found in {fn} {e}") @@ -259,7 +259,7 @@ def _groupexists(fn: Path, group: str, overwrite: bool) -> str: # be sure there isn't already NAV in it try: - xarray.open_dataset(fn, group=group) + xarray.load_dataset(fn, group=group) raise ValueError(f"{group} already in {fn}") except OSError: pass diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py index 4608bab..5dee6db 100644 --- a/src/georinex/obs2.py +++ b/src/georinex/obs2.py @@ -36,7 +36,7 @@ def rinexobs2( if not use: use = {"C", "E", "G", "J", "R", "S"} - obs = xarray.Dataset({}, coords={"time": [], "sv": []}) + obs = xarray.Dataset({}, coords={"time": np.array([],dtype=np.datetime64), "sv": []}) attrs: dict[str, T.Any] = {} for u in use: o = rinexsystem2( @@ -388,7 +388,7 @@ def obsheader2( hdr["Nl_sv"] = ceil(hdr["Nobs"] / 5) # %% list with receiver location in x,y,z cartesian ECEF (OPTIONAL) try: - hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()] + hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3] if ecef2geodetic is not None: hdr["position_geodetic"] = ecef2geodetic(*hdr["position"]) except (KeyError, ValueError): diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index e48a30b..77ac5fc 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -88,7 +88,7 @@ def rinexobs3( if fast: - hdr = obsheader3(f,use) + hdr = obsheader3(f,use) obl = [] for sk in hdr["fields"]: @@ -96,7 +96,6 @@ def rinexobs3( obl = np.unique(np.array(obl)) obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] - if meas is not None: obl = obl[np.any([np.char.find(obl,j)==0 for i, j in enumerate(meas)],0)] @@ -186,7 +185,6 @@ def rinexobs3( gsv = np.array(sv)[si] - #isv = [np.where(s==gsv) for i,s in enumerate(svl)] isv,jsv = np.nonzero(np.logical_and( np.atleast_2d(svl).T == np.atleast_2d(sv), np.isin(sv,gsv)) ) @@ -234,7 +232,7 @@ def rinexobs3( obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) obs = obs.dropna(dim="sv", how="all") - obs = obs.dropna(dim="time", how="all") # when tlim specified + #obs = obs.dropna(dim="time", how="all") # when tlim specified data=obs @@ -266,7 +264,8 @@ def rinexobs3( data.attrs["position"] = hdr["position"] if ecef2geodetic is not None: data.attrs["position_geodetic"] = hdr["position_geodetic"] - + if "rxmodel" in hdr.keys(): + obs.attrs["rxmodel"] = hdr["rxmodel"] if time_offset: data.attrs["time_offset"] = time_offset From 2ea8aca5616e25fcaa53b92ea298a7bcd5647ace Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 4 Nov 2022 10:12:56 -0300 Subject: [PATCH 09/19] force unique times --- src/georinex/obs3.py | 117 +++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 59 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 77ac5fc..b3717ec 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -63,7 +63,7 @@ def rinexobs3( if not meas or not meas[0].strip(): meas = None - + """ Nsvsys may need updating as GNSS systems grow. Let us know if you needed to change them. @@ -72,45 +72,45 @@ def rinexobs3( Galileo is 36 max """ Nsvsys = 36 - + # %% allocate if fast: - times = obstime3(fn) + times = np.unique(obstime3(fn)) else: data = xarray.Dataset({}, coords={"time": [], "sv": []}) - + if tlim is not None and not isinstance(tlim[0], datetime): raise TypeError("time bounds are specified as datetime.datetime") last_epoch = None # %% loop with opener(fn) as f: - + if fast: - hdr = obsheader3(f,use) - + hdr = obsheader3(f,use) + obl = [] for sk in hdr["fields"]: obl=obl+hdr["fields"][sk] obl = np.unique(np.array(obl)) obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] - + if meas is not None: obl = obl[np.any([np.char.find(obl,j)==0 for i, j in enumerate(meas)],0)] - + Nt = times.size Npages = len(obl) if '# OF SATELLITES' in hdr.keys(): Nsv = int(hdr['# OF SATELLITES']) else: Nsv = Nsvsys * len(hdr['fields']) - + svl = np.tile(' ',Nsv) - + data = np.empty((Npages, Nt, Nsv)) data.fill(np.nan) - + if useindicators: data_lli = np.full_like(data,np.nan) data_ssi = np.full_like(data,np.nan) @@ -162,18 +162,18 @@ def rinexobs3( if verbose: print(time, end="\r") - + if fast: for k in sv: # update list of satellites if not k in svl: svl[np.argmax(svl==' ')]=k - + darr = np.atleast_2d( np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) ) - + t = time==times for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) @@ -182,23 +182,22 @@ def rinexobs3( si = [i for i, s in enumerate(sv) if s[0] in sk] if len(si) == 0: # no SV of this system "sk" at this time continue - + gsv = np.array(sv)[si] isv,jsv = np.nonzero(np.logical_and( np.atleast_2d(svl).T == np.atleast_2d(sv), np.isin(sv,gsv)) ) - + for i,j in enumerate(hdr['fields'][sk]): o = obl==j if not np.any(o): continue - - data[o,t,isv]=darr[jsv,i*3] + data[o,t,isv]=darr[jsv,i*3] if useindicators: - data_lli[o,t,isv]=darr[jsv,i*3+1] - data_ssi[o,t,isv]=darr[jsv,i*3+2] + data_lli[o,t,isv]=darr[jsv,i*3+1] + data_ssi[o,t,isv]=darr[jsv,i*3+2] else: # this time epoch is complete, assemble the data. @@ -208,14 +207,14 @@ def rinexobs3( if ' ' in svl: # remove blank satellites (if tlim used) svl=svl[:np.argmax(svl==' ')] - + svl, isv = np.unique(svl,return_index=True) data=data[:,:,isv] if useindicators: data_lli=data_lli[:,:,isv] data_ssi=data_ssi[:,:,isv] - + obs = xarray.Dataset(coords={"sv": svl,"time":times}) for i, k in enumerate(obl): if k is None: @@ -230,13 +229,13 @@ def rinexobs3( #elif k[0] == 'C': # only for code? else: obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - + obs = obs.dropna(dim="sv", how="all") #obs = obs.dropna(dim="time", how="all") # when tlim specified - + data=obs - - + + # %% patch SV names in case of "G 7" => "G07" data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) # %% other attributes @@ -268,7 +267,7 @@ def rinexobs3( obs.attrs["rxmodel"] = hdr["rxmodel"] if time_offset: data.attrs["time_offset"] = time_offset - + if "RCV CLOCK OFFS APPL" in hdr.keys(): try: data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) @@ -422,31 +421,31 @@ def rinexsystem3( last_epoch = None # %% loop with opener(fn) as f: - + if fast: hdr = obsheader3(f) else: hdr = obsheader3(f, use, meas) - + obl = [] for sk in hdr["fields"]: obl=obl+hdr["fields"][sk] obl = np.unique(np.array(obl)) obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] - - + + if meas is not None: obl = obl[np.isin(obl,meas)] - + Nt = times.size Npages = len(obl) Nsv = int(hdr['# OF SATELLITES']) - + svl = np.tile(' ',Nsv) - + data = np.empty((Npages, Nt, Nsv)) data.fill(np.nan) - + if useindicators: data_lli = np.full_like(data,np.nan) data_ssi = np.full_like(data,np.nan) @@ -495,57 +494,57 @@ def rinexsystem3( if verbose: print(time, end="\r") - + for k in sv: if not k in svl: svl[np.argmax(svl==' ')]=k - + darr = np.atleast_2d( np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) ) - + t = time==times - - + + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) # satellite indices "si" to extract from this time's measurements si = [i for i, s in enumerate(sv) if s[0] in sk] if len(si) == 0: # no SV of this system "sk" at this time continue - + gsv = np.array(sv)[si] isv = [i for i,s in enumerate(svl) if s in gsv] #isv = np.zeros_like(gsv,dtype=int) - + #for ii,jj in enumerate(gsv): # isv[ii] = np.nonzero(svl==jj) - - + + for i,j in enumerate(hdr['fields'][sk]): o = obl==j #for ii,jj in enumerate(gsv): # data[o,t,svl==jj]=darr[si[ii],i*3] - data[o,t,isv]=darr[si,i*3] + data[o,t,isv]=darr[si,i*3] if useindicators: - data_lli[o,t,isv]=darr[si,i*3+1] - data_ssi[o,t,isv]=darr[si,i*3+2] - - - - - + data_lli[o,t,isv]=darr[si,i*3+1] + data_ssi[o,t,isv]=darr[si,i*3+2] + + + + + if ' ' in svl: svl=svl[:np.argmax(svl==' ')] - + svl, isv = np.unique(svl,return_index=True) - + data=data[:,:,isv] if useindicators: data_lli=data_lli[:,:,isv] data_ssi=data_ssi[:,:,isv] - + obs = xarray.Dataset(coords={"time":times,"sv": svl}) for i, k in enumerate(obl): # FIXME: for limited time span reads, this drops unused data variables @@ -560,11 +559,11 @@ def rinexsystem3( obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) elif k[0] == 'C': obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - - + + obs = obs.dropna(dim="sv", how="all") obs = obs.dropna(dim="time", how="all") # when tlim specified - + data=obs # %% patch SV names in case of "G 7" => "G07" data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) From 0c6b3d2e8f6acac92645fef5615877c69a04f5bf Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 4 Nov 2022 10:30:03 -0300 Subject: [PATCH 10/19] actual bugfix --- src/georinex/obs3.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index b3717ec..45151b5 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -75,7 +75,7 @@ def rinexobs3( # %% allocate if fast: - times = np.unique(obstime3(fn)) + times = obstime3(fn) else: data = xarray.Dataset({}, coords={"time": [], "sv": []}) @@ -194,10 +194,11 @@ def rinexobs3( o = obl==j if not np.any(o): continue - data[o,t,isv]=darr[jsv,i*3] + data[np.ix_(o,t,isv)]=darr[jsv,i*3] + if useindicators: - data_lli[o,t,isv]=darr[jsv,i*3+1] - data_ssi[o,t,isv]=darr[jsv,i*3+2] + data_lli[np.ix_(o,t,isv)]=darr[jsv,i*3+1] + data_ssi[np.ix_(o,t,isv)]=darr[jsv,i*3+2] else: # this time epoch is complete, assemble the data. From ef75b05798b8ab5371488dfc37d6ad5f82d295ee Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 8 Dec 2022 15:20:29 -0400 Subject: [PATCH 11/19] code formatting cleanup --- src/georinex/obs3.py | 187 ++++++++++++++++++++----------------------- 1 file changed, 85 insertions(+), 102 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 45151b5..1e98e4d 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -86,18 +86,18 @@ def rinexobs3( # %% loop with opener(fn) as f: - if fast: - hdr = obsheader3(f,use) + hdr = obsheader3(f, use) obl = [] for sk in hdr["fields"]: - obl=obl+hdr["fields"][sk] + obl = obl + hdr["fields"][sk] obl = np.unique(np.array(obl)) obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] if meas is not None: - obl = obl[np.any([np.char.find(obl,j)==0 for i, j in enumerate(meas)],0)] + obl = obl[np.any([np.char.find(obl, j) == 0 + for i, j in enumerate(meas)], 0)] Nt = times.size Npages = len(obl) @@ -106,20 +106,19 @@ def rinexobs3( else: Nsv = Nsvsys * len(hdr['fields']) - svl = np.tile(' ',Nsv) + svl = np.tile(' ', Nsv) data = np.empty((Npages, Nt, Nsv)) data.fill(np.nan) if useindicators: - data_lli = np.full_like(data,np.nan) - data_ssi = np.full_like(data,np.nan) + data_lli = np.full_like(data, np.nan) + data_ssi = np.full_like(data, np.nan) else: hdr = obsheader3(f, use, meas) - - # %% process OBS file + # process OBS file time_offset = [] for ln in f: if not ln.startswith(">"): # end of file @@ -166,15 +165,14 @@ def rinexobs3( if fast: for k in sv: # update list of satellites - if not k in svl: - svl[np.argmax(svl==' ')]=k - + if k not in svl: + svl[np.argmax(svl == ' ')] = k darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) - ) + np.genfromtxt(io.BytesIO(raw.encode("ascii")), + delimiter=(14, 1, 1) * hdr["Fmax"])) - t = time==times + t = time == times for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) # satellite indices "si" to extract from this time's measurements @@ -185,60 +183,60 @@ def rinexobs3( gsv = np.array(sv)[si] - isv,jsv = np.nonzero(np.logical_and( + isv, jsv = np.nonzero(np.logical_and( np.atleast_2d(svl).T == np.atleast_2d(sv), - np.isin(sv,gsv)) ) + np.isin(sv, gsv)) ) - - for i,j in enumerate(hdr['fields'][sk]): - o = obl==j + for i, j in enumerate(hdr['fields'][sk]): + o = obl == j if not np.any(o): continue - data[np.ix_(o,t,isv)]=darr[jsv,i*3] + data[np.ix_(o, t, isv)] = darr[jsv, i*3] if useindicators: - data_lli[np.ix_(o,t,isv)]=darr[jsv,i*3+1] - data_ssi[np.ix_(o,t,isv)]=darr[jsv,i*3+2] + data_lli[np.ix_(o, t, isv)] = darr[jsv, i*3+1] + data_ssi[np.ix_(o, t, isv)] = darr[jsv, i*3+2] else: # this time epoch is complete, assemble the data. data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) if fast: - if ' ' in svl: - # remove blank satellites (if tlim used) - svl=svl[:np.argmax(svl==' ')] - - svl, isv = np.unique(svl,return_index=True) - - data=data[:,:,isv] - if useindicators: - data_lli=data_lli[:,:,isv] - data_ssi=data_ssi[:,:,isv] - - obs = xarray.Dataset(coords={"sv": svl,"time":times}) - for i, k in enumerate(obl): - if k is None: - continue - elif np.all(np.isnan(data[i, :, :])): # drop all nan datasets like tests expect - continue - obs[k] = (("time", "sv"), data[i, :, :]) - if useindicators: - if k[0] == 'L': - obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - #elif k[0] == 'C': # only for code? - else: - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - - obs = obs.dropna(dim="sv", how="all") - #obs = obs.dropna(dim="time", how="all") # when tlim specified - - data=obs + if ' ' in svl: + # remove blank satellites (if tlim used) + svl = svl[:np.argmax(svl == ' ')] + + svl, isv = np.unique(svl, return_index=True) + data = data[:, :, isv] + if useindicators: + data_lli = data_lli[:, :, isv] + data_ssi = data_ssi[:, :, isv] + + obs = xarray.Dataset(coords={"sv": svl, "time": times}) + for i, k in enumerate(obl): + if k is None: + continue + elif np.all(np.isnan(data[i, :, :])): + # drop all nan datasets like tests expect + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == 'L': + obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + # elif k[0] == 'C': # only for code? + else: + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + + obs = obs.dropna(dim="sv", how="all") + # obs = obs.dropna(dim="time", how="all") # when tlim specified + + data = obs # %% patch SV names in case of "G 7" => "G07" - data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) + data = data.assign_coords(sv=[s.replace(" ", "0") + for s in data.sv.values.tolist()]) # %% other attributes data.attrs["version"] = hdr["version"] @@ -248,7 +246,8 @@ def rinexobs3( elif "time" in data.coords.keys(): # median is robust against gaps try: - data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) + data.attrs["interval"] = np.median(np.diff(data.time) + / np.timedelta64(1, "s")) except TypeError: pass else: @@ -333,9 +332,9 @@ def _epoch( block processing of each epoch (time step) """ darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) - ) - #data = xarray.Dataset({}, coords={"time": [], "sv": []}) + np.genfromtxt(io.BytesIO(raw.encode("ascii")), + delimiter=(14, 1, 1) * hdr["Fmax"])) + # data = xarray.Dataset({}, coords={"time": [], "sv": []}) # %% assign data for each time step for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) # satellite indices "si" to extract from this time's measurements @@ -355,7 +354,7 @@ def _epoch( dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3])) if useindicators: - dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3]) + dsf = _indicators(dsf, k, garr[:, i * 3 + 1: i * 3 + 3]) if verbose: print(time, "\r", end="") @@ -363,7 +362,7 @@ def _epoch( epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv}) if len(data) == 0: data = epoch_data - #data = xarray.merge((data, epoch_data)) + # data = xarray.merge((data, epoch_data)) elif len(hdr["fields"]) == 1: # one satellite system selected, faster to process data = xarray.concat((data, epoch_data), dim="time") else: # general case, slower for different satellite systems all together @@ -371,6 +370,7 @@ def _epoch( return data + def rinexsystem3( fn: T.TextIO | Path, use: set[str] = None, @@ -415,7 +415,7 @@ def rinexsystem3( meas = None # %% allocate times = obstime3(fn) - #data = xarray.Dataset({}, coords={"time": [], "sv": []}) + # data = xarray.Dataset({}, coords={"time": [], "sv": []}) if tlim is not None and not isinstance(tlim[0], datetime): raise TypeError("time bounds are specified as datetime.datetime") @@ -430,26 +430,25 @@ def rinexsystem3( obl = [] for sk in hdr["fields"]: - obl=obl+hdr["fields"][sk] + obl = obl + hdr["fields"][sk] obl = np.unique(np.array(obl)) obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] - if meas is not None: - obl = obl[np.isin(obl,meas)] + obl = obl[np.isin(obl, meas)] Nt = times.size Npages = len(obl) Nsv = int(hdr['# OF SATELLITES']) - svl = np.tile(' ',Nsv) + svl = np.tile(' ', Nsv) data = np.empty((Npages, Nt, Nsv)) data.fill(np.nan) if useindicators: - data_lli = np.full_like(data,np.nan) - data_ssi = np.full_like(data,np.nan) + data_lli = np.full_like(data, np.nan) + data_ssi = np.full_like(data, np.nan) # %% process OBS file time_offset = [] @@ -495,19 +494,15 @@ def rinexsystem3( if verbose: print(time, end="\r") - for k in sv: - if not k in svl: - svl[np.argmax(svl==' ')]=k - - + if k not in svl: + svl[np.argmax(svl == ' ')] = k darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) - ) - - t = time==times + np.genfromtxt(io.BytesIO(raw.encode("ascii")), + delimiter=(14, 1, 1) * hdr["Fmax"])) + t = time == times for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) # satellite indices "si" to extract from this time's measurements @@ -516,37 +511,26 @@ def rinexsystem3( continue gsv = np.array(sv)[si] - isv = [i for i,s in enumerate(svl) if s in gsv] - #isv = np.zeros_like(gsv,dtype=int) + isv = [i for i, s in enumerate(svl) if s in gsv] - #for ii,jj in enumerate(gsv): - # isv[ii] = np.nonzero(svl==jj) - - - for i,j in enumerate(hdr['fields'][sk]): - o = obl==j - #for ii,jj in enumerate(gsv): - # data[o,t,svl==jj]=darr[si[ii],i*3] - data[o,t,isv]=darr[si,i*3] + for i, j in enumerate(hdr['fields'][sk]): + o = obl == j + data[o, t, isv]=darr[si,i*3] if useindicators: - data_lli[o,t,isv]=darr[si,i*3+1] - data_ssi[o,t,isv]=darr[si,i*3+2] - - - - + data_lli[o, t, isv]=darr[si, i*3+1] + data_ssi[o, t, isv]=darr[si, i*3+2] if ' ' in svl: - svl=svl[:np.argmax(svl==' ')] + svl = svl[:np.argmax(svl == ' ')] - svl, isv = np.unique(svl,return_index=True) + svl, isv = np.unique(svl, return_index=True) - data=data[:,:,isv] + data=data[:, :, isv] if useindicators: - data_lli=data_lli[:,:,isv] - data_ssi=data_ssi[:,:,isv] + data_lli = data_lli[:, :, isv] + data_ssi = data_ssi[:, :, isv] - obs = xarray.Dataset(coords={"time":times,"sv": svl}) + obs = xarray.Dataset(coords={"time": times, "sv": svl}) for i, k in enumerate(obl): # FIXME: for limited time span reads, this drops unused data variables # if np.isnan(data[i, ...]).all(): @@ -561,11 +545,10 @@ def rinexsystem3( elif k[0] == 'C': obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - obs = obs.dropna(dim="sv", how="all") obs = obs.dropna(dim="time", how="all") # when tlim specified - data=obs + data = obs # %% patch SV names in case of "G 7" => "G07" data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) # %% other attributes @@ -716,7 +699,7 @@ def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dic fields[sk] = np.array(fields[sk])[ind].tolist() sysind[sk] = np.empty(Fmax * 3, dtype=bool) # *3 due to LLI, SSI for j, i in enumerate(ind): - sysind[sk][j * 3 : j * 3 + 3] = i + sysind[sk][j * 3: j * 3 + 3] = i else: sysind = {k: np.s_[:] for k in fields} From e7fd313fb713109c3c05781296fed06b088deadc Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 8 Dec 2022 17:35:18 -0400 Subject: [PATCH 12/19] fast v3 loads to memory --- src/georinex/obs3.py | 13 ++++++++++--- src/georinex/rio.py | 6 +++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 1e98e4d..0563753 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -44,7 +44,7 @@ def rinexobs3( useindicators: SSI, LLI are output meas: 'L1C' or ['L1C', 'C1C'] or similar - fast: Still double-reading file to get times + fast: Loads entire file into memory as a string Uses # OF SATELLITES from header to try and size nparray, falls back to Nsvsys if missing if false, uses old _epoch method @@ -73,8 +73,15 @@ def rinexobs3( """ Nsvsys = 36 + if isinstance(fn, Path): + file_name = fn + else: + file_name = None + # %% allocate if fast: + with opener(fn) as f: + fn = [ln for ln in f] times = obstime3(fn) else: data = xarray.Dataset({}, coords={"time": [], "sv": []}) @@ -256,8 +263,8 @@ def rinexobs3( data.attrs["rinextype"] = "obs" data.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4 data.attrs["time_system"] = determine_time_system(hdr) - if isinstance(fn, Path): - data.attrs["filename"] = fn.name + if isinstance(file_name, Path): + data.attrs["filename"] = file_name.name if "position" in hdr.keys(): data.attrs["position"] = hdr["position"] diff --git a/src/georinex/rio.py b/src/georinex/rio.py index 4787428..5bcedc6 100644 --- a/src/georinex/rio.py +++ b/src/georinex/rio.py @@ -20,7 +20,7 @@ @contextmanager -def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]: +def opener(fn: T.TextIO | Path | list, header: bool = False) -> T.Iterator[T.TextIO]: """provides file handle for regular ASCII or gzip files transparently""" if isinstance(fn, str): fn = Path(fn).expanduser() @@ -28,6 +28,10 @@ def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]: if isinstance(fn, io.StringIO): fn.seek(0) yield fn + elif isinstance(fn, list): + f = ''.join(fn) + fn = io.StringIO(f) + yield fn elif isinstance(fn, Path): # need to have this check for Windows if not fn.is_file(): From c35576f59568a9dd22bafc318242eabe789671b6 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Jan 2023 19:13:38 -0400 Subject: [PATCH 13/19] limited calls to genfromtxt for 2x speed --- src/georinex/obs3.py | 209 +++++++++++++++++++------------------------ 1 file changed, 93 insertions(+), 116 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 0563753..eb0b614 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -45,8 +45,7 @@ def rinexobs3( meas: 'L1C' or ['L1C', 'C1C'] or similar fast: Loads entire file into memory as a string - Uses # OF SATELLITES from header to try and size nparray, falls back - to Nsvsys if missing + TODO: time interval, time offset, etc. if false, uses old _epoch method interval: allows decimating file read by time e.g. every 5 seconds. @@ -64,15 +63,6 @@ def rinexobs3( if not meas or not meas[0].strip(): meas = None - """ - Nsvsys may need updating as GNSS systems grow. - Let us know if you needed to change them. - - Beidou is 35 max - Galileo is 36 max - """ - Nsvsys = 36 - if isinstance(fn, Path): file_name = fn else: @@ -108,138 +98,125 @@ def rinexobs3( Nt = times.size Npages = len(obl) - if '# OF SATELLITES' in hdr.keys(): - Nsv = int(hdr['# OF SATELLITES']) - else: - Nsv = Nsvsys * len(hdr['fields']) - svl = np.tile(' ', Nsv) + f = np.array([f for f in f]) # don't need opener anymore + sv = np.array([ln[:3] for ln in f]) + (usv, isv) = np.unique(sv, return_inverse=True) # get all sv lines + + # list of epoch numbers per datum + timeIndex = np.cumsum(isv == + np.flatnonzero(np.char.rfind(usv, '>') == 0) + )-1 - data = np.empty((Npages, Nt, Nsv)) - data.fill(np.nan) + # usv contains epoch entries too, get only sv + svl = usv[np.logical_not(np.char.startswith(usv, '>'))] + Nsv = svl.size + + data = np.full((Npages, Nt, Nsv), np.nan) if useindicators: data_lli = np.full_like(data, np.nan) data_ssi = np.full_like(data, np.nan) - else: - hdr = obsheader3(f, use, meas) - - # process OBS file - time_offset = [] - for ln in f: - if not ln.startswith(">"): # end of file - break - - try: - time = _timeobs(ln) - except ValueError: # garbage between header and RINEX data - logging.debug(f"garbage detected in {fn}, trying to parse at next time step") - continue - - try: - time_offset.append(float(ln[41:56])) - except ValueError: - pass - # %% get SV indices - sv = [] - raw = "" - # Number of visible satellites this time %i3 pg. A13 - for _ in range(int(ln[33:35])): - ln = f.readline() - if use is None or ln[0] in use: - sv.append(ln[:3]) - raw += ln[3:] + time_offset = [] # TODO: fill this list - if tlim is not None: - if time < tlim[0]: + for i, sv in enumerate(usv): + if sv[0] == '>': + # time offset processing here + continue + elif use is not None and sv[0] not in use: continue - elif time > tlim[1]: - break - - if interval is not None: - if last_epoch is None: # initialization - last_epoch = time - else: - if time - last_epoch < interval: - continue - else: - last_epoch += interval - - if verbose: - print(time, end="\r") - - if fast: - for k in sv: - # update list of satellites - if k not in svl: - svl[np.argmax(svl == ' ')] = k + # genfromtext is bottleneck, limit calls this way darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), + np.genfromtxt(np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"])) - t = time == times + t_i = timeIndex[isv == i] - for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) - # satellite indices "si" to extract from this time's measurements + # jsv == i-1 except for files spanning 1999-2000 + jsv = svl == sv - si = [i for i, s in enumerate(sv) if s[0] in sk] - if len(si) == 0: # no SV of this system "sk" at this time + for j, jobl in enumerate(hdr['fields'][sv[0]]): + o = obl == jobl + if not np.any(o): continue - gsv = np.array(sv)[si] + data[o, t_i, jsv] = darr[:, j*3] - isv, jsv = np.nonzero(np.logical_and( - np.atleast_2d(svl).T == np.atleast_2d(sv), - np.isin(sv, gsv)) ) + if useindicators: + data_lli[np.ix_(o, t_i, jsv)] = darr[:, j*3+1] + data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j*3+2] - for i, j in enumerate(hdr['fields'][sk]): - o = obl == j - if not np.any(o): - continue - data[np.ix_(o, t, isv)] = darr[jsv, i*3] + obs = xarray.Dataset(coords={"sv": svl, "time": times}) + for i, k in enumerate(obl): + if k is None: + continue + elif np.all(np.isnan(data[i, :, :])): + # drop all nan datasets like tests expect + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == 'L': + obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + # elif k[0] == 'C': # only for code? + else: + obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - if useindicators: - data_lli[np.ix_(o, t, isv)] = darr[jsv, i*3+1] - data_ssi[np.ix_(o, t, isv)] = darr[jsv, i*3+2] + obs = obs.dropna(dim="sv", how="all") + # obs = obs.dropna(dim="time", how="all") # when tlim specified - else: - # this time epoch is complete, assemble the data. - data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) + data = obs - if fast: - if ' ' in svl: - # remove blank satellites (if tlim used) - svl = svl[:np.argmax(svl == ' ')] + else: + hdr = obsheader3(f, use, meas) + # process OBS file + time_offset = [] + for ln in f: + if not ln.startswith(">"): # end of file + break - svl, isv = np.unique(svl, return_index=True) + try: + time = _timeobs(ln) + except ValueError: # garbage between header and RINEX data + logging.debug(f"garbage detected in {fn}, trying to parse at next time step") + continue - data = data[:, :, isv] - if useindicators: - data_lli = data_lli[:, :, isv] - data_ssi = data_ssi[:, :, isv] + try: + time_offset.append(float(ln[41:56])) + except ValueError: + pass + # %% get SV indices + sv = [] + raw = "" + # Number of visible satellites this time %i3 pg. A13 + for _ in range(int(ln[33:35])): + ln = f.readline() + if use is None or ln[0] in use: + sv.append(ln[:3]) + raw += ln[3:] + + if tlim is not None: + if time < tlim[0]: + continue + elif time > tlim[1]: + break - obs = xarray.Dataset(coords={"sv": svl, "time": times}) - for i, k in enumerate(obl): - if k is None: - continue - elif np.all(np.isnan(data[i, :, :])): - # drop all nan datasets like tests expect - continue - obs[k] = (("time", "sv"), data[i, :, :]) - if useindicators: - if k[0] == 'L': - obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - # elif k[0] == 'C': # only for code? - else: - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + if interval is not None: + if last_epoch is None: # initialization + last_epoch = time + else: + if time - last_epoch < interval: + continue + else: + last_epoch += interval - obs = obs.dropna(dim="sv", how="all") - # obs = obs.dropna(dim="time", how="all") # when tlim specified + if verbose: + print(time, end="\r") - data = obs + # this time epoch is complete, assemble the data. + data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) # %% patch SV names in case of "G 7" => "G07" data = data.assign_coords(sv=[s.replace(" ", "0") From 5f50d9db1fc702d321ad2c8c2bb6c2be2ed30b78 Mon Sep 17 00:00:00 2001 From: breid-phys Date: Tue, 27 Jun 2023 12:41:34 -0300 Subject: [PATCH 14/19] fixed datetime64 issue --- src/georinex/obs2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py index 5dee6db..e468d92 100644 --- a/src/georinex/obs2.py +++ b/src/georinex/obs2.py @@ -36,7 +36,7 @@ def rinexobs2( if not use: use = {"C", "E", "G", "J", "R", "S"} - obs = xarray.Dataset({}, coords={"time": np.array([],dtype=np.datetime64), "sv": []}) + obs = xarray.Dataset({}, coords={"time": np.array([],dtype='datetime64[ns]'), "sv": []}) attrs: dict[str, T.Any] = {} for u in use: o = rinexsystem2( From 0d2f69ba6f41a2e68746077dea43114fed35a698 Mon Sep 17 00:00:00 2001 From: breid-phys Date: Thu, 29 Jun 2023 10:35:04 -0300 Subject: [PATCH 15/19] fixed bug with nav errors --- channeldata.json | 7 +++ index.html | 83 +++++++++++++++++++++++++ noarch/current_repodata.json | 9 +++ noarch/current_repodata.json.bz2 | Bin 0 -> 126 bytes noarch/index.html | 82 ++++++++++++++++++++++++ noarch/repodata.json | 9 +++ noarch/repodata.json.bz2 | Bin 0 -> 126 bytes noarch/repodata_from_packages.json | 9 +++ noarch/repodata_from_packages.json.bz2 | Bin 0 -> 126 bytes 9 files changed, 199 insertions(+) create mode 100644 channeldata.json create mode 100644 index.html create mode 100644 noarch/current_repodata.json create mode 100644 noarch/current_repodata.json.bz2 create mode 100644 noarch/index.html create mode 100644 noarch/repodata.json create mode 100644 noarch/repodata.json.bz2 create mode 100644 noarch/repodata_from_packages.json create mode 100644 noarch/repodata_from_packages.json.bz2 diff --git a/channeldata.json b/channeldata.json new file mode 100644 index 0000000..fdd2409 --- /dev/null +++ b/channeldata.json @@ -0,0 +1,7 @@ +{ + "channeldata_version": 1, + "packages": {}, + "subdirs": [ + "noarch" + ] +} diff --git a/index.html b/index.html new file mode 100644 index 0000000..3eda6ed --- /dev/null +++ b/index.html @@ -0,0 +1,83 @@ + + + georinex + + + +

georinex

+

RSS Feed   channeldata.json

+noarch    + + + + + + + + +
PackageLatest VersionDocDevLicensenoarch Summary
+
Updated: 2023-06-08 14:39:19 +0000 - Files: 0
+ + \ No newline at end of file diff --git a/noarch/current_repodata.json b/noarch/current_repodata.json new file mode 100644 index 0000000..c8a47d1 --- /dev/null +++ b/noarch/current_repodata.json @@ -0,0 +1,9 @@ +{ + "info": { + "subdir": "noarch" + }, + "packages": {}, + "packages.conda": {}, + "removed": [], + "repodata_version": 1 +} diff --git a/noarch/current_repodata.json.bz2 b/noarch/current_repodata.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..34499176b4ca978123fa4d2b1d85533774f5f867 GIT binary patch literal 126 zcmV-^0D=EPT4*^jL0KkKSsDu+S^xk%TYx|iPz4|m01ChB-wGfAbQ+22Hj(KDgFwch z$wt)m10m`FG$`~a3eYtYm7oYhXV(wUZotdp>j?sm)=p!=7h6LU_DABFZAI`Dme~+Q g7Jaajjb0$+AWY8UiY)tU6j$+gBvXY60idzfpkos=Q~&?~ literal 0 HcmV?d00001 diff --git a/noarch/index.html b/noarch/index.html new file mode 100644 index 0000000..3610ed4 --- /dev/null +++ b/noarch/index.html @@ -0,0 +1,82 @@ + + + georinex/noarch + + + +

georinex/noarch

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FilenameSizeLast ModifiedSHA256MD5
repodata.json127 B2023-06-08 14:39:19 +0000b546412dc20b790c5f9c223e394ff0e39a27ba12a99631ca4d1bb6c0ca3bd05c917338f97423c09c911618661fef3056
repodata.json.bz2126 B2023-06-08 14:39:19 +000032d48c11b6d5ee61a54a9076f8c06a62ba795f69b671db3e44b1301fa0efae0e1ecb5b301b9a2ba831f0a68a7ace5f00
repodata_from_packages.json127 B2023-06-08 14:39:19 +0000b546412dc20b790c5f9c223e394ff0e39a27ba12a99631ca4d1bb6c0ca3bd05c917338f97423c09c911618661fef3056
repodata_from_packages.json.bz2126 B2023-06-08 14:39:19 +000032d48c11b6d5ee61a54a9076f8c06a62ba795f69b671db3e44b1301fa0efae0e1ecb5b301b9a2ba831f0a68a7ace5f00
+
Updated: 2023-06-08 14:39:19 +0000 - Files: 0
+ + \ No newline at end of file diff --git a/noarch/repodata.json b/noarch/repodata.json new file mode 100644 index 0000000..c8a47d1 --- /dev/null +++ b/noarch/repodata.json @@ -0,0 +1,9 @@ +{ + "info": { + "subdir": "noarch" + }, + "packages": {}, + "packages.conda": {}, + "removed": [], + "repodata_version": 1 +} diff --git a/noarch/repodata.json.bz2 b/noarch/repodata.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..34499176b4ca978123fa4d2b1d85533774f5f867 GIT binary patch literal 126 zcmV-^0D=EPT4*^jL0KkKSsDu+S^xk%TYx|iPz4|m01ChB-wGfAbQ+22Hj(KDgFwch z$wt)m10m`FG$`~a3eYtYm7oYhXV(wUZotdp>j?sm)=p!=7h6LU_DABFZAI`Dme~+Q g7Jaajjb0$+AWY8UiY)tU6j$+gBvXY60idzfpkos=Q~&?~ literal 0 HcmV?d00001 diff --git a/noarch/repodata_from_packages.json b/noarch/repodata_from_packages.json new file mode 100644 index 0000000..c8a47d1 --- /dev/null +++ b/noarch/repodata_from_packages.json @@ -0,0 +1,9 @@ +{ + "info": { + "subdir": "noarch" + }, + "packages": {}, + "packages.conda": {}, + "removed": [], + "repodata_version": 1 +} diff --git a/noarch/repodata_from_packages.json.bz2 b/noarch/repodata_from_packages.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..34499176b4ca978123fa4d2b1d85533774f5f867 GIT binary patch literal 126 zcmV-^0D=EPT4*^jL0KkKSsDu+S^xk%TYx|iPz4|m01ChB-wGfAbQ+22Hj(KDgFwch z$wt)m10m`FG$`~a3eYtYm7oYhXV(wUZotdp>j?sm)=p!=7h6LU_DABFZAI`Dme~+Q g7Jaajjb0$+AWY8UiY)tU6j$+gBvXY60idzfpkos=Q~&?~ literal 0 HcmV?d00001 From 9d283613567cb0e20df690fa85c510cce7ae584f Mon Sep 17 00:00:00 2001 From: breid-phys Date: Thu, 29 Jun 2023 16:21:12 -0300 Subject: [PATCH 16/19] fixes for file reading edge cases --- src/georinex/obs3.py | 5 +- src/georinex/rio.py | 121 ++++++++++++++++++++++++++----------------- 2 files changed, 76 insertions(+), 50 deletions(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index eb0b614..0669331 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -99,7 +99,8 @@ def rinexobs3( Nt = times.size Npages = len(obl) - f = np.array([f for f in f]) # don't need opener anymore + f = np.array([f for f in f if f[1:3] != ' ']) # don't need opener anymore + sv = np.array([ln[:3] for ln in f]) (usv, isv) = np.unique(sv, return_inverse=True) # get all sv lines @@ -121,7 +122,7 @@ def rinexobs3( time_offset = [] # TODO: fill this list for i, sv in enumerate(usv): - if sv[0] == '>': + if sv[0] == '>' or sv[2] == ' ': # time offset processing here continue elif use is not None and sv[0] not in use: diff --git a/src/georinex/rio.py b/src/georinex/rio.py index 5bcedc6..cbd6d25 100644 --- a/src/georinex/rio.py +++ b/src/georinex/rio.py @@ -20,7 +20,7 @@ @contextmanager -def opener(fn: T.TextIO | Path | list, header: bool = False) -> T.Iterator[T.TextIO]: +def opener(fn: T.TextIO | Path | list, header: bool = False, encoding="ascii") -> T.Iterator[T.TextIO]: """provides file handle for regular ASCII or gzip files transparently""" if isinstance(fn, str): fn = Path(fn).expanduser() @@ -37,45 +37,15 @@ def opener(fn: T.TextIO | Path | list, header: bool = False) -> T.Iterator[T.Tex if not fn.is_file(): raise FileNotFoundError(fn) - finf = fn.stat() - if finf.st_size > 100e6: - logging.info(f"opening {finf.st_size/1e6} MByte {fn.name}") - - suffix = fn.suffix.lower() - - if suffix == ".gz": - with gzip.open(fn, "rt") as f: - _, is_crinex = rinex_version(first_nonblank_line(f)) - f.seek(0) - - if is_crinex and not header: - # Conversion to string is necessary because of a quirk where gzip.open() - # even with 'rt' doesn't decompress until read. - f = io.StringIO(crx2rnx(f.read())) - yield f - elif suffix == ".bz2": - # this is for plain bzip2 files, NOT tar.bz2, which requires f.seek(512) - with bz2.open(fn, "rt") as f: - _, is_crinex = rinex_version(first_nonblank_line(f)) - f.seek(0) - - if is_crinex and not header: - f = io.StringIO(crx2rnx(f.read())) - yield f - elif suffix == ".zip": - with zipfile.ZipFile(fn, "r") as z: - flist = z.namelist() - for rinexfn in flist: - with z.open(rinexfn, "r") as bf: - f = io.StringIO( - io.TextIOWrapper(bf, encoding="ascii", errors="ignore").read() # type: ignore - ) - yield f - elif suffix == ".z": - if unlzw is None: - raise ImportError("pip install unlzw3") - with fn.open("rb") as zu: - with io.StringIO(unlzw(zu.read()).decode("ascii")) as f: + try: + finf = fn.stat() + if finf.st_size > 100e6: + logging.info(f"opening {finf.st_size/1e6} MByte {fn.name}") + + suffix = fn.suffix.lower() + + if suffix == ".gz": + with gzip.open(fn, "rt", encoding=encoding) as f: _, is_crinex = rinex_version(first_nonblank_line(f)) f.seek(0) @@ -84,14 +54,69 @@ def opener(fn: T.TextIO | Path | list, header: bool = False) -> T.Iterator[T.Tex # even with 'rt' doesn't decompress until read. f = io.StringIO(crx2rnx(f.read())) yield f - else: # assume not compressed (or Hatanaka) - with fn.open("r", encoding="ascii", errors="ignore") as f: - _, is_crinex = rinex_version(first_nonblank_line(f)) - f.seek(0) - - if is_crinex and not header: - f = io.StringIO(crx2rnx(f)) - yield f + elif suffix == ".bz2": + # this is for plain bzip2 files, NOT tar.bz2, which requires f.seek(512) + with bz2.open(fn, "rt", encoding=encoding) as f: + _, is_crinex = rinex_version(first_nonblank_line(f)) + f.seek(0) + + if is_crinex and not header: + f = io.StringIO(crx2rnx(f.read())) + yield f + elif suffix == ".zip": + with zipfile.ZipFile(fn, "r") as z: + flist = z.namelist() + for rinexfn in flist: + with z.open(rinexfn, "r") as bf: + f = io.StringIO( + io.TextIOWrapper(bf, encoding=encoding, errors="ignore").read() # type: ignore + ) + yield f + elif suffix == ".z": + if unlzw is None: + raise ImportError("pip install unlzw3") + try: + with fn.open("rb") as zu: + with io.StringIO(unlzw(zu.read()).decode(encoding)) as f: + _, is_crinex = rinex_version(first_nonblank_line(f)) + f.seek(0) + + if is_crinex and not header: + # Conversion to string is necessary because of a quirk where gzip.open() + # even with 'rt' doesn't decompress until read. + f = io.StringIO(crx2rnx(f.read())) + yield f + except ValueError as e: + if 'magic bytes' in e.args[0]: + # sometimes .Z files are actually gzipped + with gzip.open(fn, "rt", encoding=encoding) as f: + _, is_crinex = rinex_version(first_nonblank_line(f)) + f.seek(0) + + if is_crinex and not header: + # Conversion to string is necessary because of a quirk where gzip.open() + # even with 'rt' doesn't decompress until read. + f = io.StringIO(crx2rnx(f.read())) + yield f + + else: # assume not compressed (or Hatanaka) + with fn.open("r", encoding=encoding, errors="ignore") as f: + _, is_crinex = rinex_version(first_nonblank_line(f)) + f.seek(0) + + if is_crinex and not header: + f = io.StringIO(crx2rnx(f)) + yield f + except UnicodeDecodeError as e: + # some RINEX files have awkward encoding + if encoding == "ascii": + with opener(fn, header, encoding="latin-1") as f: + yield f + elif encoding == "latin-1": + with opener(fn, header, encoding="utf-8") as f: + yield f + else: + raise(e) else: raise OSError(f"Unsure what to do with input of type: {type(fn)}") From 66551247ebe4a32ff549c9df50f2e62bf92d545d Mon Sep 17 00:00:00 2001 From: breid-phys Date: Wed, 5 Jul 2023 18:17:32 -0300 Subject: [PATCH 17/19] added regex to reject bad potential sv --- src/georinex/obs3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 0669331..c0f5aa4 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta import io import xarray +import re import typing as T try: @@ -122,7 +123,7 @@ def rinexobs3( time_offset = [] # TODO: fill this list for i, sv in enumerate(usv): - if sv[0] == '>' or sv[2] == ' ': + if re.match(r'[A-Z][ \d]\d', sv) is None: # time offset processing here continue elif use is not None and sv[0] not in use: From a8579a2c36d14474895cf921101c5ddc3b17510c Mon Sep 17 00:00:00 2001 From: breid-phys Date: Thu, 5 Oct 2023 18:22:14 -0300 Subject: [PATCH 18/19] fixed SBAS error for v2 fast code --- src/georinex/obs2.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py index e468d92..0f53448 100644 --- a/src/georinex/obs2.py +++ b/src/georinex/obs2.py @@ -29,14 +29,13 @@ def rinexobs2( fast: bool = True, interval: float | int | timedelta = None, ) -> xarray.Dataset: - if isinstance(use, str): use = {use} if not use: use = {"C", "E", "G", "J", "R", "S"} - obs = xarray.Dataset({}, coords={"time": np.array([],dtype='datetime64[ns]'), "sv": []}) + obs = xarray.Dataset({}, coords={"time": np.array([], dtype="datetime64[ns]"), "sv": []}) attrs: dict[str, T.Any] = {} for u in use: o = rinexsystem2( @@ -233,7 +232,10 @@ def rinexsystem2( assert darr.shape[0] == gsv.size # %% select only "used" satellites - isv = [int(s[1:]) - 1 for s in gsv] + if system == "S": + isv = range(len(gsv)) + else: + isv = [int(s[1:]) - 1 for s in gsv] for i, k in enumerate(hdr["fields_ind"]): if useindicators: @@ -263,9 +265,12 @@ def rinexsystem2( else: fields.extend([None, None]) - obs = xarray.Dataset( - coords={"time": times, "sv": [f"{system}{i:02d}" for i in range(1, Nsvsys + 1)]} - ) + if system == "S": + obs = xarray.Dataset(coords={"time": times, "sv": gsv}) + else: + obs = xarray.Dataset( + coords={"time": times, "sv": [f"{system}{i:02d}" for i in range(1, Nsvsys + 1)]} + ) for i, k in enumerate(fields): # FIXME: for limited time span reads, this drops unused data variables @@ -273,7 +278,7 @@ def rinexsystem2( # continue if k is None: continue - obs[k] = (("time", "sv"), data[i, :, :]) + obs[k] = (("time", "sv"), data[i, :, : len(obs["sv"])]) obs = obs.dropna(dim="sv", how="all") obs = obs.dropna(dim="time", how="all") # when tlim specified @@ -557,7 +562,6 @@ def _timehdr(ln: str) -> datetime: def _timeobs(ln: str) -> datetime: - year = int(ln[1:3]) if year < 80: year += 2000 From bb1310d8331136e8cfe24b93ec6f33725ec44f16 Mon Sep 17 00:00:00 2001 From: breid-phys Date: Tue, 17 Oct 2023 11:35:56 -0300 Subject: [PATCH 19/19] added support for rinex v4 --- src/georinex/base.py | 12 ++ src/georinex/obs3.py | 93 ++++----- src/georinex/obs4.py | 474 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 530 insertions(+), 49 deletions(-) create mode 100644 src/georinex/obs4.py diff --git a/src/georinex/base.py b/src/georinex/base.py index 5d419d6..fc26e56 100644 --- a/src/georinex/base.py +++ b/src/georinex/base.py @@ -8,6 +8,7 @@ from .rio import rinexinfo from .obs2 import rinexobs2 from .obs3 import rinexobs3 +from .obs4 import rinexobs4 from .nav2 import rinexnav2 from .nav3 import rinexnav3 from .sp3 import load_sp3 @@ -235,6 +236,17 @@ def rinexobs( fast=fast, interval=interval, ) + elif int(info["version"]) == 4: + obs = rinexobs4( + fn, + use, + tlim=tlim, + useindicators=useindicators, + meas=meas, + verbose=verbose, + fast=fast, + interval=interval, + ) else: raise ValueError(f"unknown RINEX {info} {fn}") diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index c0f5aa4..77ed12a 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -83,7 +83,6 @@ def rinexobs3( last_epoch = None # %% loop with opener(fn) as f: - if fast: hdr = obsheader3(f, use) @@ -91,27 +90,24 @@ def rinexobs3( for sk in hdr["fields"]: obl = obl + hdr["fields"][sk] obl = np.unique(np.array(obl)) - obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] + obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])] if meas is not None: - obl = obl[np.any([np.char.find(obl, j) == 0 - for i, j in enumerate(meas)], 0)] + obl = obl[np.any([np.char.find(obl, j) == 0 for i, j in enumerate(meas)], 0)] Nt = times.size Npages = len(obl) - f = np.array([f for f in f if f[1:3] != ' ']) # don't need opener anymore + f = np.array([f for f in f if f[1:3] != " "]) # don't need opener anymore sv = np.array([ln[:3] for ln in f]) (usv, isv) = np.unique(sv, return_inverse=True) # get all sv lines # list of epoch numbers per datum - timeIndex = np.cumsum(isv == - np.flatnonzero(np.char.rfind(usv, '>') == 0) - )-1 + timeIndex = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1 # usv contains epoch entries too, get only sv - svl = usv[np.logical_not(np.char.startswith(usv, '>'))] + svl = usv[np.logical_not(np.char.startswith(usv, ">"))] Nsv = svl.size data = np.full((Npages, Nt, Nsv), np.nan) @@ -123,7 +119,7 @@ def rinexobs3( time_offset = [] # TODO: fill this list for i, sv in enumerate(usv): - if re.match(r'[A-Z][ \d]\d', sv) is None: + if re.match(r"[A-Z][ \d]\d", sv) is None: # time offset processing here continue elif use is not None and sv[0] not in use: @@ -131,24 +127,26 @@ def rinexobs3( # genfromtext is bottleneck, limit calls this way darr = np.atleast_2d( - np.genfromtxt(np.char.lstrip(f[isv == i], sv), - delimiter=(14, 1, 1) * hdr["Fmax"])) + np.genfromtxt( + np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"] + ) + ) t_i = timeIndex[isv == i] # jsv == i-1 except for files spanning 1999-2000 jsv = svl == sv - for j, jobl in enumerate(hdr['fields'][sv[0]]): + for j, jobl in enumerate(hdr["fields"][sv[0]]): o = obl == jobl if not np.any(o): continue - data[o, t_i, jsv] = darr[:, j*3] + data[o, t_i, jsv] = darr[:, j * 3] if useindicators: - data_lli[np.ix_(o, t_i, jsv)] = darr[:, j*3+1] - data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j*3+2] + data_lli[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 1] + data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 2] obs = xarray.Dataset(coords={"sv": svl, "time": times}) for i, k in enumerate(obl): @@ -159,15 +157,15 @@ def rinexobs3( continue obs[k] = (("time", "sv"), data[i, :, :]) if useindicators: - if k[0] == 'L': - obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + if k[0] == "L": + obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :]) + obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :]) # elif k[0] == 'C': # only for code? else: - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :]) obs = obs.dropna(dim="sv", how="all") - # obs = obs.dropna(dim="time", how="all") # when tlim specified + # obs = obs.dropna(dim="time", how="all") # when tlim specified data = obs @@ -221,8 +219,7 @@ def rinexobs3( data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) # %% patch SV names in case of "G 7" => "G07" - data = data.assign_coords(sv=[s.replace(" ", "0") - for s in data.sv.values.tolist()]) + data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) # %% other attributes data.attrs["version"] = hdr["version"] @@ -232,8 +229,7 @@ def rinexobs3( elif "time" in data.coords.keys(): # median is robust against gaps try: - data.attrs["interval"] = np.median(np.diff(data.time) - / np.timedelta64(1, "s")) + data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) except TypeError: pass else: @@ -318,8 +314,8 @@ def _epoch( block processing of each epoch (time step) """ darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), - delimiter=(14, 1, 1) * hdr["Fmax"])) + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) # data = xarray.Dataset({}, coords={"time": [], "sv": []}) # %% assign data for each time step for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) @@ -340,7 +336,7 @@ def _epoch( dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3])) if useindicators: - dsf = _indicators(dsf, k, garr[:, i * 3 + 1: i * 3 + 3]) + dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3]) if verbose: print(time, "\r", end="") @@ -408,7 +404,6 @@ def rinexsystem3( last_epoch = None # %% loop with opener(fn) as f: - if fast: hdr = obsheader3(f) else: @@ -418,16 +413,16 @@ def rinexsystem3( for sk in hdr["fields"]: obl = obl + hdr["fields"][sk] obl = np.unique(np.array(obl)) - obl = obl[np.argsort([i[1:]+i[0] for i in iter(obl)])] + obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])] if meas is not None: obl = obl[np.isin(obl, meas)] Nt = times.size Npages = len(obl) - Nsv = int(hdr['# OF SATELLITES']) + Nsv = int(hdr["# OF SATELLITES"]) - svl = np.tile(' ', Nsv) + svl = np.tile(" ", Nsv) data = np.empty((Npages, Nt, Nsv)) data.fill(np.nan) @@ -482,11 +477,11 @@ def rinexsystem3( for k in sv: if k not in svl: - svl[np.argmax(svl == ' ')] = k + svl[np.argmax(svl == " ")] = k darr = np.atleast_2d( - np.genfromtxt(io.BytesIO(raw.encode("ascii")), - delimiter=(14, 1, 1) * hdr["Fmax"])) + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) t = time == times @@ -499,19 +494,19 @@ def rinexsystem3( gsv = np.array(sv)[si] isv = [i for i, s in enumerate(svl) if s in gsv] - for i, j in enumerate(hdr['fields'][sk]): + for i, j in enumerate(hdr["fields"][sk]): o = obl == j - data[o, t, isv]=darr[si,i*3] + data[o, t, isv] = darr[si, i * 3] if useindicators: - data_lli[o, t, isv]=darr[si, i*3+1] - data_ssi[o, t, isv]=darr[si, i*3+2] + data_lli[o, t, isv] = darr[si, i * 3 + 1] + data_ssi[o, t, isv] = darr[si, i * 3 + 2] - if ' ' in svl: - svl = svl[:np.argmax(svl == ' ')] + if " " in svl: + svl = svl[: np.argmax(svl == " ")] svl, isv = np.unique(svl, return_index=True) - data=data[:, :, isv] + data = data[:, :, isv] if useindicators: data_lli = data_lli[:, :, isv] data_ssi = data_ssi[:, :, isv] @@ -525,11 +520,11 @@ def rinexsystem3( continue obs[k] = (("time", "sv"), data[i, :, :]) if useindicators: - if k[0] == 'L': - obs[k+'lli'] = (("time", "sv"), data_lli[i, :, :]) - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) - elif k[0] == 'C': - obs[k+'ssi'] = (("time", "sv"), data_ssi[i, :, :]) + if k[0] == "L": + obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :]) + obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :]) + elif k[0] == "C": + obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :]) obs = obs.dropna(dim="sv", how="all") obs = obs.dropna(dim="time", how="all") # when tlim specified @@ -671,7 +666,7 @@ def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dic fields = {k: fields[k] for k in use if k in fields} # perhaps this could be done more efficiently, but it's probably low impact on overall program. - # simple set and frozenset operations do NOT preserve order, which would completely mess up reading! + # simple set and frozenset operations do NOT preserve order, which would mess up reading! sysind: dict[str, T.Any] = {} if isinstance(meas, (tuple, list, np.ndarray)): for sk in fields: # iterate over each system @@ -685,7 +680,7 @@ def obsheader3(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dic fields[sk] = np.array(fields[sk])[ind].tolist() sysind[sk] = np.empty(Fmax * 3, dtype=bool) # *3 due to LLI, SSI for j, i in enumerate(ind): - sysind[sk][j * 3: j * 3 + 3] = i + sysind[sk][j * 3 : j * 3 + 3] = i else: sysind = {k: np.s_[:] for k in fields} diff --git a/src/georinex/obs4.py b/src/georinex/obs4.py new file mode 100644 index 0000000..5113e85 --- /dev/null +++ b/src/georinex/obs4.py @@ -0,0 +1,474 @@ +from __future__ import annotations +from pathlib import Path +import numpy as np +import logging +from datetime import datetime, timedelta +import io +import xarray +import re +import typing as T + +try: + from pymap3d import ecef2geodetic +except ImportError: + ecef2geodetic = None +# +from .rio import opener, rinexinfo +from .common import determine_time_system, check_time_interval, check_unique_times + +"""https://github.com/mvglasow/satstat/wiki/NMEA-IDs""" + +SBAS = 100 # offset for ID +GLONASS = 37 +QZSS = 192 +BEIDOU = 0 + + +def rinexobs4( + fn: T.TextIO | Path, + use: set[str] = None, + tlim: tuple[datetime, datetime] = None, + useindicators: bool = False, + meas: list[str] = None, + verbose: bool = False, + *, + fast: bool = False, + interval: float | int | timedelta = None, +) -> xarray.Dataset: + """ + process RINEX 4 OBS data + + fn: RINEX OBS 3 filename + use: 'G' or ['G', 'R'] or similar + + tlim: read between these time bounds + useindicators: SSI, LLI are output + meas: 'L1C' or ['L1C', 'C1C'] or similar + + fast: Loads entire file into memory as a string + TODO: time interval, time offset, etc. + if false, uses old _epoch method + + interval: allows decimating file read by time e.g. every 5 seconds. + Useful to speed up reading of very large RINEX files + """ + + interval = check_time_interval(interval) + + if isinstance(use, str): + use = {use} + + if isinstance(meas, str): + meas = [meas] + + if not meas or not meas[0].strip(): + meas = None + + if isinstance(fn, Path): + file_name = fn + else: + file_name = None + + # %% allocate + if fast: + with opener(fn) as f: + fn = [ln for ln in f] + times = obstime4(fn) + else: + data = xarray.Dataset({}, coords={"time": [], "sv": []}) + + if tlim is not None and not isinstance(tlim[0], datetime): + raise TypeError("time bounds are specified as datetime.datetime") + + last_epoch = None + # %% loop + with opener(fn) as f: + if fast: + hdr = obsheader4(f, use) + + obl = [] + for sk in hdr["fields"]: + obl = obl + hdr["fields"][sk] + obl = np.unique(np.array(obl)) + obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])] + + if meas is not None: + obl = obl[np.any([np.char.find(obl, j) == 0 for i, j in enumerate(meas)], 0)] + + Nt = times.size + Npages = len(obl) + + f = np.array([f for f in f if f[1:3] != " "]) # don't need opener anymore + + sv = np.array([ln[:3] for ln in f]) + (usv, isv) = np.unique(sv, return_inverse=True) # get all sv lines + + # list of epoch numbers per datum + timeIndex = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1 + + # usv contains epoch entries too, get only sv + svl = usv[np.logical_not(np.char.startswith(usv, ">"))] + Nsv = svl.size + + data = np.full((Npages, Nt, Nsv), np.nan) + + if useindicators: + data_lli = np.full_like(data, np.nan) + data_ssi = np.full_like(data, np.nan) + + time_offset = [] # TODO: fill this list + + for i, sv in enumerate(usv): + if re.match(r"[A-Z][ \d]\d", sv) is None: + # time offset processing here + continue + elif use is not None and sv[0] not in use: + continue + + # genfromtext is bottleneck, limit calls this way + darr = np.atleast_2d( + np.genfromtxt( + np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"] + ) + ) + + t_i = timeIndex[isv == i] + + # jsv == i-1 except for files spanning 1999-2000 + jsv = svl == sv + + for j, jobl in enumerate(hdr["fields"][sv[0]]): + o = obl == jobl + if not np.any(o): + continue + + data[o, t_i, jsv] = darr[:, j * 3] + + if useindicators: + data_lli[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 1] + data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 2] + + obs = xarray.Dataset(coords={"sv": svl, "time": times}) + for i, k in enumerate(obl): + if k is None: + continue + elif np.all(np.isnan(data[i, :, :])): + # drop all nan datasets like tests expect + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == "L": + obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :]) + obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :]) + # elif k[0] == 'C': # only for code? + else: + obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :]) + + obs = obs.dropna(dim="sv", how="all") + # obs = obs.dropna(dim="time", how="all") # when tlim specified + + data = obs + + else: + hdr = obsheader4(f, use, meas) + # process OBS file + time_offset = [] + for ln in f: + if not ln.startswith(">"): # end of file + break + + try: + time = _timeobs(ln) + except ValueError: # garbage between header and RINEX data + logging.debug(f"garbage detected in {fn}, trying to parse at next time step") + continue + + try: + time_offset.append(float(ln[41:56])) + except ValueError: + pass + # %% get SV indices + sv = [] + raw = "" + # Number of visible satellites this time %i3 pg. A13 + for _ in range(int(ln[33:35])): + ln = f.readline() + if use is None or ln[0] in use: + sv.append(ln[:3]) + raw += ln[3:] + + if tlim is not None: + if time < tlim[0]: + continue + elif time > tlim[1]: + break + + if interval is not None: + if last_epoch is None: # initialization + last_epoch = time + else: + if time - last_epoch < interval: + continue + else: + last_epoch += interval + + if verbose: + print(time, end="\r") + + # this time epoch is complete, assemble the data. + data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) + + # %% patch SV names in case of "G 7" => "G07" + data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) + # %% other attributes + data.attrs["version"] = hdr["version"] + + # Get interval from header or derive it from the data + if "interval" in hdr.keys(): + data.attrs["interval"] = hdr["interval"] + elif "time" in data.coords.keys(): + # median is robust against gaps + try: + data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) + except TypeError: + pass + else: + data.attrs["interval"] = np.nan + + data.attrs["rinextype"] = "obs" + data.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4 + data.attrs["time_system"] = determine_time_system(hdr) + if isinstance(file_name, Path): + data.attrs["filename"] = file_name.name + + if "position" in hdr.keys(): + data.attrs["position"] = hdr["position"] + if ecef2geodetic is not None: + data.attrs["position_geodetic"] = hdr["position_geodetic"] + if "rxmodel" in hdr.keys(): + obs.attrs["rxmodel"] = hdr["rxmodel"] + if time_offset: + data.attrs["time_offset"] = time_offset + + if "RCV CLOCK OFFS APPL" in hdr.keys(): + try: + data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) + except ValueError: + pass + + return data + + +def _timeobs(ln: str) -> datetime: + """ + convert time from RINEX 3 OBS text to datetime + """ + + if not ln.startswith("> "): # pg. A13 + raise ValueError('RINEX 3 line beginning "> " is not present') + + return datetime( + int(ln[2:6]), + int(ln[7:9]), + int(ln[10:12]), + hour=int(ln[13:15]), + minute=int(ln[16:18]), + second=int(ln[19:21]), + microsecond=int(float(ln[19:29]) % 1 * 1000000), + ) + + +def obstime4(fn: T.TextIO | Path, verbose: bool = False) -> np.ndarray: + """ + return all times in RINEX file + """ + + times = [] + + with opener(fn) as f: + for ln in f: + if ln.startswith("> "): + try: + times.append(_timeobs(ln)) + except (ValueError, IndexError): + logging.debug(f"was not a time:\n{ln}") + continue + + times = np.asarray(times) + + check_unique_times(times) + + return times + + +def _epoch( + data: xarray.Dataset, + raw: str, + hdr: dict[str, T.Any], + time: datetime, + sv: list[str], + useindicators: bool, + verbose: bool, +) -> xarray.Dataset: + """ + block processing of each epoch (time step) + """ + darr = np.atleast_2d( + np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"]) + ) + # data = xarray.Dataset({}, coords={"time": [], "sv": []}) + # %% assign data for each time step + for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.) + # satellite indices "si" to extract from this time's measurements + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + # measurement indices "di" to extract at this time step + di = hdr["fields_ind"][sk] + garr = darr[si, :] + garr = garr[:, di] + + gsv = np.array(sv)[si] + + dsf: dict[str, tuple] = {} + for i, k in enumerate(hdr["fields"][sk]): + dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3])) + + if useindicators: + dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3]) + + if verbose: + print(time, "\r", end="") + + epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv}) + if len(data) == 0: + data = epoch_data + # data = xarray.merge((data, epoch_data)) + elif len(hdr["fields"]) == 1: # one satellite system selected, faster to process + data = xarray.concat((data, epoch_data), dim="time") + else: # general case, slower for different satellite systems all together + data = xarray.merge((data, epoch_data)) + + return data + + +def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]: + """ + handle LLI (loss of lock) and SSI (signal strength) + """ + if k.startswith(("L1", "L2")): + d[k + "lli"] = (("time", "sv"), np.atleast_2d(arr[:, 0])) + + d[k + "ssi"] = (("time", "sv"), np.atleast_2d(arr[:, 1])) + + return d + + +def obsheader4(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dict[str, T.Any]: + """ + get RINEX 4 OBS types, for each system type + optionally, select system type and/or measurement type to greatly + speed reading and save memory (RAM, disk) + """ + if isinstance(f, (str, Path)): + with opener(f, header=True) as h: + return obsheader4(h, use, meas) + + fields = {} + Fmax = 0 + + # %% first line + hdr = rinexinfo(f) + + for ln in f: + if "END OF HEADER" in ln: + break + + hd = ln[60:80] + c = ln[:60] + if "SYS / # / OBS TYPES" in hd: + k = c[0] + fields[k] = c[6:60].split() + N = int(c[3:6]) + # %% maximum number of fields in a file, to allow fast Numpy parse. + Fmax = max(N, Fmax) + + n = N - 13 + while n > 0: # Rinex 3.03, pg. A6, A7 + ln = f.readline() + assert "SYS / # / OBS TYPES" in ln[60:] + fields[k] += ln[6:60].split() + n -= 13 + + assert len(fields[k]) == N + + continue + + if hd.strip() not in hdr: # Header label + hdr[hd.strip()] = c # don't strip for fixed-width parsers + # string with info + else: # concatenate to the existing string + hdr[hd.strip()] += " " + c + + # %% list with x,y,z cartesian (OPTIONAL) + # Rinex 3.03, pg. A6, Table A2 + try: + # some RINEX files have bad headers with mulitple APPROX POSITION XYZ. + # we choose to use the first such header. + hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3] + if ecef2geodetic is not None and len(hdr["position"]) == 3: + hdr["position_geodetic"] = ecef2geodetic(*hdr["position"]) + except (KeyError, ValueError): + pass + # %% time + try: + t0s = hdr["TIME OF FIRST OBS"] + # NOTE: must do second=int(float()) due to non-conforming files + hdr["t0"] = datetime( + year=int(t0s[:6]), + month=int(t0s[6:12]), + day=int(t0s[12:18]), + hour=int(t0s[18:24]), + minute=int(t0s[24:30]), + second=int(float(t0s[30:36])), + microsecond=int(float(t0s[30:43]) % 1 * 1000000), + ) + except (KeyError, ValueError): + pass + + try: + hdr["interval"] = float(hdr["INTERVAL"][:10]) + except (KeyError, ValueError): + pass + # %% select specific satellite systems only (optional) + if use: + if not set(fields.keys()).intersection(use): + raise KeyError(f"system type {use} not found in RINEX file") + + fields = {k: fields[k] for k in use if k in fields} + + # perhaps this could be done more efficiently, but it's probably low impact on overall program. + # simple set and frozenset operations do NOT preserve order, which would mess up reading! + sysind: dict[str, T.Any] = {} + if isinstance(meas, (tuple, list, np.ndarray)): + for sk in fields: # iterate over each system + # ind = np.isin(fields[sk], meas) # boolean vector + ind = np.zeros(len(fields[sk]), dtype=bool) + for m in meas: + for i, field in enumerate(fields[sk]): + if field.startswith(m): + ind[i] = True + + fields[sk] = np.array(fields[sk])[ind].tolist() + sysind[sk] = np.empty(Fmax * 3, dtype=bool) # *3 due to LLI, SSI + for j, i in enumerate(ind): + sysind[sk][j * 3 : j * 3 + 3] = i + else: + sysind = {k: np.s_[:] for k in fields} + + hdr["fields"] = fields + hdr["fields_ind"] = sysind + hdr["Fmax"] = Fmax + + return hdr