diff --git a/channeldata.json b/channeldata.json
new file mode 100644
index 0000000..fdd2409
--- /dev/null
+++ b/channeldata.json
@@ -0,0 +1,7 @@
+{
+  "channeldata_version": 1,
+  "packages": {},
+  "subdirs": [
+    "noarch"
+  ]
+}
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..3eda6ed
--- /dev/null
+++ b/index.html
@@ -0,0 +1,83 @@
+<!-- auto-generated conda channel index page: "georinex" -->
+<!-- header links: RSS Feed, channeldata.json; subdir link: noarch -->
+<!-- empty package table: Package | Latest Version | Doc | Dev | License | noarch | Summary -->
+<!-- Updated: 2023-06-08 14:39:19 +0000 - Files: 0 -->
\ No newline at end of file
diff --git a/noarch/current_repodata.json b/noarch/current_repodata.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/current_repodata.json
@@ -0,0 +1,9 @@
+{
+  "info": {
+    "subdir": "noarch"
+  },
+  "packages": {},
+  "packages.conda": {},
+  "removed": [],
+  "repodata_version": 1
+}
diff --git a/noarch/current_repodata.json.bz2 b/noarch/current_repodata.json.bz2
new file mode 100644
index 0000000..3449917
Binary files /dev/null and b/noarch/current_repodata.json.bz2 differ
diff --git a/noarch/index.html b/noarch/index.html
new file mode 100644
index 0000000..3610ed4
--- /dev/null
+++ b/noarch/index.html
@@ -0,0 +1,82 @@
+<!-- auto-generated index page: "georinex/noarch" -->
+<!-- file table: Filename | Size | Last Modified | SHA256 | MD5 -->
+<!-- repodata.json | 127 B | 2023-06-08 14:39:19 +0000 | b546412dc20b790c5f9c223e394ff0e39a27ba12a99631ca4d1bb6c0ca3bd05c | 917338f97423c09c911618661fef3056 -->
+<!-- repodata.json.bz2 | 126 B | 2023-06-08 14:39:19 +0000 | 32d48c11b6d5ee61a54a9076f8c06a62ba795f69b671db3e44b1301fa0efae0e | 1ecb5b301b9a2ba831f0a68a7ace5f00 -->
+<!-- repodata_from_packages.json | 127 B | 2023-06-08 14:39:19 +0000 | b546412dc20b790c5f9c223e394ff0e39a27ba12a99631ca4d1bb6c0ca3bd05c | 917338f97423c09c911618661fef3056 -->
+<!-- repodata_from_packages.json.bz2 | 126 B | 2023-06-08 14:39:19 +0000 | 32d48c11b6d5ee61a54a9076f8c06a62ba795f69b671db3e44b1301fa0efae0e | 1ecb5b301b9a2ba831f0a68a7ace5f00 -->
+<!-- Updated: 2023-06-08 14:39:19 +0000 - Files: 0 -->
\ No newline at end of file
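Editor's note: the JSON stubs above and below match what `conda index` generates for an empty channel: channeldata.json lists the subdirs, and each subdir's repodata.json starts with empty package maps. A minimal sketch of how a client could sanity-check this layout (the `check_channel` helper and the local path are illustrative, not part of this commit):

    import json
    from pathlib import Path

    def check_channel(root: Path) -> None:
        channel = json.loads((root / "channeldata.json").read_text())
        assert channel["channeldata_version"] == 1
        for subdir in channel["subdirs"]:  # here: ["noarch"]
            repo = json.loads((root / subdir / "repodata.json").read_text())
            assert repo["info"]["subdir"] == subdir
            # an empty channel has no packages yet
            assert not repo["packages"] and not repo["packages.conda"]

    check_channel(Path("."))
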
diff --git a/noarch/repodata.json b/noarch/repodata.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/repodata.json
@@ -0,0 +1,9 @@
+{
+  "info": {
+    "subdir": "noarch"
+  },
+  "packages": {},
+  "packages.conda": {},
+  "removed": [],
+  "repodata_version": 1
+}
diff --git a/noarch/repodata.json.bz2 b/noarch/repodata.json.bz2
new file mode 100644
index 0000000..3449917
Binary files /dev/null and b/noarch/repodata.json.bz2 differ
diff --git a/noarch/repodata_from_packages.json b/noarch/repodata_from_packages.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/repodata_from_packages.json
@@ -0,0 +1,9 @@
+{
+  "info": {
+    "subdir": "noarch"
+  },
+  "packages": {},
+  "packages.conda": {},
+  "removed": [],
+  "repodata_version": 1
+}
diff --git a/noarch/repodata_from_packages.json.bz2 b/noarch/repodata_from_packages.json.bz2
new file mode 100644
index 0000000..3449917
Binary files /dev/null and b/noarch/repodata_from_packages.json.bz2 differ
diff --git a/src/georinex/base.py b/src/georinex/base.py
index ccfff85..6e39370 100644
--- a/src/georinex/base.py
+++ b/src/georinex/base.py
@@ -8,6 +8,7 @@
 from .rio import rinexinfo
 from .obs2 import rinexobs2
 from .obs3 import rinexobs3
+from .obs4 import rinexobs4
 from .nav2 import rinexnav2
 from .nav3 import rinexnav3
 from .sp3 import load_sp3
@@ -154,7 +155,7 @@ def rinexnav(
 
     if fn.suffix == ".nc":
         try:
-            return xarray.open_dataset(fn, group=group)
+            return xarray.load_dataset(fn, group=group)
         except OSError as e:
             raise LookupError(f"Group {group} not found in {fn} {e}")
@@ -205,7 +206,7 @@ def rinexobs(
     # %% NetCDF4
     if fn.suffix == ".nc":
         try:
-            return xarray.open_dataset(fn, group=group)
+            return xarray.load_dataset(fn, group=group)
         except OSError as e:
             raise LookupError(f"Group {group} not found in {fn} {e}")
@@ -235,6 +236,17 @@ def rinexobs(
             fast=fast,
             interval=interval,
         )
+    elif int(info["version"]) == 4:
+        obs = rinexobs4(
+            fn,
+            use,
+            tlim=tlim,
+            useindicators=useindicators,
+            meas=meas,
+            verbose=verbose,
+            fast=fast,
+            interval=interval,
+        )
     else:
         raise ValueError(f"unknown RINEX {info} {fn}")
@@ -259,7 +271,7 @@ def _groupexists(fn: Path, group: str, overwrite: bool) -> str:
 
     # be sure there isn't already NAV in it
     try:
-        xarray.open_dataset(fn, group=group)
+        xarray.load_dataset(fn, group=group)
         raise ValueError(f"{group} already in {fn}")
     except OSError:
         pass
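Editor's note: base.py now routes on the header version, so RINEX 4 OBS files reach `rinexobs4`, and the NetCDF branches use `xarray.load_dataset`, which reads eagerly and closes the file handle. A hedged usage sketch of the top-level API after this change (the filename is hypothetical):

    import georinex as gr

    # the RINEX version is auto-detected from the header; a version 4 OBS
    # file now dispatches to rinexobs4 (the filename here is illustrative)
    obs = gr.load("station_v4.rnx", use={"G"}, fast=True)
    print(obs.attrs["version"], obs.attrs["time_system"])
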
diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py
index b442b92..55ebeac 100644
--- a/src/georinex/obs2.py
+++ b/src/georinex/obs2.py
@@ -28,7 +28,7 @@ def rinexobs2(
     *,
     fast: bool = True,
     interval: float | int | timedelta = None,
-):
+) -> xarray.Dataset:
 
     if isinstance(use, str):
         use = {use}
@@ -36,10 +36,9 @@
     if not use:
         use = {"C", "E", "G", "J", "R", "S"}
 
-    obs = xarray.Dataset(
-        {}, coords={"time": np.array([], dtype="datetime64[ns]"), "sv": np.array([], dtype="<U3")}
@@ ... @@
 def _timeobs(ln: str) -> datetime:
-    year = int(ln[1:3])
     if year < 80:
         year += 2000
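Editor's note: the `_timeobs` hunk above touches RINEX 2's two-digit year handling. A standalone sketch of the usual windowing rule, assuming the conventional 1900 offset for the other branch as in obs2.py:

    def rinex2_year(yy: int) -> int:
        # RINEX 2 epochs carry two-digit years; 80-99 -> 1980-1999 and
        # 00-79 -> 2000-2079 (the pivot used by _timeobs above)
        return yy + 2000 if yy < 80 else yy + 1900

    assert rinex2_year(99) == 1999
    assert rinex2_year(23) == 2023
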
diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index b9f0f5d..5b7106e 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -5,6 +5,7 @@
 from datetime import datetime, timedelta
 import io
 import xarray
+import re
 import typing as T
 
 try:
@@ -44,12 +45,9 @@ def rinexobs3(
     useindicators: SSI, LLI are output
     meas: 'L1C' or ['L1C', 'C1C'] or similar
 
-    fast:
-        TODO: FUTURE, not yet enabled for OBS3
-        speculative preallocation based on minimum SV assumption and file size.
-        Avoids double-reading file and more complicated linked lists.
-        Believed that Numpy array should be faster than lists anyway.
-        Reduce Nsvmin if error (let us know)
+    fast: loads the entire file into memory as a string
+        TODO: time interval, time offset, etc.
+        if False, falls back to the older per-epoch (_epoch) parser
 
     interval: allows decimating file read by time e.g. every 5 seconds.
         Useful to speed up reading of very large RINEX files
@@ -65,62 +63,160 @@ def rinexobs3(
     if not meas or not meas[0].strip():
         meas = None
 
+    if isinstance(fn, Path):
+        file_name = fn
+    else:
+        file_name = None
+
     # %% allocate
-    # times = obstime3(fn)
-    data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    if fast:
+        with opener(fn) as f:
+            fn = [ln for ln in f]
+        times = obstime3(fn)
+    else:
+        data = xarray.Dataset({}, coords={"time": [], "sv": []})
+
     if tlim is not None and not isinstance(tlim[0], datetime):
         raise TypeError("time bounds are specified as datetime.datetime")
 
     last_epoch = None
     # %% loop
     with opener(fn) as f:
-        hdr = obsheader3(f, use, meas)
-
-        # %% process OBS file
-        time_offset = []
-        for ln in f:
-            if not ln.startswith(">"):  # end of file
-                break
-
-            try:
-                time = _timeobs(ln)
-            except ValueError:  # garbage between header and RINEX data
-                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
-                continue
-
-            try:
-                time_offset.append(float(ln[41:56]))
-            except ValueError:
-                pass
-            # %% get SV indices
-            sv = []
-            raw = ""
-            # Number of visible satellites this time %i3  pg. A13
-            for _ in range(int(ln[33:35])):
-                ln = f.readline()
-                sv.append(ln[:3])
-                raw += ln[3:]
-
-            if tlim is not None:
-                if time < tlim[0]:
-                    continue
-                elif time > tlim[1]:
-                    break
-
-            if interval is not None:
-                if last_epoch is None:  # initialization
-                    last_epoch = time
-                else:
-                    if time - last_epoch < interval:
-                        continue
-                    else:
-                        last_epoch += interval
-
-            if verbose:
-                print(time, end="\r")
-
-            # this time epoch is complete, assemble the data.
-            data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+        if fast:
+            hdr = obsheader3(f, use)
+
+            obl = []
+            for sk in hdr["fields"]:
+                obl = obl + hdr["fields"][sk]
+            obl = np.unique(np.array(obl))
+            obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
+
+            if meas is not None:
+                obl = obl[np.any([np.char.find(obl, j) == 0 for i, j in enumerate(meas)], 0)]
+
+            Nt = times.size
+            Npages = len(obl)
+
+            f = np.array([f for f in f if f[1:3] != "  "])  # don't need opener anymore
+
+            sv = np.array([ln[:3] for ln in f])
+            (usv, isv) = np.unique(sv, return_inverse=True)  # get all sv lines
+
+            # list of epoch numbers per datum
+            timeIndex = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1
+
+            # usv contains epoch entries too, get only sv
+            svl = usv[np.logical_not(np.char.startswith(usv, ">"))]
+            Nsv = svl.size
+
+            data = np.full((Npages, Nt, Nsv), np.nan)
+
+            if useindicators:
+                data_lli = np.full_like(data, np.nan)
+                data_ssi = np.full_like(data, np.nan)
+
+            time_offset = []  # TODO: fill this list
+
+            for i, sv in enumerate(usv):
+                if re.match(r"[A-Z][ \d]\d", sv) is None:
+                    # time offset processing here
+                    continue
+                elif use is not None and sv[0] not in use:
+                    continue
+
+                # genfromtxt is the bottleneck, limit calls this way
+                darr = np.atleast_2d(
+                    np.genfromtxt(
+                        np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"]
+                    )
+                )
+
+                t_i = timeIndex[isv == i]
+
+                # jsv == i-1 except for files spanning 1999-2000
+                jsv = svl == sv
+
+                for j, jobl in enumerate(hdr["fields"][sv[0]]):
+                    o = obl == jobl
+                    if not np.any(o):
+                        continue
+
+                    data[o, t_i, jsv] = darr[:, j * 3]
+
+                    if useindicators:
+                        data_lli[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 1]
+                        data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 2]
+
+            obs = xarray.Dataset(coords={"sv": svl, "time": times})
+            for i, k in enumerate(obl):
+                if k is None:
+                    continue
+                elif np.all(np.isnan(data[i, :, :])):
+                    # drop all-NaN variables, as the tests expect
+                    continue
+                obs[k] = (("time", "sv"), data[i, :, :])
+                if useindicators:
+                    if k[0] == "L":
+                        obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :])
+                        obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+                    # elif k[0] == 'C':  # only for code?
+                    else:
+                        obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+
+            obs = obs.dropna(dim="sv", how="all")
+            # obs = obs.dropna(dim="time", how="all")  # when tlim specified
+
+            data = obs
+
+        else:
+            hdr = obsheader3(f, use, meas)
+            # process OBS file
+            time_offset = []
+            for ln in f:
+                if not ln.startswith(">"):  # end of file
+                    break
+
+                try:
+                    time = _timeobs(ln)
+                except ValueError:  # garbage between header and RINEX data
+                    logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                    continue
+
+                try:
+                    time_offset.append(float(ln[41:56]))
+                except ValueError:
+                    pass
+                # %% get SV indices
+                sv = []
+                raw = ""
+                # Number of visible satellites this time %i3  pg. A13
+                for _ in range(int(ln[33:35])):
+                    ln = f.readline()
+                    if use is None or ln[0] in use:
+                        sv.append(ln[:3])
+                        raw += ln[3:]
+
+                if tlim is not None:
+                    if time < tlim[0]:
+                        continue
+                    elif time > tlim[1]:
+                        break
+
+                if interval is not None:
+                    if last_epoch is None:  # initialization
+                        last_epoch = time
+                    else:
+                        if time - last_epoch < interval:
+                            continue
+                        else:
+                            last_epoch += interval
+
+                if verbose:
+                    print(time, end="\r")
+
+                # this time epoch is complete, assemble the data.
+                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
 
     # %% patch SV names in case of "G 7" => "G07"
     data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
@@ -140,16 +236,17 @@
         data.attrs["interval"] = np.nan
 
     data.attrs["rinextype"] = "obs"
-    data.attrs["fast_processing"] = 0  # bool is not allowed in NetCDF4
+    data.attrs["fast_processing"] = int(fast)  # bool is not allowed in NetCDF4
     data.attrs["time_system"] = determine_time_system(hdr)
-    if isinstance(fn, Path):
-        data.attrs["filename"] = fn.name
+    if isinstance(file_name, Path):
+        data.attrs["filename"] = file_name.name
 
     if "position" in hdr.keys():
         data.attrs["position"] = hdr["position"]
         if ecef2geodetic is not None:
             data.attrs["position_geodetic"] = hdr["position_geodetic"]
-
+    if "rxmodel" in hdr.keys():
+        data.attrs["rxmodel"] = hdr["rxmodel"]
     if time_offset:
         data.attrs["time_offset"] = time_offset
@@ -219,6 +316,7 @@
     darr = np.atleast_2d(
         np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
     )
+    # data = xarray.Dataset({}, coords={"time": [], "sv": []})
     # %% assign data for each time step
     for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
         # satellite indices "si" to extract from this time's measurements
@@ -246,6 +344,7 @@
         epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
         if len(data) == 0:
             data = epoch_data
+            # data = xarray.merge((data, epoch_data))
         elif len(hdr["fields"]) == 1:  # one satellite system selected, faster to process
             data = xarray.concat((data, epoch_data), dim="time")
         else:  # general case, slower for different satellite systems all together
@@ -254,6 +353,223 @@
     return data
 
 
+def rinexsystem3(
+    fn: T.TextIO | Path,
+    use: set[str] = None,
+    tlim: tuple[datetime, datetime] = None,
+    useindicators: bool = False,
+    meas: list[str] = None,
+    verbose: bool = False,
+    *,
+    fast: bool = False,
+    interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+    """
+    process RINEX 3 OBS data
+
+    fn: RINEX OBS 3 filename
+    use: 'G' or ['G', 'R'] or similar
+
+    tlim: read between these time bounds
+    useindicators: SSI, LLI are output
+    meas: 'L1C' or ['L1C', 'C1C'] or similar
+
+    fast:
+        TODO: FUTURE, not yet enabled for OBS3
+        speculative preallocation based on minimum SV assumption and file size.
+        Avoids double-reading file and more complicated linked lists.
+        Believed that Numpy array should be faster than lists anyway.
+        Reduce Nsvmin if error (let us know)
+
+    interval: allows decimating file read by time e.g. every 5 seconds.
+        Useful to speed up reading of very large RINEX files
+    """
+
+    interval = check_time_interval(interval)
+
+    if isinstance(use, str):
+        use = {use}
+
+    if isinstance(meas, str):
+        meas = [meas]
+
+    if not meas or not meas[0].strip():
+        meas = None
+    # %% allocate
+    times = obstime3(fn)
+    # data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    if tlim is not None and not isinstance(tlim[0], datetime):
+        raise TypeError("time bounds are specified as datetime.datetime")
+
+    last_epoch = None
+    # %% loop
+    with opener(fn) as f:
+        if fast:
+            hdr = obsheader3(f)
+        else:
+            hdr = obsheader3(f, use, meas)
+
+        obl = []
+        for sk in hdr["fields"]:
+            obl = obl + hdr["fields"][sk]
+        obl = np.unique(np.array(obl))
+        obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
+
+        if meas is not None:
+            obl = obl[np.isin(obl, meas)]
+
+        Nt = times.size
+        Npages = len(obl)
+        Nsv = int(hdr["# OF SATELLITES"])
+
+        svl = np.tile("   ", Nsv)
+
+        data = np.empty((Npages, Nt, Nsv))
+        data.fill(np.nan)
+
+        if useindicators:
+            data_lli = np.full_like(data, np.nan)
+            data_ssi = np.full_like(data, np.nan)
+
+        # %% process OBS file
+        time_offset = []
+        for ln in f:
+            if not ln.startswith(">"):  # end of file
+                break
+
+            try:
+                time = _timeobs(ln)
+            except ValueError:  # garbage between header and RINEX data
+                logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                continue
+
+            try:
+                time_offset.append(float(ln[41:56]))
+            except ValueError:
+                pass
+            # %% get SV indices
+            sv = []
+            raw = ""
+            # Number of visible satellites this time %i3  pg. A13
+            for _ in range(int(ln[33:35])):
+                ln = f.readline()
+                if use is None or ln[0] in use:
+                    sv.append(ln[:3])
+                    raw += ln[3:]
+
+            if tlim is not None:
+                if time < tlim[0]:
+                    continue
+                elif time > tlim[1]:
+                    break
+
+            if interval is not None:
+                if last_epoch is None:  # initialization
+                    last_epoch = time
+                else:
+                    if time - last_epoch < interval:
+                        continue
+                    else:
+                        last_epoch += interval
+
+            if verbose:
+                print(time, end="\r")
+
+ # satellite indices "si" to extract from this time's measurements + si = [i for i, s in enumerate(sv) if s[0] in sk] + if len(si) == 0: # no SV of this system "sk" at this time + continue + + gsv = np.array(sv)[si] + isv = [i for i, s in enumerate(svl) if s in gsv] + + for i, j in enumerate(hdr["fields"][sk]): + o = obl == j + data[o, t, isv] = darr[si, i * 3] + if useindicators: + data_lli[o, t, isv] = darr[si, i * 3 + 1] + data_ssi[o, t, isv] = darr[si, i * 3 + 2] + + if " " in svl: + svl = svl[: np.argmax(svl == " ")] + + svl, isv = np.unique(svl, return_index=True) + + data = data[:, :, isv] + if useindicators: + data_lli = data_lli[:, :, isv] + data_ssi = data_ssi[:, :, isv] + + obs = xarray.Dataset(coords={"time": times, "sv": svl}) + for i, k in enumerate(obl): + # FIXME: for limited time span reads, this drops unused data variables + # if np.isnan(data[i, ...]).all(): + # continue + if k is None: + continue + obs[k] = (("time", "sv"), data[i, :, :]) + if useindicators: + if k[0] == "L": + obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :]) + obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :]) + elif k[0] == "C": + obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :]) + + obs = obs.dropna(dim="sv", how="all") + obs = obs.dropna(dim="time", how="all") # when tlim specified + + data = obs + # %% patch SV names in case of "G 7" => "G07" + data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) + # %% other attributes + data.attrs["version"] = hdr["version"] + + # Get interval from header or derive it from the data + if "interval" in hdr.keys(): + data.attrs["interval"] = hdr["interval"] + elif "time" in data.coords.keys(): + # median is robust against gaps + try: + data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) + except TypeError: + pass + else: + data.attrs["interval"] = np.nan + + data.attrs["rinextype"] = "obs" + data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 + data.attrs["time_system"] = determine_time_system(hdr) + if isinstance(fn, Path): + data.attrs["filename"] = fn.name + + if "position" in hdr.keys(): + data.attrs["position"] = hdr["position"] + if ecef2geodetic is not None: + data.attrs["position_geodetic"] = hdr["position_geodetic"] + + if time_offset: + data.attrs["time_offset"] = time_offset + + if "RCV CLOCK OFFS APPL" in hdr.keys(): + try: + data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) + except ValueError: + pass + + return data + + def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]: """ handle LLI (loss of lock) and SSI (signal strength) @@ -352,7 +668,7 @@ def obsheader3( fields = {k: fields[k] for k in use if k in fields} # perhaps this could be done more efficiently, but it's probably low impact on overall program. - # simple set and frozenset operations do NOT preserve order, which would completely mess up reading! + # simple set and frozenset operations do NOT preserve order, which would mess up reading! 
 def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
     """
     handle LLI (loss of lock) and SSI (signal strength)
@@ -352,7 +668,7 @@ def obsheader3(
     fields = {k: fields[k] for k in use if k in fields}
 
     # perhaps this could be done more efficiently, but it's probably low impact on overall program.
-    # simple set and frozenset operations do NOT preserve order, which would completely mess up reading!
+    # simple set and frozenset operations do NOT preserve order, which would mess up reading!
     sysind: dict[str, T.Any] = {}
     if isinstance(meas, (tuple, list, np.ndarray)):
         for sk in fields:  # iterate over each system
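Editor's note: the fast path added above avoids a Python-level epoch loop: `np.unique(..., return_inverse=True)` buckets every line by its first three characters (the SV id, or ">" for epoch headers), and a cumulative sum over the epoch-header positions tags each data line with its epoch index. A toy illustration of that indexing trick (the values are made up):

    import numpy as np

    # toy "file": epoch markers (">") interleaved with per-SV data lines
    lines = np.array([">", "G01", "G02", ">", "G01", ">", "G02"])
    usv, isv = np.unique(lines, return_inverse=True)

    # epoch index for every line: how many ">" markers occur at or before it
    time_index = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1
    print(time_index)                                     # [0 0 0 1 1 2 2]
    print(lines[isv == np.flatnonzero(usv == "G01")[0]])  # ['G01' 'G01']
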
diff --git a/src/georinex/obs4.py b/src/georinex/obs4.py
new file mode 100644
index 0000000..5113e85
--- /dev/null
+++ b/src/georinex/obs4.py
@@ -0,0 +1,474 @@
+from __future__ import annotations
+from pathlib import Path
+import numpy as np
+import logging
+from datetime import datetime, timedelta
+import io
+import xarray
+import re
+import typing as T
+
+try:
+    from pymap3d import ecef2geodetic
+except ImportError:
+    ecef2geodetic = None
+#
+from .rio import opener, rinexinfo
+from .common import determine_time_system, check_time_interval, check_unique_times
+
+"""https://github.com/mvglasow/satstat/wiki/NMEA-IDs"""
+
+SBAS = 100  # offset for ID
+GLONASS = 37
+QZSS = 192
+BEIDOU = 0
+
+
+def rinexobs4(
+    fn: T.TextIO | Path,
+    use: set[str] = None,
+    tlim: tuple[datetime, datetime] = None,
+    useindicators: bool = False,
+    meas: list[str] = None,
+    verbose: bool = False,
+    *,
+    fast: bool = False,
+    interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+    """
+    process RINEX 4 OBS data
+
+    fn: RINEX OBS 4 filename
+    use: 'G' or ['G', 'R'] or similar
+
+    tlim: read between these time bounds
+    useindicators: SSI, LLI are output
+    meas: 'L1C' or ['L1C', 'C1C'] or similar
+
+    fast: loads the entire file into memory as a string
+        TODO: time interval, time offset, etc.
+        if False, falls back to the older per-epoch (_epoch) parser
+
+    interval: allows decimating file read by time e.g. every 5 seconds.
+        Useful to speed up reading of very large RINEX files
+    """
+
+    interval = check_time_interval(interval)
+
+    if isinstance(use, str):
+        use = {use}
+
+    if isinstance(meas, str):
+        meas = [meas]
+
+    if not meas or not meas[0].strip():
+        meas = None
+
+    if isinstance(fn, Path):
+        file_name = fn
+    else:
+        file_name = None
+
+    # %% allocate
+    if fast:
+        with opener(fn) as f:
+            fn = [ln for ln in f]
+        times = obstime4(fn)
+    else:
+        data = xarray.Dataset({}, coords={"time": [], "sv": []})
+
+    if tlim is not None and not isinstance(tlim[0], datetime):
+        raise TypeError("time bounds are specified as datetime.datetime")
+
+    last_epoch = None
+    # %% loop
+    with opener(fn) as f:
+        if fast:
+            hdr = obsheader4(f, use)
+
+            obl = []
+            for sk in hdr["fields"]:
+                obl = obl + hdr["fields"][sk]
+            obl = np.unique(np.array(obl))
+            obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
+
+            if meas is not None:
+                obl = obl[np.any([np.char.find(obl, j) == 0 for i, j in enumerate(meas)], 0)]
+
+            Nt = times.size
+            Npages = len(obl)
+
+            f = np.array([f for f in f if f[1:3] != "  "])  # don't need opener anymore
+
+            sv = np.array([ln[:3] for ln in f])
+            (usv, isv) = np.unique(sv, return_inverse=True)  # get all sv lines
+
+            # list of epoch numbers per datum
+            timeIndex = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1
+
+            # usv contains epoch entries too, get only sv
+            svl = usv[np.logical_not(np.char.startswith(usv, ">"))]
+            Nsv = svl.size
+
+            data = np.full((Npages, Nt, Nsv), np.nan)
+
+            if useindicators:
+                data_lli = np.full_like(data, np.nan)
+                data_ssi = np.full_like(data, np.nan)
+
+            time_offset = []  # TODO: fill this list
+
+            for i, sv in enumerate(usv):
+                if re.match(r"[A-Z][ \d]\d", sv) is None:
+                    # time offset processing here
+                    continue
+                elif use is not None and sv[0] not in use:
+                    continue
+
+                # genfromtxt is the bottleneck, limit calls this way
+                darr = np.atleast_2d(
+                    np.genfromtxt(
+                        np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"]
+                    )
+                )
+
+                t_i = timeIndex[isv == i]
+
+                # jsv == i-1 except for files spanning 1999-2000
+                jsv = svl == sv
+
+                for j, jobl in enumerate(hdr["fields"][sv[0]]):
+                    o = obl == jobl
+                    if not np.any(o):
+                        continue
+
+                    data[o, t_i, jsv] = darr[:, j * 3]
+
+                    if useindicators:
+                        data_lli[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 1]
+                        data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 2]
+
+            obs = xarray.Dataset(coords={"sv": svl, "time": times})
+            for i, k in enumerate(obl):
+                if k is None:
+                    continue
+                elif np.all(np.isnan(data[i, :, :])):
+                    # drop all-NaN variables, as the tests expect
+                    continue
+                obs[k] = (("time", "sv"), data[i, :, :])
+                if useindicators:
+                    if k[0] == "L":
+                        obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :])
+                        obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+                    # elif k[0] == 'C':  # only for code?
+                    else:
+                        obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+
+            obs = obs.dropna(dim="sv", how="all")
+            # obs = obs.dropna(dim="time", how="all")  # when tlim specified
+
+            data = obs
+
+        else:
+            hdr = obsheader4(f, use, meas)
+            # process OBS file
+            time_offset = []
+            for ln in f:
+                if not ln.startswith(">"):  # end of file
+                    break
+
+                try:
+                    time = _timeobs(ln)
+                except ValueError:  # garbage between header and RINEX data
+                    logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+                    continue
+
+                try:
+                    time_offset.append(float(ln[41:56]))
+                except ValueError:
+                    pass
+                # %% get SV indices
+                sv = []
+                raw = ""
+                # Number of visible satellites this time %i3  pg. A13
+                for _ in range(int(ln[33:35])):
+                    ln = f.readline()
+                    if use is None or ln[0] in use:
+                        sv.append(ln[:3])
+                        raw += ln[3:]
+
+                if tlim is not None:
+                    if time < tlim[0]:
+                        continue
+                    elif time > tlim[1]:
+                        break
+
+                if interval is not None:
+                    if last_epoch is None:  # initialization
+                        last_epoch = time
+                    else:
+                        if time - last_epoch < interval:
+                            continue
+                        else:
+                            last_epoch += interval
+
+                if verbose:
+                    print(time, end="\r")
+
+                # this time epoch is complete, assemble the data.
+                data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+
+    # %% patch SV names in case of "G 7" => "G07"
+    data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+    # %% other attributes
+    data.attrs["version"] = hdr["version"]
+
+    # Get interval from header or derive it from the data
+    if "interval" in hdr.keys():
+        data.attrs["interval"] = hdr["interval"]
+    elif "time" in data.coords.keys():
+        # median is robust against gaps
+        try:
+            data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+        except TypeError:
+            pass
+    else:
+        data.attrs["interval"] = np.nan
+
+    data.attrs["rinextype"] = "obs"
+    data.attrs["fast_processing"] = int(fast)  # bool is not allowed in NetCDF4
+    data.attrs["time_system"] = determine_time_system(hdr)
+    if isinstance(file_name, Path):
+        data.attrs["filename"] = file_name.name
+
+    if "position" in hdr.keys():
+        data.attrs["position"] = hdr["position"]
+        if ecef2geodetic is not None:
+            data.attrs["position_geodetic"] = hdr["position_geodetic"]
+    if "rxmodel" in hdr.keys():
+        data.attrs["rxmodel"] = hdr["rxmodel"]
+    if time_offset:
+        data.attrs["time_offset"] = time_offset
+
+    if "RCV CLOCK OFFS APPL" in hdr.keys():
+        try:
+            data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
+        except ValueError:
+            pass
+
+    return data
+
+
+def _timeobs(ln: str) -> datetime:
+    """
+    convert time from RINEX 4 OBS text to datetime
+    """
+
+    if not ln.startswith("> "):  # pg. A13
+        raise ValueError('RINEX 4 line beginning "> " is not present')
+
+    return datetime(
+        int(ln[2:6]),
+        int(ln[7:9]),
+        int(ln[10:12]),
+        hour=int(ln[13:15]),
+        minute=int(ln[16:18]),
+        second=int(ln[19:21]),
+        microsecond=int(float(ln[19:29]) % 1 * 1000000),
+    )
+
+
+def obstime4(fn: T.TextIO | Path, verbose: bool = False) -> np.ndarray:
+    """
+    return all times in RINEX file
+    """
+
+    times = []
+
+    with opener(fn) as f:
+        for ln in f:
+            if ln.startswith("> "):
+                try:
+                    times.append(_timeobs(ln))
+                except (ValueError, IndexError):
+                    logging.debug(f"was not a time:\n{ln}")
+                    continue
+
+    times = np.asarray(times)
+
+    check_unique_times(times)
+
+    return times
+
+
+def _epoch(
+    data: xarray.Dataset,
+    raw: str,
+    hdr: dict[str, T.Any],
+    time: datetime,
+    sv: list[str],
+    useindicators: bool,
+    verbose: bool,
+) -> xarray.Dataset:
+    """
+    block processing of each epoch (time step)
+    """
+    darr = np.atleast_2d(
+        np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+    )
+    # data = xarray.Dataset({}, coords={"time": [], "sv": []})
+    # %% assign data for each time step
+    for sk in hdr["fields"]:  # for each satellite system type (G,R,S, etc.)
+        # satellite indices "si" to extract from this time's measurements
+        si = [i for i, s in enumerate(sv) if s[0] in sk]
+        if len(si) == 0:  # no SV of this system "sk" at this time
+            continue
+
+        # measurement indices "di" to extract at this time step
+        di = hdr["fields_ind"][sk]
+        garr = darr[si, :]
+        garr = garr[:, di]
+
+        gsv = np.array(sv)[si]
+
+        dsf: dict[str, tuple] = {}
+        for i, k in enumerate(hdr["fields"][sk]):
+            dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3]))
+
+            if useindicators:
+                dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3])
+
+        if verbose:
+            print(time, "\r", end="")
+
+        epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
+        if len(data) == 0:
+            data = epoch_data
+            # data = xarray.merge((data, epoch_data))
+        elif len(hdr["fields"]) == 1:  # one satellite system selected, faster to process
+            data = xarray.concat((data, epoch_data), dim="time")
+        else:  # general case, slower for different satellite systems all together
+            data = xarray.merge((data, epoch_data))
+
+    return data
+
+
+def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
+    """
+    handle LLI (loss of lock) and SSI (signal strength)
+    """
+    if k.startswith(("L1", "L2")):
+        d[k + "lli"] = (("time", "sv"), np.atleast_2d(arr[:, 0]))
+
+    d[k + "ssi"] = (("time", "sv"), np.atleast_2d(arr[:, 1]))
+
+    return d
+
+
+def obsheader4(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dict[str, T.Any]:
+    """
+    get RINEX 4 OBS types, for each system type
+    optionally, select system type and/or measurement type to greatly
+    speed reading and save memory (RAM, disk)
+    """
+    if isinstance(f, (str, Path)):
+        with opener(f, header=True) as h:
+            return obsheader4(h, use, meas)
+
+    fields = {}
+    Fmax = 0
+
+    # %% first line
+    hdr = rinexinfo(f)
+
+    for ln in f:
+        if "END OF HEADER" in ln:
+            break
+
+        hd = ln[60:80]
+        c = ln[:60]
+        if "SYS / # / OBS TYPES" in hd:
+            k = c[0]
+            fields[k] = c[6:60].split()
+            N = int(c[3:6])
+            # %% maximum number of fields in a file, to allow fast Numpy parse.
+            Fmax = max(N, Fmax)
+
+            n = N - 13
+            while n > 0:  # Rinex 3.03, pg. A6, A7
+                ln = f.readline()
+                assert "SYS / # / OBS TYPES" in ln[60:]
+                fields[k] += ln[6:60].split()
+                n -= 13
+
+            assert len(fields[k]) == N
+
+            continue
+
+        if hd.strip() not in hdr:  # Header label
+            hdr[hd.strip()] = c  # don't strip for fixed-width parsers
+            # string with info
+        else:  # concatenate to the existing string
+            hdr[hd.strip()] += " " + c
+
+    # %% list with x,y,z cartesian (OPTIONAL)
+    # Rinex 3.03, pg. A6, Table A2
+    try:
+        # some RINEX files have bad headers with multiple APPROX POSITION XYZ.
+        # we choose to use the first such header.
+        hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3]
+        if ecef2geodetic is not None and len(hdr["position"]) == 3:
+            hdr["position_geodetic"] = ecef2geodetic(*hdr["position"])
+    except (KeyError, ValueError):
+        pass
+    # %% time
+    try:
+        t0s = hdr["TIME OF FIRST OBS"]
+        # NOTE: must do second=int(float()) due to non-conforming files
+        hdr["t0"] = datetime(
+            year=int(t0s[:6]),
+            month=int(t0s[6:12]),
+            day=int(t0s[12:18]),
+            hour=int(t0s[18:24]),
+            minute=int(t0s[24:30]),
+            second=int(float(t0s[30:36])),
+            microsecond=int(float(t0s[30:43]) % 1 * 1000000),
+        )
+    except (KeyError, ValueError):
+        pass
+
+    try:
+        hdr["interval"] = float(hdr["INTERVAL"][:10])
+    except (KeyError, ValueError):
+        pass
+    # %% select specific satellite systems only (optional)
+    if use:
+        if not set(fields.keys()).intersection(use):
+            raise KeyError(f"system type {use} not found in RINEX file")
+
+        fields = {k: fields[k] for k in use if k in fields}
+
+    # perhaps this could be done more efficiently, but it's probably low impact on overall program.
+    # simple set and frozenset operations do NOT preserve order, which would mess up reading!
+    sysind: dict[str, T.Any] = {}
+    if isinstance(meas, (tuple, list, np.ndarray)):
+        for sk in fields:  # iterate over each system
+            # ind = np.isin(fields[sk], meas)  # boolean vector
+            ind = np.zeros(len(fields[sk]), dtype=bool)
+            for m in meas:
+                for i, field in enumerate(fields[sk]):
+                    if field.startswith(m):
+                        ind[i] = True
+
+            fields[sk] = np.array(fields[sk])[ind].tolist()
+            sysind[sk] = np.empty(Fmax * 3, dtype=bool)  # *3 due to LLI, SSI
+            for j, i in enumerate(ind):
+                sysind[sk][j * 3 : j * 3 + 3] = i
+    else:
+        sysind = {k: np.s_[:] for k in fields}
+
+    hdr["fields"] = fields
+    hdr["fields_ind"] = sysind
+    hdr["Fmax"] = Fmax
+
+    return hdr
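Editor's note: the rio.py changes below teach `opener()` to accept an already-read list of lines (used by the fast path, which iterates the file twice) and to retry with progressively more permissive text encodings. A standalone sketch of that fallback idea (not the PR's exact code):

    import io

    def decode_with_fallback(raw: bytes, encodings=("ascii", "latin-1", "utf-8")) -> io.StringIO:
        # try progressively more permissive encodings, mirroring opener() below
        for enc in encodings:
            try:
                return io.StringIO(raw.decode(enc))
            except UnicodeDecodeError:
                continue
        raise ValueError("could not decode RINEX text")

    buf = decode_with_fallback("G01  20000000.000\n".encode("latin-1"))

One design note: latin-1 maps every byte value to a character, so it never raises UnicodeDecodeError; in both this sketch and the `opener()` chain below it effectively terminates the retry sequence, leaving the utf-8 step unreachable.
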
diff --git a/src/georinex/rio.py b/src/georinex/rio.py
index 9a0f66c..befbdd0 100644
--- a/src/georinex/rio.py
+++ b/src/georinex/rio.py
@@ -15,7 +15,7 @@
 
 
 @contextmanager
-def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]:
+def opener(fn: T.TextIO | Path | list, header: bool = False, encoding="ascii") -> T.Iterator[T.TextIO]:
     """provides file handle for regular ASCII or gzip files transparently"""
     if isinstance(fn, str):
         fn = Path(fn).expanduser()
@@ -23,57 +23,96 @@ def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]:
     if isinstance(fn, io.StringIO):
         fn.seek(0)
         yield fn
+    elif isinstance(fn, list):
+        f = "".join(fn)
+        fn = io.StringIO(f)
+        yield fn
     elif isinstance(fn, Path):
         # need to have this check for Windows
         if not fn.is_file():
             raise FileNotFoundError(fn)
 
-        finf = fn.stat()
-        if finf.st_size > 100e6:
-            logging.info(f"opening {finf.st_size/1e6} MByte {fn.name}")
-
-        suffix = fn.suffix.lower()
-
-        if suffix == ".gz":
-            with gzip.open(fn, "rt") as f:
-                _, is_crinex = rinex_version(first_nonblank_line(f))
-                f.seek(0)
-
-                if is_crinex and not header:
-                    # Conversion to string is necessary because of a quirk where gzip.open()
-                    # even with 'rt' doesn't decompress until read.
-                    f = io.StringIO(crx2rnx(f.read()))
-                yield f
-        elif suffix == ".bz2":
-            # this is for plain bzip2 files, NOT tar.bz2, which requires f.seek(512)
-            with bz2.open(fn, "rt") as f:
-                _, is_crinex = rinex_version(first_nonblank_line(f))
-                f.seek(0)
-
-                if is_crinex and not header:
-                    f = io.StringIO(crx2rnx(f.read()))
-                yield f
-        elif suffix == ".zip":
-            with zipfile.ZipFile(fn, "r") as z:
-                flist = z.namelist()
-                for rinexfn in flist:
-                    with z.open(rinexfn, "r") as bf:
-                        f = io.StringIO(
-                            io.TextIOWrapper(bf, encoding="ascii", errors="ignore").read()  # type: ignore
-                        )
-                        yield f
-        elif suffix == ".z":
-            with fn.open("rb") as zu:
-                with io.StringIO(unlzw(zu.read()).decode("ascii")) as f:
-                    _, is_crinex = rinex_version(first_nonblank_line(f))
-                    f.seek(0)
-
-                    if is_crinex and not header:
-                        f = io.StringIO(crx2rnx(f))
-                    yield f
-        else:  # assume not compressed (or Hatanaka)
-            with fn.open("r", encoding="ascii", errors="ignore") as f:
-                _, is_crinex = rinex_version(first_nonblank_line(f))
-                f.seek(0)
-
-                if is_crinex and not header:
-                    f = io.StringIO(crx2rnx(f))
-                yield f
+        try:
+            finf = fn.stat()
+            if finf.st_size > 100e6:
+                logging.info(f"opening {finf.st_size/1e6} MByte {fn.name}")
+
+            suffix = fn.suffix.lower()
+
+            if suffix == ".gz":
+                with gzip.open(fn, "rt", encoding=encoding) as f:
+                    _, is_crinex = rinex_version(first_nonblank_line(f))
+                    f.seek(0)
+
+                    if is_crinex and not header:
+                        # Conversion to string is necessary because of a quirk where gzip.open()
+                        # even with 'rt' doesn't decompress until read.
+                        f = io.StringIO(crx2rnx(f.read()))
+                    yield f
+            elif suffix == ".bz2":
+                # this is for plain bzip2 files, NOT tar.bz2, which requires f.seek(512)
+                with bz2.open(fn, "rt", encoding=encoding) as f:
+                    _, is_crinex = rinex_version(first_nonblank_line(f))
+                    f.seek(0)
+
+                    if is_crinex and not header:
+                        f = io.StringIO(crx2rnx(f.read()))
+                    yield f
+            elif suffix == ".zip":
+                with zipfile.ZipFile(fn, "r") as z:
+                    flist = z.namelist()
+                    for rinexfn in flist:
+                        with z.open(rinexfn, "r") as bf:
+                            f = io.StringIO(
+                                io.TextIOWrapper(bf, encoding=encoding, errors="ignore").read()  # type: ignore
+                            )
+                            yield f
+            elif suffix == ".z":
+                if unlzw is None:
+                    raise ImportError("pip install unlzw3")
+                try:
+                    with fn.open("rb") as zu:
+                        with io.StringIO(unlzw(zu.read()).decode(encoding)) as f:
+                            _, is_crinex = rinex_version(first_nonblank_line(f))
+                            f.seek(0)
+
+                            if is_crinex and not header:
+                                f = io.StringIO(crx2rnx(f.read()))
+                            yield f
+                except ValueError as e:
+                    if "magic bytes" in e.args[0]:
+                        # sometimes .Z files are actually gzipped
+                        with gzip.open(fn, "rt", encoding=encoding) as f:
+                            _, is_crinex = rinex_version(first_nonblank_line(f))
+                            f.seek(0)
+
+                            if is_crinex and not header:
+                                # Conversion to string is necessary because of a quirk where gzip.open()
+                                # even with 'rt' doesn't decompress until read.
+                                f = io.StringIO(crx2rnx(f.read()))
+                            yield f
+
+            else:  # assume not compressed (or Hatanaka)
+                with fn.open("r", encoding=encoding, errors="ignore") as f:
+                    _, is_crinex = rinex_version(first_nonblank_line(f))
+                    f.seek(0)
+
+                    if is_crinex and not header:
+                        f = io.StringIO(crx2rnx(f))
+                    yield f
+        except UnicodeDecodeError:
+            # some RINEX files have awkward encoding
+            if encoding == "ascii":
+                with opener(fn, header, encoding="latin-1") as f:
+                    yield f
+            elif encoding == "latin-1":
+                with opener(fn, header, encoding="utf-8") as f:
+                    yield f
+            else:
+                raise
     else:
         raise OSError(f"Unsure what to do with input of type: {type(fn)}")
diff --git a/src/georinex/tests/test_rinex.py b/src/georinex/tests/test_rinex.py
index 35c1740..5c8f26e 100644
--- a/src/georinex/tests/test_rinex.py
+++ b/src/georinex/tests/test_rinex.py
@@ -61,7 +61,7 @@ def test_minimal(tmp_path, filename):
     if int(dat.version) == 2:
         assert dat.fast_processing
     elif int(dat.version) == 3:
-        assert not dat.fast_processing  # FIXME: update when OBS3 fast processing is added.
+        assert dat.fast_processing
 
 
 def test_dont_care_file_extension():
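Editor's note: taken together, RINEX 3 (and the new RINEX 4) OBS readers now default to the vectorized path, which `test_minimal` asserts via the `fast_processing` attribute. An end-to-end sketch under that assumption (the file path is hypothetical):

    import georinex as gr

    obs = gr.load("tests/data/demo_v3.rnx", useindicators=True)
    assert obs.attrs["rinextype"] == "obs"
    assert obs.attrs["fast_processing"] == 1  # vectorized path was used

    # loss-of-lock and signal-strength indicators ride along as extra variables
    lli = [v for v in obs.data_vars if str(v).endswith("lli")]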