diff --git a/channeldata.json b/channeldata.json
new file mode 100644
index 0000000..fdd2409
--- /dev/null
+++ b/channeldata.json
@@ -0,0 +1,7 @@
+{
+ "channeldata_version": 1,
+ "packages": {},
+ "subdirs": [
+ "noarch"
+ ]
+}
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..3eda6ed
--- /dev/null
+++ b/index.html
@@ -0,0 +1,83 @@
+
+
+ georinex
+
+
+
+ georinex
+
+noarch
+
+ | Package |
+ Latest Version |
+ Doc |
+ Dev |
+ License |
+noarch | Summary |
+
+
+ Updated: 2023-06-08 14:39:19 +0000 - Files: 0
+
+
\ No newline at end of file
diff --git a/noarch/current_repodata.json b/noarch/current_repodata.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/current_repodata.json
@@ -0,0 +1,9 @@
+{
+ "info": {
+ "subdir": "noarch"
+ },
+ "packages": {},
+ "packages.conda": {},
+ "removed": [],
+ "repodata_version": 1
+}
diff --git a/noarch/current_repodata.json.bz2 b/noarch/current_repodata.json.bz2
new file mode 100644
index 0000000..3449917
Binary files /dev/null and b/noarch/current_repodata.json.bz2 differ
diff --git a/noarch/index.html b/noarch/index.html
new file mode 100644
index 0000000..3610ed4
--- /dev/null
+++ b/noarch/index.html
@@ -0,0 +1,82 @@
+
+
+ georinex/noarch
+
+
+
+ georinex/noarch
+
+
+ | Filename |
+ Size |
+ Last Modified |
+ SHA256 |
+ MD5 |
+
+
+ | repodata.json |
+ 127 B |
+ 2023-06-08 14:39:19 +0000 |
+ b546412dc20b790c5f9c223e394ff0e39a27ba12a99631ca4d1bb6c0ca3bd05c |
+ 917338f97423c09c911618661fef3056 |
+
+ | repodata.json.bz2 |
+ 126 B |
+ 2023-06-08 14:39:19 +0000 |
+ 32d48c11b6d5ee61a54a9076f8c06a62ba795f69b671db3e44b1301fa0efae0e |
+ 1ecb5b301b9a2ba831f0a68a7ace5f00 |
+
+ | repodata_from_packages.json |
+ 127 B |
+ 2023-06-08 14:39:19 +0000 |
+ b546412dc20b790c5f9c223e394ff0e39a27ba12a99631ca4d1bb6c0ca3bd05c |
+ 917338f97423c09c911618661fef3056 |
+
+ | repodata_from_packages.json.bz2 |
+ 126 B |
+ 2023-06-08 14:39:19 +0000 |
+ 32d48c11b6d5ee61a54a9076f8c06a62ba795f69b671db3e44b1301fa0efae0e |
+ 1ecb5b301b9a2ba831f0a68a7ace5f00 |
+
+ Updated: 2023-06-08 14:39:19 +0000 - Files: 0
+
+
\ No newline at end of file
diff --git a/noarch/repodata.json b/noarch/repodata.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/repodata.json
@@ -0,0 +1,9 @@
+{
+ "info": {
+ "subdir": "noarch"
+ },
+ "packages": {},
+ "packages.conda": {},
+ "removed": [],
+ "repodata_version": 1
+}
diff --git a/noarch/repodata.json.bz2 b/noarch/repodata.json.bz2
new file mode 100644
index 0000000..3449917
Binary files /dev/null and b/noarch/repodata.json.bz2 differ
diff --git a/noarch/repodata_from_packages.json b/noarch/repodata_from_packages.json
new file mode 100644
index 0000000..c8a47d1
--- /dev/null
+++ b/noarch/repodata_from_packages.json
@@ -0,0 +1,9 @@
+{
+ "info": {
+ "subdir": "noarch"
+ },
+ "packages": {},
+ "packages.conda": {},
+ "removed": [],
+ "repodata_version": 1
+}
diff --git a/noarch/repodata_from_packages.json.bz2 b/noarch/repodata_from_packages.json.bz2
new file mode 100644
index 0000000..3449917
Binary files /dev/null and b/noarch/repodata_from_packages.json.bz2 differ
diff --git a/src/georinex/base.py b/src/georinex/base.py
index ccfff85..6e39370 100644
--- a/src/georinex/base.py
+++ b/src/georinex/base.py
@@ -8,6 +8,7 @@
from .rio import rinexinfo
from .obs2 import rinexobs2
from .obs3 import rinexobs3
+from .obs4 import rinexobs4
from .nav2 import rinexnav2
from .nav3 import rinexnav3
from .sp3 import load_sp3
@@ -154,7 +155,7 @@ def rinexnav(
if fn.suffix == ".nc":
try:
- return xarray.open_dataset(fn, group=group)
+ return xarray.load_dataset(fn, group=group)
except OSError as e:
raise LookupError(f"Group {group} not found in {fn} {e}")
@@ -205,7 +206,7 @@ def rinexobs(
# %% NetCDF4
if fn.suffix == ".nc":
try:
- return xarray.open_dataset(fn, group=group)
+ return xarray.load_dataset(fn, group=group)
except OSError as e:
raise LookupError(f"Group {group} not found in {fn} {e}")
@@ -235,6 +236,17 @@ def rinexobs(
fast=fast,
interval=interval,
)
+ elif int(info["version"]) == 4:
+ obs = rinexobs4(
+ fn,
+ use,
+ tlim=tlim,
+ useindicators=useindicators,
+ meas=meas,
+ verbose=verbose,
+ fast=fast,
+ interval=interval,
+ )
else:
raise ValueError(f"unknown RINEX {info} {fn}")
@@ -259,7 +271,7 @@ def _groupexists(fn: Path, group: str, overwrite: bool) -> str:
# be sure there isn't already NAV in it
try:
- xarray.open_dataset(fn, group=group)
+ xarray.load_dataset(fn, group=group)
raise ValueError(f"{group} already in {fn}")
except OSError:
pass
diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py
index b442b92..55ebeac 100644
--- a/src/georinex/obs2.py
+++ b/src/georinex/obs2.py
@@ -28,7 +28,7 @@ def rinexobs2(
*,
fast: bool = True,
interval: float | int | timedelta = None,
-):
+) -> xarray.Dataset:
if isinstance(use, str):
use = {use}
@@ -36,10 +36,9 @@ def rinexobs2(
if not use:
use = {"C", "E", "G", "J", "R", "S"}
- obs = xarray.Dataset(
- {}, coords={"time": np.array([], dtype="datetime64[ns]"), "sv": np.array([], dtype=" datetime:
def _timeobs(ln: str) -> datetime:
-
year = int(ln[1:3])
if year < 80:
year += 2000
diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py
index b9f0f5d..5b7106e 100644
--- a/src/georinex/obs3.py
+++ b/src/georinex/obs3.py
@@ -5,6 +5,7 @@
from datetime import datetime, timedelta
import io
import xarray
+import re
import typing as T
try:
@@ -44,12 +45,9 @@ def rinexobs3(
useindicators: SSI, LLI are output
meas: 'L1C' or ['L1C', 'C1C'] or similar
- fast:
- TODO: FUTURE, not yet enabled for OBS3
- speculative preallocation based on minimum SV assumption and file size.
- Avoids double-reading file and more complicated linked lists.
- Believed that Numpy array should be faster than lists anyway.
- Reduce Nsvmin if error (let us know)
+ fast: Loads entire file into memory as a string
+ TODO: time interval, time offset, etc.
+ if false, uses old _epoch method
interval: allows decimating file read by time e.g. every 5 seconds.
Useful to speed up reading of very large RINEX files
@@ -65,62 +63,160 @@ def rinexobs3(
if not meas or not meas[0].strip():
meas = None
+
+ if isinstance(fn, Path):
+ file_name = fn
+ else:
+ file_name = None
+
# %% allocate
- # times = obstime3(fn)
- data = xarray.Dataset({}, coords={"time": [], "sv": []})
+ if fast:
+ with opener(fn) as f:
+ fn = [ln for ln in f]
+ times = obstime3(fn)
+ else:
+ data = xarray.Dataset({}, coords={"time": [], "sv": []})
+
if tlim is not None and not isinstance(tlim[0], datetime):
raise TypeError("time bounds are specified as datetime.datetime")
last_epoch = None
# %% loop
with opener(fn) as f:
- hdr = obsheader3(f, use, meas)
+ if fast:
+ hdr = obsheader3(f, use)
- # %% process OBS file
- time_offset = []
- for ln in f:
- if not ln.startswith(">"): # end of file
- break
+ obl = []
+ for sk in hdr["fields"]:
+ obl = obl + hdr["fields"][sk]
+ obl = np.unique(np.array(obl))
+ obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
- try:
- time = _timeobs(ln)
- except ValueError: # garbage between header and RINEX data
- logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
- continue
+ if meas is not None:
+ obl = obl[np.any([np.char.find(obl, j) == 0 for i, j in enumerate(meas)], 0)]
- try:
- time_offset.append(float(ln[41:56]))
- except ValueError:
- pass
- # %% get SV indices
- sv = []
- raw = ""
- # Number of visible satellites this time %i3 pg. A13
- for _ in range(int(ln[33:35])):
- ln = f.readline()
- sv.append(ln[:3])
- raw += ln[3:]
+ Nt = times.size
+ Npages = len(obl)
- if tlim is not None:
- if time < tlim[0]:
+ f = np.array([f for f in f if f[1:3] != " "]) # don't need opener anymore
+
+ sv = np.array([ln[:3] for ln in f])
+ (usv, isv) = np.unique(sv, return_inverse=True) # get all sv lines
+
+ # list of epoch numbers per datum
+ timeIndex = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1
+
+ # usv contains epoch entries too, get only sv
+ svl = usv[np.logical_not(np.char.startswith(usv, ">"))]
+ Nsv = svl.size
+
+ data = np.full((Npages, Nt, Nsv), np.nan)
+
+ if useindicators:
+ data_lli = np.full_like(data, np.nan)
+ data_ssi = np.full_like(data, np.nan)
+
+ time_offset = [] # TODO: fill this list
+
+ for i, sv in enumerate(usv):
+ if re.match(r"[A-Z][ \d]\d", sv) is None:
+ # time offset processing here
continue
- elif time > tlim[1]:
+ elif use is not None and sv[0] not in use:
+ continue
+
+ # genfromtext is bottleneck, limit calls this way
+ darr = np.atleast_2d(
+ np.genfromtxt(
+ np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"]
+ )
+ )
+
+ t_i = timeIndex[isv == i]
+
+ # jsv == i-1 except for files spanning 1999-2000
+ jsv = svl == sv
+
+ for j, jobl in enumerate(hdr["fields"][sv[0]]):
+ o = obl == jobl
+ if not np.any(o):
+ continue
+
+ data[o, t_i, jsv] = darr[:, j * 3]
+
+ if useindicators:
+ data_lli[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 1]
+ data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 2]
+
+ obs = xarray.Dataset(coords={"sv": svl, "time": times})
+ for i, k in enumerate(obl):
+ if k is None:
+ continue
+ elif np.all(np.isnan(data[i, :, :])):
+ # drop all nan datasets like tests expect
+ continue
+ obs[k] = (("time", "sv"), data[i, :, :])
+ if useindicators:
+ if k[0] == "L":
+ obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :])
+ obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+ # elif k[0] == 'C': # only for code?
+ else:
+ obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+
+ obs = obs.dropna(dim="sv", how="all")
+ # obs = obs.dropna(dim="time", how="all") # when tlim specified
+
+ data = obs
+
+ else:
+ hdr = obsheader3(f, use, meas)
+ # process OBS file
+ time_offset = []
+ for ln in f:
+ if not ln.startswith(">"): # end of file
break
- if interval is not None:
- if last_epoch is None: # initialization
- last_epoch = time
- else:
- if time - last_epoch < interval:
+ try:
+ time = _timeobs(ln)
+ except ValueError: # garbage between header and RINEX data
+ logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+ continue
+
+ try:
+ time_offset.append(float(ln[41:56]))
+ except ValueError:
+ pass
+ # %% get SV indices
+ sv = []
+ raw = ""
+ # Number of visible satellites this time %i3 pg. A13
+ for _ in range(int(ln[33:35])):
+ ln = f.readline()
+ if use is None or ln[0] in use:
+ sv.append(ln[:3])
+ raw += ln[3:]
+
+ if tlim is not None:
+ if time < tlim[0]:
continue
+ elif time > tlim[1]:
+ break
+
+ if interval is not None:
+ if last_epoch is None: # initialization
+ last_epoch = time
else:
- last_epoch += interval
+ if time - last_epoch < interval:
+ continue
+ else:
+ last_epoch += interval
- if verbose:
- print(time, end="\r")
+ if verbose:
+ print(time, end="\r")
- # this time epoch is complete, assemble the data.
- data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+ # this time epoch is complete, assemble the data.
+ data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
# %% patch SV names in case of "G 7" => "G07"
data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
@@ -140,16 +236,17 @@ def rinexobs3(
data.attrs["interval"] = np.nan
data.attrs["rinextype"] = "obs"
- data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4
+ data.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4
data.attrs["time_system"] = determine_time_system(hdr)
- if isinstance(fn, Path):
- data.attrs["filename"] = fn.name
+ if isinstance(file_name, Path):
+ data.attrs["filename"] = file_name.name
if "position" in hdr.keys():
data.attrs["position"] = hdr["position"]
if ecef2geodetic is not None:
data.attrs["position_geodetic"] = hdr["position_geodetic"]
-
+ if "rxmodel" in hdr.keys():
+ obs.attrs["rxmodel"] = hdr["rxmodel"]
if time_offset:
data.attrs["time_offset"] = time_offset
@@ -219,6 +316,7 @@ def _epoch(
darr = np.atleast_2d(
np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
)
+ # data = xarray.Dataset({}, coords={"time": [], "sv": []})
# %% assign data for each time step
for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.)
# satellite indices "si" to extract from this time's measurements
@@ -246,6 +344,7 @@ def _epoch(
epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
if len(data) == 0:
data = epoch_data
+ # data = xarray.merge((data, epoch_data))
elif len(hdr["fields"]) == 1: # one satellite system selected, faster to process
data = xarray.concat((data, epoch_data), dim="time")
else: # general case, slower for different satellite systems all together
@@ -254,6 +353,223 @@ def _epoch(
return data
+def rinexsystem3(
+ fn: T.TextIO | Path,
+ use: set[str] = None,
+ tlim: tuple[datetime, datetime] = None,
+ useindicators: bool = False,
+ meas: list[str] = None,
+ verbose: bool = False,
+ *,
+ fast: bool = False,
+ interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+ """
+ process RINEX 3 OBS data
+
+ fn: RINEX OBS 3 filename
+ use: 'G' or ['G', 'R'] or similar
+
+ tlim: read between these time bounds
+ useindicators: SSI, LLI are output
+ meas: 'L1C' or ['L1C', 'C1C'] or similar
+
+ fast:
+ TODO: FUTURE, not yet enabled for OBS3
+ speculative preallocation based on minimum SV assumption and file size.
+ Avoids double-reading file and more complicated linked lists.
+ Believed that Numpy array should be faster than lists anyway.
+ Reduce Nsvmin if error (let us know)
+
+ interval: allows decimating file read by time e.g. every 5 seconds.
+ Useful to speed up reading of very large RINEX files
+ """
+
+ interval = check_time_interval(interval)
+
+ if isinstance(use, str):
+ use = {use}
+
+ if isinstance(meas, str):
+ meas = [meas]
+
+ if not meas or not meas[0].strip():
+ meas = None
+ # %% allocate
+ times = obstime3(fn)
+ # data = xarray.Dataset({}, coords={"time": [], "sv": []})
+ if tlim is not None and not isinstance(tlim[0], datetime):
+ raise TypeError("time bounds are specified as datetime.datetime")
+
+ last_epoch = None
+ # %% loop
+ with opener(fn) as f:
+ if fast:
+ hdr = obsheader3(f)
+ else:
+ hdr = obsheader3(f, use, meas)
+
+ obl = []
+ for sk in hdr["fields"]:
+ obl = obl + hdr["fields"][sk]
+ obl = np.unique(np.array(obl))
+ obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
+
+ if meas is not None:
+ obl = obl[np.isin(obl, meas)]
+
+ Nt = times.size
+ Npages = len(obl)
+ Nsv = int(hdr["# OF SATELLITES"])
+
+ svl = np.tile(" ", Nsv)
+
+ data = np.empty((Npages, Nt, Nsv))
+ data.fill(np.nan)
+
+ if useindicators:
+ data_lli = np.full_like(data, np.nan)
+ data_ssi = np.full_like(data, np.nan)
+
+ # %% process OBS file
+ time_offset = []
+ for ln in f:
+ if not ln.startswith(">"): # end of file
+ break
+
+ try:
+ time = _timeobs(ln)
+ except ValueError: # garbage between header and RINEX data
+ logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+ continue
+
+ try:
+ time_offset.append(float(ln[41:56]))
+ except ValueError:
+ pass
+ # %% get SV indices
+ sv = []
+ raw = ""
+ # Number of visible satellites this time %i3 pg. A13
+ for _ in range(int(ln[33:35])):
+ ln = f.readline()
+ if use is None or ln[0] in use:
+ sv.append(ln[:3])
+ raw += ln[3:]
+
+ if tlim is not None:
+ if time < tlim[0]:
+ continue
+ elif time > tlim[1]:
+ break
+
+ if interval is not None:
+ if last_epoch is None: # initialization
+ last_epoch = time
+ else:
+ if time - last_epoch < interval:
+ continue
+ else:
+ last_epoch += interval
+
+ if verbose:
+ print(time, end="\r")
+
+ for k in sv:
+ if k not in svl:
+ svl[np.argmax(svl == " ")] = k
+
+ darr = np.atleast_2d(
+ np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+ )
+
+ t = time == times
+
+ for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.)
+ # satellite indices "si" to extract from this time's measurements
+ si = [i for i, s in enumerate(sv) if s[0] in sk]
+ if len(si) == 0: # no SV of this system "sk" at this time
+ continue
+
+ gsv = np.array(sv)[si]
+ isv = [i for i, s in enumerate(svl) if s in gsv]
+
+ for i, j in enumerate(hdr["fields"][sk]):
+ o = obl == j
+ data[o, t, isv] = darr[si, i * 3]
+ if useindicators:
+ data_lli[o, t, isv] = darr[si, i * 3 + 1]
+ data_ssi[o, t, isv] = darr[si, i * 3 + 2]
+
+ if " " in svl:
+ svl = svl[: np.argmax(svl == " ")]
+
+ svl, isv = np.unique(svl, return_index=True)
+
+ data = data[:, :, isv]
+ if useindicators:
+ data_lli = data_lli[:, :, isv]
+ data_ssi = data_ssi[:, :, isv]
+
+ obs = xarray.Dataset(coords={"time": times, "sv": svl})
+ for i, k in enumerate(obl):
+ # FIXME: for limited time span reads, this drops unused data variables
+ # if np.isnan(data[i, ...]).all():
+ # continue
+ if k is None:
+ continue
+ obs[k] = (("time", "sv"), data[i, :, :])
+ if useindicators:
+ if k[0] == "L":
+ obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :])
+ obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+ elif k[0] == "C":
+ obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+
+ obs = obs.dropna(dim="sv", how="all")
+ obs = obs.dropna(dim="time", how="all") # when tlim specified
+
+ data = obs
+ # %% patch SV names in case of "G 7" => "G07"
+ data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+ # %% other attributes
+ data.attrs["version"] = hdr["version"]
+
+ # Get interval from header or derive it from the data
+ if "interval" in hdr.keys():
+ data.attrs["interval"] = hdr["interval"]
+ elif "time" in data.coords.keys():
+ # median is robust against gaps
+ try:
+ data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+ except TypeError:
+ pass
+ else:
+ data.attrs["interval"] = np.nan
+
+ data.attrs["rinextype"] = "obs"
+ data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4
+ data.attrs["time_system"] = determine_time_system(hdr)
+ if isinstance(fn, Path):
+ data.attrs["filename"] = fn.name
+
+ if "position" in hdr.keys():
+ data.attrs["position"] = hdr["position"]
+ if ecef2geodetic is not None:
+ data.attrs["position_geodetic"] = hdr["position_geodetic"]
+
+ if time_offset:
+ data.attrs["time_offset"] = time_offset
+
+ if "RCV CLOCK OFFS APPL" in hdr.keys():
+ try:
+ data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
+ except ValueError:
+ pass
+
+ return data
+
+
def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
"""
handle LLI (loss of lock) and SSI (signal strength)
@@ -352,7 +668,7 @@ def obsheader3(
fields = {k: fields[k] for k in use if k in fields}
# perhaps this could be done more efficiently, but it's probably low impact on overall program.
- # simple set and frozenset operations do NOT preserve order, which would completely mess up reading!
+ # simple set and frozenset operations do NOT preserve order, which would mess up reading!
sysind: dict[str, T.Any] = {}
if isinstance(meas, (tuple, list, np.ndarray)):
for sk in fields: # iterate over each system
diff --git a/src/georinex/obs4.py b/src/georinex/obs4.py
new file mode 100644
index 0000000..5113e85
--- /dev/null
+++ b/src/georinex/obs4.py
@@ -0,0 +1,474 @@
+from __future__ import annotations
+from pathlib import Path
+import numpy as np
+import logging
+from datetime import datetime, timedelta
+import io
+import xarray
+import re
+import typing as T
+
+try:
+ from pymap3d import ecef2geodetic
+except ImportError:
+ ecef2geodetic = None
+#
+from .rio import opener, rinexinfo
+from .common import determine_time_system, check_time_interval, check_unique_times
+
+"""https://github.com/mvglasow/satstat/wiki/NMEA-IDs"""
+
+SBAS = 100 # offset for ID
+GLONASS = 37
+QZSS = 192
+BEIDOU = 0
+
+
+def rinexobs4(
+ fn: T.TextIO | Path,
+ use: set[str] = None,
+ tlim: tuple[datetime, datetime] = None,
+ useindicators: bool = False,
+ meas: list[str] = None,
+ verbose: bool = False,
+ *,
+ fast: bool = False,
+ interval: float | int | timedelta = None,
+) -> xarray.Dataset:
+ """
+ process RINEX 4 OBS data
+
+ fn: RINEX OBS 3 filename
+ use: 'G' or ['G', 'R'] or similar
+
+ tlim: read between these time bounds
+ useindicators: SSI, LLI are output
+ meas: 'L1C' or ['L1C', 'C1C'] or similar
+
+ fast: Loads entire file into memory as a string
+ TODO: time interval, time offset, etc.
+ if false, uses old _epoch method
+
+ interval: allows decimating file read by time e.g. every 5 seconds.
+ Useful to speed up reading of very large RINEX files
+ """
+
+ interval = check_time_interval(interval)
+
+ if isinstance(use, str):
+ use = {use}
+
+ if isinstance(meas, str):
+ meas = [meas]
+
+ if not meas or not meas[0].strip():
+ meas = None
+
+ if isinstance(fn, Path):
+ file_name = fn
+ else:
+ file_name = None
+
+ # %% allocate
+ if fast:
+ with opener(fn) as f:
+ fn = [ln for ln in f]
+ times = obstime4(fn)
+ else:
+ data = xarray.Dataset({}, coords={"time": [], "sv": []})
+
+ if tlim is not None and not isinstance(tlim[0], datetime):
+ raise TypeError("time bounds are specified as datetime.datetime")
+
+ last_epoch = None
+ # %% loop
+ with opener(fn) as f:
+ if fast:
+ hdr = obsheader4(f, use)
+
+ obl = []
+ for sk in hdr["fields"]:
+ obl = obl + hdr["fields"][sk]
+ obl = np.unique(np.array(obl))
+ obl = obl[np.argsort([i[1:] + i[0] for i in iter(obl)])]
+
+ if meas is not None:
+ obl = obl[np.any([np.char.find(obl, j) == 0 for i, j in enumerate(meas)], 0)]
+
+ Nt = times.size
+ Npages = len(obl)
+
+ f = np.array([f for f in f if f[1:3] != " "]) # don't need opener anymore
+
+ sv = np.array([ln[:3] for ln in f])
+ (usv, isv) = np.unique(sv, return_inverse=True) # get all sv lines
+
+ # list of epoch numbers per datum
+ timeIndex = np.cumsum(isv == np.flatnonzero(np.char.rfind(usv, ">") == 0)) - 1
+
+ # usv contains epoch entries too, get only sv
+ svl = usv[np.logical_not(np.char.startswith(usv, ">"))]
+ Nsv = svl.size
+
+ data = np.full((Npages, Nt, Nsv), np.nan)
+
+ if useindicators:
+ data_lli = np.full_like(data, np.nan)
+ data_ssi = np.full_like(data, np.nan)
+
+ time_offset = [] # TODO: fill this list
+
+ for i, sv in enumerate(usv):
+ if re.match(r"[A-Z][ \d]\d", sv) is None:
+ # time offset processing here
+ continue
+ elif use is not None and sv[0] not in use:
+ continue
+
+ # genfromtext is bottleneck, limit calls this way
+ darr = np.atleast_2d(
+ np.genfromtxt(
+ np.char.lstrip(f[isv == i], sv), delimiter=(14, 1, 1) * hdr["Fmax"]
+ )
+ )
+
+ t_i = timeIndex[isv == i]
+
+ # jsv == i-1 except for files spanning 1999-2000
+ jsv = svl == sv
+
+ for j, jobl in enumerate(hdr["fields"][sv[0]]):
+ o = obl == jobl
+ if not np.any(o):
+ continue
+
+ data[o, t_i, jsv] = darr[:, j * 3]
+
+ if useindicators:
+ data_lli[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 1]
+ data_ssi[np.ix_(o, t_i, jsv)] = darr[:, j * 3 + 2]
+
+ obs = xarray.Dataset(coords={"sv": svl, "time": times})
+ for i, k in enumerate(obl):
+ if k is None:
+ continue
+ elif np.all(np.isnan(data[i, :, :])):
+ # drop all nan datasets like tests expect
+ continue
+ obs[k] = (("time", "sv"), data[i, :, :])
+ if useindicators:
+ if k[0] == "L":
+ obs[k + "lli"] = (("time", "sv"), data_lli[i, :, :])
+ obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+ # elif k[0] == 'C': # only for code?
+ else:
+ obs[k + "ssi"] = (("time", "sv"), data_ssi[i, :, :])
+
+ obs = obs.dropna(dim="sv", how="all")
+ # obs = obs.dropna(dim="time", how="all") # when tlim specified
+
+ data = obs
+
+ else:
+ hdr = obsheader4(f, use, meas)
+ # process OBS file
+ time_offset = []
+ for ln in f:
+ if not ln.startswith(">"): # end of file
+ break
+
+ try:
+ time = _timeobs(ln)
+ except ValueError: # garbage between header and RINEX data
+ logging.debug(f"garbage detected in {fn}, trying to parse at next time step")
+ continue
+
+ try:
+ time_offset.append(float(ln[41:56]))
+ except ValueError:
+ pass
+ # %% get SV indices
+ sv = []
+ raw = ""
+ # Number of visible satellites this time %i3 pg. A13
+ for _ in range(int(ln[33:35])):
+ ln = f.readline()
+ if use is None or ln[0] in use:
+ sv.append(ln[:3])
+ raw += ln[3:]
+
+ if tlim is not None:
+ if time < tlim[0]:
+ continue
+ elif time > tlim[1]:
+ break
+
+ if interval is not None:
+ if last_epoch is None: # initialization
+ last_epoch = time
+ else:
+ if time - last_epoch < interval:
+ continue
+ else:
+ last_epoch += interval
+
+ if verbose:
+ print(time, end="\r")
+
+ # this time epoch is complete, assemble the data.
+ data = _epoch(data, raw, hdr, time, sv, useindicators, verbose)
+
+ # %% patch SV names in case of "G 7" => "G07"
+ data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()])
+ # %% other attributes
+ data.attrs["version"] = hdr["version"]
+
+ # Get interval from header or derive it from the data
+ if "interval" in hdr.keys():
+ data.attrs["interval"] = hdr["interval"]
+ elif "time" in data.coords.keys():
+ # median is robust against gaps
+ try:
+ data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s"))
+ except TypeError:
+ pass
+ else:
+ data.attrs["interval"] = np.nan
+
+ data.attrs["rinextype"] = "obs"
+ data.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4
+ data.attrs["time_system"] = determine_time_system(hdr)
+ if isinstance(file_name, Path):
+ data.attrs["filename"] = file_name.name
+
+ if "position" in hdr.keys():
+ data.attrs["position"] = hdr["position"]
+ if ecef2geodetic is not None:
+ data.attrs["position_geodetic"] = hdr["position_geodetic"]
+ if "rxmodel" in hdr.keys():
+ obs.attrs["rxmodel"] = hdr["rxmodel"]
+ if time_offset:
+ data.attrs["time_offset"] = time_offset
+
+ if "RCV CLOCK OFFS APPL" in hdr.keys():
+ try:
+ data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"])
+ except ValueError:
+ pass
+
+ return data
+
+
+def _timeobs(ln: str) -> datetime:
+ """
+ convert time from RINEX 3 OBS text to datetime
+ """
+
+ if not ln.startswith("> "): # pg. A13
+ raise ValueError('RINEX 3 line beginning "> " is not present')
+
+ return datetime(
+ int(ln[2:6]),
+ int(ln[7:9]),
+ int(ln[10:12]),
+ hour=int(ln[13:15]),
+ minute=int(ln[16:18]),
+ second=int(ln[19:21]),
+ microsecond=int(float(ln[19:29]) % 1 * 1000000),
+ )
+
+
+def obstime4(fn: T.TextIO | Path, verbose: bool = False) -> np.ndarray:
+ """
+ return all times in RINEX file
+ """
+
+ times = []
+
+ with opener(fn) as f:
+ for ln in f:
+ if ln.startswith("> "):
+ try:
+ times.append(_timeobs(ln))
+ except (ValueError, IndexError):
+ logging.debug(f"was not a time:\n{ln}")
+ continue
+
+ times = np.asarray(times)
+
+ check_unique_times(times)
+
+ return times
+
+
+def _epoch(
+ data: xarray.Dataset,
+ raw: str,
+ hdr: dict[str, T.Any],
+ time: datetime,
+ sv: list[str],
+ useindicators: bool,
+ verbose: bool,
+) -> xarray.Dataset:
+ """
+ block processing of each epoch (time step)
+ """
+ darr = np.atleast_2d(
+ np.genfromtxt(io.BytesIO(raw.encode("ascii")), delimiter=(14, 1, 1) * hdr["Fmax"])
+ )
+ # data = xarray.Dataset({}, coords={"time": [], "sv": []})
+ # %% assign data for each time step
+ for sk in hdr["fields"]: # for each satellite system type (G,R,S, etc.)
+ # satellite indices "si" to extract from this time's measurements
+ si = [i for i, s in enumerate(sv) if s[0] in sk]
+ if len(si) == 0: # no SV of this system "sk" at this time
+ continue
+
+ # measurement indices "di" to extract at this time step
+ di = hdr["fields_ind"][sk]
+ garr = darr[si, :]
+ garr = garr[:, di]
+
+ gsv = np.array(sv)[si]
+
+ dsf: dict[str, tuple] = {}
+ for i, k in enumerate(hdr["fields"][sk]):
+ dsf[k] = (("time", "sv"), np.atleast_2d(garr[:, i * 3]))
+
+ if useindicators:
+ dsf = _indicators(dsf, k, garr[:, i * 3 + 1 : i * 3 + 3])
+
+ if verbose:
+ print(time, "\r", end="")
+
+ epoch_data = xarray.Dataset(dsf, coords={"time": [time], "sv": gsv})
+ if len(data) == 0:
+ data = epoch_data
+ # data = xarray.merge((data, epoch_data))
+ elif len(hdr["fields"]) == 1: # one satellite system selected, faster to process
+ data = xarray.concat((data, epoch_data), dim="time")
+ else: # general case, slower for different satellite systems all together
+ data = xarray.merge((data, epoch_data))
+
+ return data
+
+
+def _indicators(d: dict, k: str, arr: np.ndarray) -> dict[str, tuple]:
+ """
+ handle LLI (loss of lock) and SSI (signal strength)
+ """
+ if k.startswith(("L1", "L2")):
+ d[k + "lli"] = (("time", "sv"), np.atleast_2d(arr[:, 0]))
+
+ d[k + "ssi"] = (("time", "sv"), np.atleast_2d(arr[:, 1]))
+
+ return d
+
+
+def obsheader4(f: T.TextIO, use: set[str] = None, meas: list[str] = None) -> dict[str, T.Any]:
+ """
+ get RINEX 4 OBS types, for each system type
+ optionally, select system type and/or measurement type to greatly
+ speed reading and save memory (RAM, disk)
+ """
+ if isinstance(f, (str, Path)):
+ with opener(f, header=True) as h:
+ return obsheader4(h, use, meas)
+
+ fields = {}
+ Fmax = 0
+
+ # %% first line
+ hdr = rinexinfo(f)
+
+ for ln in f:
+ if "END OF HEADER" in ln:
+ break
+
+ hd = ln[60:80]
+ c = ln[:60]
+ if "SYS / # / OBS TYPES" in hd:
+ k = c[0]
+ fields[k] = c[6:60].split()
+ N = int(c[3:6])
+ # %% maximum number of fields in a file, to allow fast Numpy parse.
+ Fmax = max(N, Fmax)
+
+ n = N - 13
+ while n > 0: # Rinex 3.03, pg. A6, A7
+ ln = f.readline()
+ assert "SYS / # / OBS TYPES" in ln[60:]
+ fields[k] += ln[6:60].split()
+ n -= 13
+
+ assert len(fields[k]) == N
+
+ continue
+
+ if hd.strip() not in hdr: # Header label
+ hdr[hd.strip()] = c # don't strip for fixed-width parsers
+ # string with info
+ else: # concatenate to the existing string
+ hdr[hd.strip()] += " " + c
+
+ # %% list with x,y,z cartesian (OPTIONAL)
+ # Rinex 3.03, pg. A6, Table A2
+ try:
+ # some RINEX files have bad headers with mulitple APPROX POSITION XYZ.
+ # we choose to use the first such header.
+ hdr["position"] = [float(j) for j in hdr["APPROX POSITION XYZ"].split()][:3]
+ if ecef2geodetic is not None and len(hdr["position"]) == 3:
+ hdr["position_geodetic"] = ecef2geodetic(*hdr["position"])
+ except (KeyError, ValueError):
+ pass
+ # %% time
+ try:
+ t0s = hdr["TIME OF FIRST OBS"]
+ # NOTE: must do second=int(float()) due to non-conforming files
+ hdr["t0"] = datetime(
+ year=int(t0s[:6]),
+ month=int(t0s[6:12]),
+ day=int(t0s[12:18]),
+ hour=int(t0s[18:24]),
+ minute=int(t0s[24:30]),
+ second=int(float(t0s[30:36])),
+ microsecond=int(float(t0s[30:43]) % 1 * 1000000),
+ )
+ except (KeyError, ValueError):
+ pass
+
+ try:
+ hdr["interval"] = float(hdr["INTERVAL"][:10])
+ except (KeyError, ValueError):
+ pass
+ # %% select specific satellite systems only (optional)
+ if use:
+ if not set(fields.keys()).intersection(use):
+ raise KeyError(f"system type {use} not found in RINEX file")
+
+ fields = {k: fields[k] for k in use if k in fields}
+
+ # perhaps this could be done more efficiently, but it's probably low impact on overall program.
+ # simple set and frozenset operations do NOT preserve order, which would mess up reading!
+ sysind: dict[str, T.Any] = {}
+ if isinstance(meas, (tuple, list, np.ndarray)):
+ for sk in fields: # iterate over each system
+ # ind = np.isin(fields[sk], meas) # boolean vector
+ ind = np.zeros(len(fields[sk]), dtype=bool)
+ for m in meas:
+ for i, field in enumerate(fields[sk]):
+ if field.startswith(m):
+ ind[i] = True
+
+ fields[sk] = np.array(fields[sk])[ind].tolist()
+ sysind[sk] = np.empty(Fmax * 3, dtype=bool) # *3 due to LLI, SSI
+ for j, i in enumerate(ind):
+ sysind[sk][j * 3 : j * 3 + 3] = i
+ else:
+ sysind = {k: np.s_[:] for k in fields}
+
+ hdr["fields"] = fields
+ hdr["fields_ind"] = sysind
+ hdr["Fmax"] = Fmax
+
+ return hdr
diff --git a/src/georinex/rio.py b/src/georinex/rio.py
index 9a0f66c..befbdd0 100644
--- a/src/georinex/rio.py
+++ b/src/georinex/rio.py
@@ -15,7 +15,7 @@
@contextmanager
-def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]:
+def opener(fn: T.TextIO | Path | list, header: bool = False, encoding="ascii") -> T.Iterator[T.TextIO]:
"""provides file handle for regular ASCII or gzip files transparently"""
if isinstance(fn, str):
fn = Path(fn).expanduser()
@@ -23,57 +23,96 @@ def opener(fn: T.TextIO | Path, header: bool = False) -> T.Iterator[T.TextIO]:
if isinstance(fn, io.StringIO):
fn.seek(0)
yield fn
+ elif isinstance(fn, list):
+ f = ''.join(fn)
+ fn = io.StringIO(f)
+ yield fn
elif isinstance(fn, Path):
# need to have this check for Windows
if not fn.is_file():
raise FileNotFoundError(fn)
- finf = fn.stat()
- if finf.st_size > 100e6:
- logging.info(f"opening {finf.st_size/1e6} MByte {fn.name}")
-
- suffix = fn.suffix.lower()
-
- if suffix == ".gz":
- with gzip.open(fn, "rt") as f:
- _, is_crinex = rinex_version(first_nonblank_line(f))
- f.seek(0)
-
- if is_crinex and not header:
- # Conversion to string is necessary because of a quirk where gzip.open()
- # even with 'rt' doesn't decompress until read.
- f = io.StringIO(crx2rnx(f.read()))
- yield f
- elif suffix == ".bz2":
- # this is for plain bzip2 files, NOT tar.bz2, which requires f.seek(512)
- with bz2.open(fn, "rt") as f:
- _, is_crinex = rinex_version(first_nonblank_line(f))
- f.seek(0)
-
- if is_crinex and not header:
- f = io.StringIO(crx2rnx(f.read()))
- yield f
- elif suffix == ".zip":
- with zipfile.ZipFile(fn, "r") as z:
- flist = z.namelist()
- for rinexfn in flist:
- with z.open(rinexfn, "r") as bf:
- f = io.StringIO(
- io.TextIOWrapper(bf, encoding="ascii", errors="ignore").read() # type: ignore
- )
- yield f
- elif suffix == ".z":
- with fn.open("rb") as zu:
- with io.StringIO(unlzw(zu.read()).decode("ascii")) as f:
+ try:
+ finf = fn.stat()
+ if finf.st_size > 100e6:
+ logging.info(f"opening {finf.st_size/1e6} MByte {fn.name}")
+
+ suffix = fn.suffix.lower()
+
+ if suffix == ".gz":
+ with gzip.open(fn, "rt", encoding=encoding) as f:
+
+ _, is_crinex = rinex_version(first_nonblank_line(f))
+ f.seek(0)
+
+ if is_crinex and not header:
+ # Conversion to string is necessary because of a quirk where gzip.open()
+ # even with 'rt' doesn't decompress until read.
+ f = io.StringIO(crx2rnx(f.read()))
+ yield f
+ elif suffix == ".bz2":
+ # this is for plain bzip2 files, NOT tar.bz2, which requires f.seek(512)
+ with bz2.open(fn, "rt", encoding=encoding) as f:
+ _, is_crinex = rinex_version(first_nonblank_line(f))
+ f.seek(0)
+
+ if is_crinex and not header:
+ f = io.StringIO(crx2rnx(f.read()))
yield f
- else: # assume not compressed (or Hatanaka)
- with fn.open("r", encoding="ascii", errors="ignore") as f:
- _, is_crinex = rinex_version(first_nonblank_line(f))
- f.seek(0)
-
- if is_crinex and not header:
- f = io.StringIO(crx2rnx(f))
- yield f
+ elif suffix == ".zip":
+ with zipfile.ZipFile(fn, "r") as z:
+ flist = z.namelist()
+ for rinexfn in flist:
+ with z.open(rinexfn, "r") as bf:
+ f = io.StringIO(
+ io.TextIOWrapper(bf, encoding=encoding, errors="ignore").read() # type: ignore
+ )
+ yield f
+ elif suffix == ".z":
+ if unlzw is None:
+ raise ImportError("pip install unlzw3")
+ try:
+ with fn.open("rb") as zu:
+ with io.StringIO(unlzw(zu.read()).decode(encoding)) as f:
+ _, is_crinex = rinex_version(first_nonblank_line(f))
+ f.seek(0)
+
+ if is_crinex and not header:
+ # Conversion to string is necessary because of a quirk where gzip.open()
+ # even with 'rt' doesn't decompress until read.
+ f = io.StringIO(crx2rnx(f.read()))
+ yield f
+ except ValueError as e:
+ if 'magic bytes' in e.args[0]:
+ # sometimes .Z files are actually gzipped
+ with gzip.open(fn, "rt", encoding=encoding) as f:
+ _, is_crinex = rinex_version(first_nonblank_line(f))
+ f.seek(0)
+
+ if is_crinex and not header:
+ # Conversion to string is necessary because of a quirk where gzip.open()
+ # even with 'rt' doesn't decompress until read.
+ f = io.StringIO(crx2rnx(f.read()))
+ yield f
+
+ else: # assume not compressed (or Hatanaka)
+ with fn.open("r", encoding=encoding, errors="ignore") as f:
+ _, is_crinex = rinex_version(first_nonblank_line(f))
+ f.seek(0)
+
+ if is_crinex and not header:
+ f = io.StringIO(crx2rnx(f))
+ yield f
+ except UnicodeDecodeError as e:
+ # some RINEX files have awkward encoding
+ if encoding == "ascii":
+ with opener(fn, header, encoding="latin-1") as f:
+ yield f
+ elif encoding == "latin-1":
+ with opener(fn, header, encoding="utf-8") as f:
+ yield f
+ else:
+ raise(e)
else:
raise OSError(f"Unsure what to do with input of type: {type(fn)}")
diff --git a/src/georinex/tests/test_rinex.py b/src/georinex/tests/test_rinex.py
index 35c1740..5c8f26e 100644
--- a/src/georinex/tests/test_rinex.py
+++ b/src/georinex/tests/test_rinex.py
@@ -61,7 +61,7 @@ def test_minimal(tmp_path, filename):
if int(dat.version) == 2:
assert dat.fast_processing
elif int(dat.version) == 3:
- assert not dat.fast_processing # FIXME: update when OBS3 fast processing is added.
+ assert dat.fast_processing
def test_dont_care_file_extension():