Skip to content

Commit e51b97f

Browse files
authored
Merge pull request #3064 from mraspaud/feature-hrit-remote
Add remote reading for hrit seviri
2 parents ef3618f + 17cf0ce commit e51b97f

File tree

8 files changed

+226
-36
lines changed

8 files changed

+226
-36
lines changed

continuous_integration/environment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ dependencies:
4646
- pytest
4747
- pytest-cov
4848
- fsspec
49+
- universal_pathlib
4950
- botocore>=1.33
5051
- s3fs
5152
- python-geotiepoints

satpy/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# You should have received a copy of the GNU General Public License along with
1717
# satpy. If not, see <http://www.gnu.org/licenses/>.
1818
"""Pytest configuration and setup functions."""
19+
import pytest
1920

2021

2122
def pytest_configure(config):
@@ -28,3 +29,9 @@ def pytest_unconfigure(config):
2829
"""Undo previous configurations."""
2930
from satpy import aux_download
3031
aux_download.RUNNING_TESTS = False
32+
33+
34+
@pytest.fixture(scope="session")
def session_tmp_path(tmp_path_factory):
    """Generate a single temp path to use for the entire session.

    The directory is created through ``tmp_path_factory`` because the
    function-scoped ``tmp_path`` fixture cannot be requested from a
    session-scoped fixture.

    Returns:
        The path of the newly created ``data`` temporary directory.
    """
    return tmp_path_factory.mktemp("data")

satpy/readers/hrit_base.py

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
from pyresample import geometry
3737

3838
import satpy.readers.utils as utils
39-
from satpy.readers import FSFile
4039
from satpy.readers.eum_base import time_cds_short
4140
from satpy.readers.file_handlers import BaseFileHandler
4241
from satpy.readers.seviri_base import dec10216
@@ -88,14 +87,18 @@
8887
}
8988

9089

91-
def decompress(infile):
90+
def decompress_file(infile) -> bytes:
9291
"""Decompress an XRIT data file and return the decompressed buffer."""
93-
from pyPublicDecompWT import xRITDecompress
94-
9592
# decompress in-memory
9693
with open(infile, mode="rb") as fh:
97-
xrit = xRITDecompress()
98-
xrit.decompress(fh.read())
94+
return decompress_buffer(fh.read())
95+
96+
97+
def decompress_buffer(buffer) -> bytes:
    """Decompress an in-memory XRIT buffer.

    Args:
        buffer: The raw content of a compressed XRIT file.

    Returns:
        The decompressed data as provided by ``xRITDecompress.data()``.
    """
    # Local import: pyPublicDecompWT is only needed once this function is called.
    from pyPublicDecompWT import xRITDecompress

    xrit = xRITDecompress()
    xrit.decompress(buffer)
    return xrit.data()
101104

@@ -117,28 +120,31 @@ class HRITFileHandler(BaseFileHandler):
117120

118121
def __init__(self, filename, filename_info, filetype_info, hdr_info):
119122
"""Initialize the reader."""
120-
super(HRITFileHandler, self).__init__(filename, filename_info,
121-
filetype_info)
123+
super().__init__(filename, filename_info, filetype_info)
122124

123125
self.mda = {}
124126
self.hdr_info = hdr_info
125127
self._get_hd(self.hdr_info)
126128
self._start_time = filename_info["start_time"]
127129
self._end_time = self._start_time + dt.timedelta(minutes=15)
128130

129-
def _get_hd(self, hdr_info):
131+
def _get_hd(self, hdr_info, verbose=False):
130132
"""Open the file, read and get the basic file header info and set the mda dictionary."""
131133
hdr_map, variable_length_headers, text_headers = hdr_info
132-
133134
with utils.generic_open(self.filename, mode="rb") as fp:
134135
total_header_length = 16
135136
while fp.tell() < total_header_length:
136137
hdr_id = get_header_id(fp)
138+
if verbose:
139+
print("hdr_id") # noqa: T201
140+
print(f'np.void({hdr_id}, dtype=[("hdr_id", "u1"), ("record_length", ">u2")]),') # noqa: T201
137141
the_type = hdr_map[hdr_id["hdr_id"]]
138142
if the_type in variable_length_headers:
139143
field_length = int((hdr_id["record_length"] - 3) /
140144
the_type.itemsize)
141145
current_hdr = get_header_content(fp, the_type, field_length)
146+
if verbose:
147+
print(f"np.zeros(({field_length}, ), dtype={the_type}),") # noqa: T201
142148
key = variable_length_headers[the_type]
143149
if key in self.mda:
144150
if not isinstance(self.mda[key], list):
@@ -152,9 +158,13 @@ def _get_hd(self, hdr_info):
152158
char = list(the_type.fields.values())[0][0].char
153159
new_type = np.dtype(char + str(field_length))
154160
current_hdr = get_header_content(fp, new_type)[0]
161+
if verbose:
162+
print(f'np.array({current_hdr}, dtype="{new_type}"),') # noqa: T201
155163
self.mda[text_headers[the_type]] = current_hdr
156164
else:
157165
current_hdr = get_header_content(fp, the_type)[0]
166+
if verbose:
167+
print(f"np.void({current_hdr}, dtype={the_type}),") # noqa: T201
158168
self.mda.update(
159169
dict(zip(current_hdr.dtype.names, current_hdr)))
160170

@@ -318,7 +328,7 @@ def _read_data_from_file(self):
318328
return self._read_data_from_disk()
319329

320330
def _is_file_like(self):
321-
return isinstance(self.filename, FSFile)
331+
return not isinstance(self.filename, str)
322332

323333
def _read_data_from_disk(self):
324334
# For reading the image data, unzip_context is faster than generic_open
@@ -327,7 +337,7 @@ def _read_data_from_disk(self):
327337

328338
if self.compressed:
329339
return np.frombuffer(
330-
decompress(fn),
340+
decompress_file(fn),
331341
offset=self.offset,
332342
dtype=dtype,
333343
count=np.prod(shape)
@@ -344,12 +354,15 @@ def _read_file_like(self):
344354
# filename is likely to be a file-like object, already in memory
345355
dtype, shape = self._get_input_info()
346356
with utils.generic_open(self.filename, mode="rb") as fp:
357+
decompressed_buffer = fp.read()
358+
if self.compressed:
359+
decompressed_buffer = decompress_buffer(decompressed_buffer)
347360
no_elements = np.prod(shape)
348-
fp.seek(self.offset)
349361
return np.frombuffer(
350-
fp.read(np.dtype(dtype).itemsize * no_elements),
362+
decompressed_buffer,
351363
dtype=np.dtype(dtype),
352-
count=no_elements.item()
364+
count=no_elements.item(),
365+
offset=self.offset
353366
).reshape(shape)
354367

355368
def _get_input_info(self):

satpy/readers/seviri_l1b_hrit.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ class HRITMSGPrologueEpilogueBase(HRITFileHandler):
312312

313313
def __init__(self, filename, filename_info, filetype_info, hdr_info):
314314
"""Initialize the file handler for prologue and epilogue files."""
315-
super(HRITMSGPrologueEpilogueBase, self).__init__(filename, filename_info, filetype_info, hdr_info)
315+
super().__init__(filename, filename_info, filetype_info, hdr_info)
316316
self._reduced = None
317317

318318
def _reduce(self, mda, max_size):
@@ -333,11 +333,11 @@ def __init__(self, filename, filename_info, filetype_info, calib_mode="nominal",
333333
ext_calib_coefs=None, include_raw_metadata=False,
334334
mda_max_array_size=None, fill_hrv=None, mask_bad_quality_scan_lines=None):
335335
"""Initialize the reader."""
336-
super(HRITMSGPrologueFileHandler, self).__init__(filename, filename_info,
337-
filetype_info,
338-
(msg_hdr_map,
339-
msg_variable_length_headers,
340-
msg_text_headers))
336+
super().__init__(filename, filename_info,
337+
filetype_info,
338+
(msg_hdr_map,
339+
msg_variable_length_headers,
340+
msg_text_headers))
341341
self.prologue = {}
342342
self.read_prologue()
343343

satpy/readers/utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -355,10 +355,10 @@ def generic_open(filename, *args, **kwargs):
355355
fp = filename.open(*args, **kwargs)
356356
except AttributeError:
357357
fp = open(filename, *args, **kwargs)
358-
359-
yield fp
360-
361-
fp.close()
358+
try:
359+
yield fp
360+
finally:
361+
fp.close()
362362

363363

364364
def fromfile(filename, dtype, count=1, offset=0):

satpy/tests/reader_tests/test_hrit_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ def test_read_band_filepath(self, stub_compressed_hrit_file):
241241
"""Test reading a single band from a filepath."""
242242
filename = stub_compressed_hrit_file
243243

244-
with mock.patch("satpy.readers.hrit_base.decompress", side_effect=fake_decompress) as mock_decompress:
244+
with mock.patch("satpy.readers.hrit_base.decompress_buffer", side_effect=fake_decompress) as mock_decompress:
245245
with mock.patch.object(HRITFileHandler, "_get_hd", side_effect=new_get_hd, autospec=True) as get_hd:
246246
self.reader = HRITFileHandler(filename,
247247
{"platform_shortname": "MSG3",

satpy/tests/reader_tests/test_seviri_l1b_hrit.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,21 @@
1919
"""The HRIT msg reader tests package."""
2020

2121
import datetime as dt
22+
import os
2223
import unittest
24+
import warnings
25+
import zipfile
2326
from unittest import mock
2427

28+
import fsspec
2529
import numpy as np
2630
import pytest
2731
import xarray as xr
2832
from numpy import testing as npt
2933
from pyproj import CRS
3034

3135
import satpy.tests.reader_tests.test_seviri_l1b_hrit_setup as setup
36+
from satpy.readers import FSFile
3237
from satpy.readers.seviri_l1b_hrit import HRITMSGEpilogueFileHandler, HRITMSGFileHandler, HRITMSGPrologueFileHandler
3338
from satpy.tests.reader_tests.test_seviri_base import ORBIT_POLYNOMIALS_INVALID
3439
from satpy.tests.reader_tests.test_seviri_l1b_calibration import TestFileHandlerCalibrationBase
@@ -502,3 +507,173 @@ def test_mask_bad_quality(self, file_handler):
502507
new_data[:, :] = np.nan
503508
expected = expected.copy(data=new_data)
504509
xr.testing.assert_equal(res, expected)
510+
511+
512+
@pytest.fixture(scope="session")
def prologue_file(session_tmp_path, prologue_header_contents):
    """Create a dummy prologue file.

    The file consists of the prologue header records followed by one
    ``hrit_prologue`` record whose satellite id is set to 324.

    Returns:
        The path of the file written inside ``session_tmp_path``.
    """
    from satpy.readers.seviri_l1b_native_hdr import hrit_prologue

    contents = np.void(1, dtype=hrit_prologue)
    contents["SatelliteStatus"]["SatelliteDefinition"]["SatelliteId"] = 324
    return create_file(session_tmp_path / "prologue", prologue_header_contents + [contents])
520+
521+
522+
@pytest.fixture(scope="session")
def prologue_header_contents():
    """Get the contents of the header.

    Returns:
        A list of numpy records: three (header id, payload) pairs making up
        the prime header, the annotation header and the timestamp record.
    """
    # Every header record starts with the same 3-byte id/length prefix;
    # each ``record_length`` below is those 3 bytes plus the payload size.
    hdr_id_dtype = [("hdr_id", "u1"), ("record_length", ">u2")]
    return [
        # prime header (total_header_length 90 = 16 + 64 + 10)
        np.void((0, 16), dtype=hdr_id_dtype),
        np.void((128, 90, 3403688),
                dtype=[("file_type", "u1"), ("total_header_length", ">u4"), ("data_field_length", ">u8")]),
        # second header
        np.void((4, 64), dtype=hdr_id_dtype),
        np.array(b"H-000-MSG4__-MSG4________-_________-PRO______-201802281500-__", dtype="|S61"),
        # timestamp record
        np.void((5, 10), dtype=hdr_id_dtype),
        np.void((64, (21973, 54911033)),
                dtype=[("cds_p_field", "u1"), ("timestamp", [("Days", ">u2"), ("Milliseconds", ">u4")])]),
    ]
538+
539+
540+
@pytest.fixture(scope="session")
def epilogue_file(session_tmp_path, epilogue_header_contents):
    """Create a dummy epilogue file.

    The file consists of the epilogue header records followed by one
    ``hrit_epilogue`` record.

    Returns:
        The path of the file written inside ``session_tmp_path``.
    """
    from satpy.readers.seviri_l1b_native_hdr import hrit_epilogue

    contents = np.void(1, dtype=hrit_epilogue)
    return create_file(session_tmp_path / "epilogue", epilogue_header_contents + [contents])
547+
548+
549+
@pytest.fixture(scope="session")
def epilogue_header_contents():
    """Get the contents of the header.

    Returns:
        A list of numpy records: three (header id, payload) pairs making up
        the prime header, the annotation header and the timestamp record.
    """
    # Every header record starts with the same 3-byte id/length prefix;
    # each ``record_length`` below is those 3 bytes plus the payload size.
    hdr_id_dtype = [("hdr_id", "u1"), ("record_length", ">u2")]
    return [
        # prime header (total_header_length 90 = 16 + 64 + 10)
        np.void((0, 16), dtype=hdr_id_dtype),
        np.void((129, 90, 3042600),
                dtype=[("file_type", "u1"), ("total_header_length", ">u4"), ("data_field_length", ">u8")]),
        # annotation header
        np.void((4, 64), dtype=hdr_id_dtype),
        np.array(b"H-000-MSG4__-MSG4________-_________-EPI______-201802281500-__", dtype="|S61"),
        # timestamp record
        np.void((5, 10), dtype=hdr_id_dtype),
        np.void((64, (21973, 54911033)),
                dtype=[("cds_p_field", "u1"), ("timestamp", [("Days", ">u2"), ("Milliseconds", ">u4")])]),
    ]
562+
563+
564+
def create_file(filename, file_contents):
    """Create an hrit file.

    Args:
        filename: Path of the file to write; an existing file is overwritten.
        file_contents: Iterable of numpy arrays or records whose raw bytes are
            written back to back, in iteration order.

    Returns:
        ``filename``, unchanged, so the call can be used directly as a
        fixture return value.
    """
    with open(filename, "wb") as fh:
        for array in file_contents:
            array.tofile(fh)
    return filename
570+
571+
572+
@pytest.fixture(scope="session")
def segment_file(session_tmp_path):
    """Create a segment_file.

    The file holds the header records of an uncompressed VIS008 segment
    followed by a zero-filled image payload of ``cols * lines`` pixels at
    ``bpp`` bits per pixel.

    Returns:
        The path of the file written inside ``session_tmp_path``.
    """
    cols = 3712
    lines = 464
    bpp = 10
    # Every header record starts with the same 3-byte id/length prefix;
    # each ``record_length`` below is those 3 bytes plus the payload size.
    hdr_id_dtype = [("hdr_id", "u1"), ("record_length", ">u2")]
    header = [
        # prime header: total_header_length 6198 is the sum of all record lengths below,
        # data_field_length 17223680 is cols * lines * bpp
        np.void((0, 16), dtype=hdr_id_dtype),
        np.void((0, 6198, 17223680), dtype=[("file_type", "u1"), ("total_header_length", ">u4"),
                                            ("data_field_length", ">u8")]),
        # image structure
        np.void((1, 9), dtype=hdr_id_dtype),
        np.void((bpp, cols, lines, 0), dtype=[("number_of_bits_per_pixel", "u1"), ("number_of_columns", ">u2"),
                                              ("number_of_lines", ">u2"), ("compression_flag_for_data", "u1")]),
        # image navigation
        # NOTE(review): the padding inside the projection name may have been collapsed when this
        # text was extracted; confirm the literal against the upstream test file.
        np.void((2, 51), dtype=hdr_id_dtype),
        np.void((b"GEOS(+000.0) ", -13642337, -13642337, 1856, 1856),
                dtype=[("projection_name", "S32"),
                       ("cfac", ">i4"), ("lfac", ">i4"),
                       ("coff", ">i4"), ("loff", ">i4")]),
        # annotation
        np.void((4, 64), dtype=hdr_id_dtype),
        np.array(b"H-000-MSG4__-MSG4________-VIS008___-000001___-201802281500-__", dtype="|S61"),
        # timestamp
        np.void((5, 10), dtype=hdr_id_dtype),
        np.void((64, (21973, 54911033)), dtype=[("cds_p_field", "u1"), ("timestamp", [("Days", ">u2"),
                                                                                      ("Milliseconds", ">u4")])]),
        # segment identification
        np.void((128, 13), dtype=hdr_id_dtype),
        np.void((324, 2, 1, 1, 8, 0), dtype=[("GP_SC_ID", ">i2"), ("spectral_channel_id", "i1"),
                                             ("segment_sequence_number", ">u2"),
                                             ("planned_start_segment_number", ">u2"),
                                             ("planned_end_segment_number", ">u2"),
                                             ("data_field_representation", "i1")]),
        # per-line quality records: one 13-byte entry per image line (3 + 464 * 13 = 6035)
        np.void((129, 6035), dtype=hdr_id_dtype),
        np.zeros((lines, ), dtype=[("line_number_in_grid", ">i4"),
                                   ("line_mean_acquisition", [("days", ">u2"), ("milliseconds", ">u4")]),
                                   ("line_validity", "u1"), ("line_radiometric_quality", "u1"),
                                   ("line_geometric_quality", "u1")]),
    ]
    # Zero-filled rather than uninitialised so the generated file is deterministic.
    contents = np.zeros(cols * lines * bpp // 8, dtype="u1")

    return create_file(session_tmp_path / "segment", header + [contents])
610+
611+
612+
def test_read_real_segment(prologue_file, epilogue_file, segment_file):
    """Test reading an hrit segment."""
    info = {"start_time": dt.datetime(2018, 2, 28, 15, 0), "service": ""}
    prologue_fh = HRITMSGPrologueFileHandler(prologue_file, info, {})
    epilogue_fh = HRITMSGEpilogueFileHandler(epilogue_file, info, {})
    with warnings.catch_warnings():
        # Silence the orbit-polynomial warning raised while loading the dummy files.
        warnings.filterwarnings("ignore", category=UserWarning, message="No orbit polynomial valid")
        filehandler = HRITMSGFileHandler(segment_file, info, {}, prologue_fh, epilogue_fh)
        res = filehandler.get_dataset({"name": "VIS008", "calibration": "counts"},
                                      {"units": "", "wavelength": 0.8, "standard_name": "counts"})
        # Force evaluation so that the image data is actually read from the file.
        res.compute()
623+
624+
625+
@pytest.fixture(scope="session")
def compressed_seviri_hrit_files(session_tmp_path, prologue_file, epilogue_file, segment_file):
    """Return the fsspec paths to the given seviri hrit files inside a zip file.

    The three files are stored in one archive under their base names.

    Returns:
        A dict mapping ``"prologue"``, ``"epilogue"`` and ``"segment"`` to a
        chained ``zip://<member>::file://<archive>`` URL.
    """
    zip_full_path = session_tmp_path / "test_seviri_hrit.zip"
    with zipfile.ZipFile(zip_full_path, mode="w") as archive:
        for filename in (prologue_file, epilogue_file, segment_file):
            archive.write(filename, os.path.basename(filename))
    return {hrit_file: f"zip://{hrit_file}::file://{zip_full_path.as_posix()}"
            for hrit_file in ("prologue", "epilogue", "segment")}
634+
635+
def test_read_real_segment_zipped(compressed_seviri_hrit_files):
    """Test reading a remote hrit segment passed as FSFile."""
    info = {"start_time": dt.datetime(2018, 2, 28, 15, 0), "service": ""}
    prologue = FSFile(fsspec.open(compressed_seviri_hrit_files["prologue"]))
    prologue_fh = HRITMSGPrologueFileHandler(prologue, info, {})
    epilogue = FSFile(fsspec.open(compressed_seviri_hrit_files["epilogue"]))
    epilogue_fh = HRITMSGEpilogueFileHandler(epilogue, info, {})
    segment = FSFile(fsspec.open(compressed_seviri_hrit_files["segment"]))
    with warnings.catch_warnings():
        # Silence the orbit-polynomial warning raised while loading the dummy files.
        warnings.filterwarnings("ignore", category=UserWarning, message="No orbit polynomial valid")
        filehandler = HRITMSGFileHandler(segment, info, {}, prologue_fh, epilogue_fh)
        res = filehandler.get_dataset({"name": "VIS008", "calibration": "counts"},
                                      {"units": "", "wavelength": 0.8, "standard_name": "counts"})
        # Force evaluation so that the image data is actually read from the archive.
        res.compute()
649+
650+
651+
def to_upath(fsfile):
    """Convert FSFile instance to UPath.

    Args:
        fsfile: The ``FSFile`` to convert; its ``fs`` attribute must provide ``to_dict()``.

    Returns:
        A ``UPath`` built from the file's path and its serialised filesystem options.
    """
    from upath import UPath

    fs_options = fsfile.fs.to_dict()
    # The "cls" entry is dropped; the remaining entries are passed on as keyword arguments.
    fs_options.pop("cls")
    return UPath(os.fspath(fsfile), **fs_options)
658+
659+
660+
def test_read_real_segment_zipped_with_upath(compressed_seviri_hrit_files):
    """Test reading a remote hrit segment passed as UPath."""
    info = {"start_time": dt.datetime(2018, 2, 28, 15, 0), "service": ""}

    prologue = to_upath(FSFile(fsspec.open(compressed_seviri_hrit_files["prologue"])))
    prologue_fh = HRITMSGPrologueFileHandler(prologue, info, {})

    epilogue = to_upath(FSFile(fsspec.open(compressed_seviri_hrit_files["epilogue"])))
    epilogue_fh = HRITMSGEpilogueFileHandler(epilogue, info, {})

    segment = to_upath(FSFile(fsspec.open(compressed_seviri_hrit_files["segment"])))
    with warnings.catch_warnings():
        # Silence the orbit-polynomial warning raised while loading the dummy files.
        warnings.filterwarnings("ignore", category=UserWarning, message="No orbit polynomial valid")
        filehandler = HRITMSGFileHandler(segment, info, {}, prologue_fh, epilogue_fh)
        res = filehandler.get_dataset({"name": "VIS008", "calibration": "counts"},
                                      {"units": "", "wavelength": 0.8, "standard_name": "counts"})
        # Force evaluation so that the image data is actually read from the archive.
        res.compute()

0 commit comments

Comments
 (0)