Skip to content

Commit 438f514

Browse files
MS3Record.valid_extra_headers() to validate extra headers
1 parent 772d6e7 commit 438f514

File tree

4 files changed

+498
-3
lines changed

4 files changed

+498
-3
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
- `MS3Record.get_extra_header()` to get a specified extra header
1212
- `MS3Record.set_extra_header()` to set a specified extra header
1313
- `MS3Record.merge_extra_headers()` to apply a JSON Merge Patch to extra headers
14+
- `MS3Record.valid_extra_headers()` to validate extra headers
1415

1516
### Changed
1617
- libmseed updated to v3.2.3

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,17 @@ dev = [
4444
"ruff>=0.1.0",
4545
"mypy>=1.0.0",
4646
"pre-commit>=3.0.0",
47+
"numpy>=1.21.0",
48+
"jsonschema>=4.0.0",
4749
]
4850

4951
# NumPy support for data arrays
5052
numpy = [
5153
"numpy>=1.21.0",
5254
]
5355

56+
jsonschema = ["jsonschema>=4.0.0"]
57+
5458
# Documentation dependencies
5559
docs = [
5660
"sphinx>=5.0.0",
@@ -90,7 +94,7 @@ zip-safe = false
9094
where = ["src"]
9195

9296
[tool.setuptools.package-data]
93-
pymseed = ["*.so", "*.pyd", "*.dll", "*.py"]
97+
pymseed = ["*.so", "*.pyd", "*.dll", "*.py", "schemas/*.json"]
9498

9599
[tool.setuptools.dynamic]
96100
version = {attr = "pymseed.__version__.__version__"}

src/pymseed/msrecord.py

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import warnings
88
from collections.abc import Iterator, Sequence
99
from contextlib import contextmanager
10+
from importlib.resources import files
1011
from typing import Any, Callable, Optional, Union
1112

1213
from .clib import cdata_to_string, clibmseed, ffi
@@ -583,7 +584,7 @@ def extra(self, value: str) -> None:
583584
... "Correction": 1.234
584585
... },
585586
... "Flags": {
586-
... "MassPositionOffscale": true
587+
... "MassPositionOffscale": true
587588
... },
588589
... },
589590
... "Operator": {
@@ -629,7 +630,7 @@ def get_extra_header(self, ptr: str) -> Union[bool, int, float, str, None]:
629630
... "Correction": 1.234
630631
... },
631632
... "Flags": {
632-
... "MassPositionOffscale": true
633+
... "MassPositionOffscale": true
633634
... }
634635
... },
635636
... "Operator": {
@@ -647,6 +648,9 @@ def get_extra_header(self, ptr: str) -> Union[bool, int, float, str, None]:
647648
>>> msr.get_extra_header("/Operator/Battery/Status")
648649
'CHARGING'
649650
651+
# Returns None when header does not exist
652+
>>> assert msr.get_extra_header("/Nonexistent/Header") is None
653+
650654
See Also:
651655
set_extra_header(): Set an extra header value
652656
"""
@@ -821,6 +825,100 @@ def merge_extra_headers(self, value: str) -> None:
821825
if status < 0:
822826
raise ValueError(f"Error merging extra header: {status}")
823827

828+
def valid_extra_headers(self, schema_id: str = "FDSN-v1.0", schema_file: Optional[str] = None) -> bool:
    """Check whether the record's extra headers validate against a JSON Schema.

    The extra headers (``self.extra``) are parsed as JSON and validated with
    ``jsonschema.Draft202012Validator``, so the selected schema should conform
    to the JSON Schema 2020-12 specification:
    https://json-schema.org/draft/2020-12#draft-2020-12

    Any specified _schema_file_ will take precedence over _schema_id_.

    The _schema_id_ is a known schema ID that can be used to select a schema
    bundled with the package. As of this writing only "FDSN-v1.0" is an
    accepted value and uses the published schema:
    `ExtraHeaders-FDSN-v1.0.schema-2020-12.json`

    Args:
        schema_id: ID of the known schema to use, defaults to "FDSN-v1.0".
            Ignored when _schema_file_ is given.
        schema_file: Path to a specific schema file to use, defaults to None

    Returns:
        True if the extra headers are valid, False otherwise. A record with
        no extra headers is always reported as valid; in that case no schema
        is loaded and the arguments are not inspected.

    Raises:
        ImportError: If extra headers are present and the optional
            ``jsonschema`` package is not installed.
        ValueError: If extra headers are present, no _schema_file_ is given
            and _schema_id_ is not a known schema ID.

    Notes:
        Errors raised while opening _schema_file_ or while parsing the schema
        or the extra headers as JSON are not caught here and propagate to
        the caller.

    Examples:
        >>> from pymseed import MS3Record
        >>> msr = MS3Record()
        >>> msr.extra = '''{
        ...     "FDSN": {
        ...         "Time": {
        ...             "Quality": 100,
        ...             "Correction": 1.234
        ...         },
        ...         "Flags": {
        ...             "MassPositionOffscale": true
        ...         }
        ...     },
        ...     "Operator": {
        ...         "Battery": {
        ...             "Status": "CHARGING"
        ...         }
        ...     }}'''
        >>> msr.valid_extra_headers()
        True

        # INVALID headers
        >>> msr.extra = '''{
        ...     "FDSN": {
        ...         "Time": {
        ...             "Quality": "really good",
        ...             "Correction": false
        ...         },
        ...         "Flags": {
        ...             "MassPositionOffscale": 1.2345
        ...         },
        ...         "Invalid": {
        ...             "Header": "value not allowed in FDSN section"
        ...         }
        ...     }}'''
        >>> msr.valid_extra_headers()
        False

    """
    # Nothing to validate: an absent/empty extra header set is considered
    # valid, and this path deliberately does not require jsonschema.
    if not self.extra:
        return True

    # jsonschema is an optional dependency, so import it only when needed.
    try:
        from jsonschema import Draft202012Validator
    except ImportError:
        raise ImportError(
            "jsonschema is not installed. Install jsonschema or this package with [jsonschema] optional dependency"
        ) from None

    # Resolve the raw schema bytes; an explicit file wins over the schema ID.
    if schema_file is not None:
        with open(schema_file, "rb") as fh:
            schema_bytes = fh.read()
    elif schema_id == "FDSN-v1.0":
        # Schema shipped as package data under pymseed/schemas/
        schema_bytes = (
            files("pymseed.schemas")
            .joinpath("ExtraHeaders-FDSN-v1.0.schema-2020-12.json")
            .read_bytes()
        )
    else:
        raise ValueError(f"Unknown schema_id: {schema_id}")

    schema = json.loads(schema_bytes)
    instance = json.loads(self.extra)

    return Draft202012Validator(schema).is_valid(instance)
921+
824922
@property
825923
def datasamples(self) -> memoryview:
826924
"""Data samples as a memoryview (zero-copy access).

0 commit comments

Comments
 (0)