Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/mseedlib/msrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,3 +580,63 @@ def pack(
ct.c_int8,
],
)


def binary_compare_records(record1: MS3Record, record2: MS3Record) -> bool:
    """Return True when the ``record`` attributes of both instances compare equal.

    Args:
        record1: First record to compare.
        record2: Second record to compare.

    Returns:
        The result of ``record1.record == record2.record``.
    """
    raw_first = record1.record
    raw_second = record2.record
    return raw_first == raw_second


def logically_compare_records(
    record1: "MS3Record", record2: "MS3Record"
) -> tuple[bool, dict[str, bool]]:
    """Compare two MS3Record instances field by field.

    Args:
        record1: First record to compare.
        record2: Second record to compare.

    Returns:
        A tuple ``(match, match_dict)`` where:
          - match: True if every compared field matches, False otherwise
          - match_dict: maps each compared field name to the result of
            ``record1.<field> == record2.<field>``, in the order listed below

    The following fields are compared:
        crc, datalength, encoding, endtime, flags, formatversion,
        numsamples, pubversion, reclen, samplecnt, sampletype, samprate,
        sourceid, starttime, swapflag, datasamples

    The following fields are NOT compared:
        datasize
        extra
        extralength
        record
        endtime_seconds  # derived from endtime
        starttime_seconds  # derived from starttime
        samprate_raw
    """
    # Single source of truth for the compared attributes; the result dict is
    # built from it so the two can never disagree.
    compared_fields = (
        "crc",
        "datalength",
        "encoding",
        "endtime",
        "flags",
        "formatversion",
        "numsamples",
        "pubversion",
        "reclen",
        "samplecnt",
        "sampletype",
        "samprate",
        "sourceid",
        "starttime",
        "swapflag",
        "datasamples",
    )

    match_dict = {
        name: getattr(record1, name) == getattr(record2, name)
        for name in compared_fields
    }
    return all(match_dict.values()), match_dict
69 changes: 69 additions & 0 deletions src/mseedlib/msrecord_buffer_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from itertools import zip_longest

from .msrecord import binary_compare_records, logically_compare_records
from .msrecord_buffer_reader import MS3RecordBufferReader


def sort_mseed_content(content: bytes) -> bytes:
    """
    Sort miniSEED content bytes by source ID, then by start time.

    Args:
        content (bytes): The miniSEED content to sort.

    Returns:
        bytes: The records of ``content`` concatenated in ascending
            ``(sourceid, starttime)`` order. Records with equal keys keep
            their input order because ``list.sort`` is stable.
    """
    # The reader is given a bytearray copy of the input; keep it referenced
    # by a local name for the whole time the reader is open.
    record_array = bytearray(content)
    with MS3RecordBufferReader(record_array) as msreader:
        # Snapshot the sort key and the raw record while each record is
        # current. bytes() takes an explicit copy so the stored payload does
        # not alias whatever object msr.record hands back.
        # NOTE(review): presumably the reader may reuse its record between
        # iterations -- confirm; the copy is harmless either way.
        keyed_records = [
            (msr.sourceid, msr.starttime, bytes(msr.record)) for msr in msreader
        ]

    # Order on (sourceid, starttime) only; the payload never takes part.
    keyed_records.sort(key=lambda entry: (entry[0], entry[1]))

    return b"".join(raw for _, _, raw in keyed_records)


def compare_miniseed_content(
    content1: bytes, content2: bytes, ignore_order: bool = True
) -> tuple[list[bool], list[tuple[bool, dict]]]:
    """
    Compare two miniSEED content bytes record by record.

    Note if doing exact binary comparison, use `content1 == content2`.

    Args:
        content1 (bytes): First miniSEED content.
        content2 (bytes): Second miniSEED content.
        ignore_order (bool): When True (the default) both contents are passed
            through `sort_mseed_content` first, so record order in the inputs
            does not affect the pairing.

    Returns:
        tuple: ``(binary_compare_list, logical_compare_list)`` with one entry
            per record pair:
              - binary_compare_list: result of `binary_compare_records`
              - logical_compare_list: ``(match, match_dict)`` result of
                `logically_compare_records`
            When one content holds more records than the other, each unpaired
            record contributes ``False`` and ``(False, {})`` so a difference
            in record count is reported rather than silently dropped.
    """

    if ignore_order:
        content1 = sort_mseed_content(content1)
        content2 = sort_mseed_content(content2)

    binary_compare_list = []
    logical_compare_list = []
    with (
        MS3RecordBufferReader(
            bytearray(content1), unpack_data=True
        ) as content1_msreader,
        MS3RecordBufferReader(
            bytearray(content2), unpack_data=True
        ) as content2_msreader,
    ):
        # zip_longest (not zip) so trailing records on the longer side are
        # still visited; the shorter side yields None for them.
        for msr1, msr2 in zip_longest(content1_msreader, content2_msreader):
            if msr1 is None or msr2 is None:
                # No counterpart to compare against: record a mismatch.
                binary_compare_list.append(False)
                logical_compare_list.append((False, {}))
                continue
            binary_compare_list.append(binary_compare_records(msr1, msr2))
            logical_compare_list.append(logically_compare_records(msr1, msr2))
    return binary_compare_list, logical_compare_list
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import pytest
import math


# Integer sine wave: 500 samples, one per degree from 0 through 499, amplitude 500
sine_500 = [int(math.sin(math.radians(degree)) * 500) for degree in range(500)]
5 changes: 1 addition & 4 deletions tests/test_msrecord.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
import pytest
import os
import json
import math
import ctypes as ct
from mseedlib import MS3Record, DataEncoding
from .conftest import sine_500

test_dir = os.path.abspath(os.path.dirname(__file__))
test_pack3 = os.path.join(test_dir, "data", "packtest_sine500.mseed3")
test_pack2 = os.path.join(test_dir, "data", "packtest_sine500.mseed2")

# A sine wave of 500 samples
sine_500 = list(map(lambda x: int(math.sin(math.radians(x)) * 500), range(0, 500)))

# A global record buffer
record_buffer = b""

Expand Down
70 changes: 70 additions & 0 deletions tests/test_msrecord_buffer_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import pytest
from mseedlib import MS3Record
from mseedlib.msrecord_buffer_compare import (
sort_mseed_content,
compare_miniseed_content,
)

from .conftest import sine_500

# A global record buffer: record_handler() overwrites it with a bytes copy of
# the record it was last called with, and the tests read it back after pack()
record_buffer = b""


@pytest.fixture(scope="module")
def msr_XX_TEST__B_S_X_bytes() -> tuple[MS3Record, bytes]:
    """Module-scoped fixture: a packed record with source ID FDSN:XX_TEST__B_S_X.

    Returns:
        The MS3Record that was packed, and a bytes copy of the module-level
        ``record_buffer`` taken right after ``pack()`` returned.
    """
    # NOTE: the return annotation must be subscripted (tuple[...]); calling
    # tuple(MS3Record, bytes) raises TypeError as soon as the module is imported.
    msr1 = MS3Record()
    msr1.set_starttime_str("2023-01-02T01:02:03.123456789Z")
    msr1.sourceid = "FDSN:XX_TEST__B_S_X"
    msr1.pack(record_handler, datasamples=sine_500, sampletype="i")
    msr1_bytes = bytes(record_buffer)  # Make a copy of the packed record
    return msr1, msr1_bytes


@pytest.fixture(scope="module")
def msr_XX_TEST__B_S_Y_bytes() -> tuple[MS3Record, bytes]:
    """Module-scoped fixture: a packed record with source ID FDSN:XX_TEST__B_S_Y.

    Returns:
        The MS3Record that was packed, and a bytes copy of the module-level
        ``record_buffer`` taken right after ``pack()`` returned.
    """
    # NOTE: the return annotation must be subscripted (tuple[...]); calling
    # tuple(MS3Record, bytes) raises TypeError as soon as the module is imported.
    msr1 = MS3Record()
    msr1.set_starttime_str("2023-01-02T01:02:03.123456789Z")
    # The source ID ends in _Y to match the fixture name and to differ from
    # the _X fixture.
    msr1.sourceid = "FDSN:XX_TEST__B_S_Y"
    msr1.pack(record_handler, datasamples=sine_500, sampletype="i")
    msr1_bytes = bytes(record_buffer)  # Make a copy of the packed record
    return msr1, msr1_bytes


def record_handler(record, handler_data):
    """Record callback passed to MS3Record.pack() by the tests in this module.

    Prints the length of ``record`` and replaces the module-level
    ``record_buffer`` with a bytes copy of it. ``handler_data`` is accepted
    but not used.
    """
    global record_buffer
    record_length = len(record)
    print("Record handler called, record length: %d" % record_length)
    record_buffer = bytes(record)
Comment on lines +29 to +40
Copy link

Copilot AI Apr 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The record_handler modifies a global variable (record_buffer) for test state, which may lead to unexpected interference in concurrent test runs. Consider using a fixture-scoped or context-local variable to store the record bytes.

Suggested change
msr1.pack(record_handler, datasamples=sine_500, sampletype="i")
msr1_bytes = bytes(record_buffer) # Make a copy of the packed record
return msr1, msr1_bytes
def record_handler(record, handler_data):
"""A callback function for MS3Record.set_record_handler()
Stores the record in a global buffer for testing
"""
print("Record handler called, record length: %d" % len(record))
global record_buffer
record_buffer = bytes(record)
handler_data = {}
msr1.pack(record_handler, handler_data=handler_data, datasamples=sine_500, sampletype="i")
msr1_bytes = handler_data["record_buffer"] # Make a copy of the packed record
return msr1, msr1_bytes
def record_handler(record, handler_data):
"""A callback function for MS3Record.set_record_handler()
Stores the record in a context-local variable for testing
"""
print("Record handler called, record length: %d" % len(record))
handler_data["record_buffer"] = bytes(record)

Copilot uses AI. Check for mistakes.


def test_sort_mseed_content():
    """sort_mseed_content() returns X-then-Y when handed Y-then-X."""

    packed_records = []
    # Pack one record per source ID; both share the same start time, so the
    # source ID alone decides the expected order.
    for source_id in ("FDSN:XX_TEST__B_S_X", "FDSN:XX_TEST__B_S_Y"):
        msr = MS3Record()
        msr.set_starttime_str("2023-01-02T01:02:03.123456789Z")
        msr.sourceid = source_id
        msr.pack(record_handler, datasamples=sine_500, sampletype="i")
        # record_handler() left the packed record in record_buffer; copy it
        packed_records.append(bytes(record_buffer))

    x_bytes, y_bytes = packed_records
    expected_sorted_bytes = x_bytes + y_bytes
    unsorted_bytes = y_bytes + x_bytes

    sorted_bytes = sort_mseed_content(unsorted_bytes)

    assert (
        sorted_bytes == expected_sorted_bytes
    ), "Sorted bytes do not match expected order"


class TestCompareMiniseed:
    """Test class for miniSEED comparison; currently holds only a placeholder."""

    def test_sorting(self):
        """Placeholder test with no assertions yet."""
        # `self` is required: the method is invoked on an instance, so a
        # zero-argument signature fails with TypeError before the body runs.
        pass