EarthScope · mbriggs134 · Apr 24, 2025 · Copilot · Apr 24, 2025 · Copilot
diff --git a/src/mseedlib/msrecord.py b/src/mseedlib/msrecord.py
@@ -580,3 +580,63 @@ def pack(
         ct.c_int8,
     ],
 )
+
+
+def binary_compare_records(record1: MS3Record, record2: MS3Record) -> bool:
+    """
+    Compare the `record` attribute of two MS3Record instances with `==`.
+
+    NOTE(review): `record` is presumably the raw packed record buffer, which
+    would make this a byte-level comparison -- confirm against MS3Record.
+    """
+    first_raw = record1.record
+    second_raw = record2.record
+    return first_raw == second_raw
+
+
+def logically_compare_records(
+    record1: MS3Record, record2: MS3Record
+) -> tuple[bool, dict]:
+    """
+    Compare two MS3Record instances field by field, ignoring certain fields.
+
+    Each compared field is read from both records and checked with `==`.
+
+    Returns a tuple of (match, match_dict) where:
+    - match: True if all compared fields match, False otherwise
+    - match_dict: Dictionary keyed by field name holding the comparison
+      result for that field, in the order listed below
+
+    The following fields are compared:
+        crc
+        datalength
+        encoding
+        endtime
+        flags
+        formatversion
+        numsamples
+        pubversion
+        reclen
+        samplecnt
+        sampletype
+        samprate
+        sourceid
+        starttime
+        swapflag
+        datasamples
+
+    The following fields are NOT compared:
+        datasize
+        extra
+        extralength
+        record
+        endtime_seconds # derived from endtime
+        starttime_seconds # derived from starttime
+        samprate_raw
+    """
+
+    # Single source of truth for the compared fields; the order here is the
+    # key order of the returned dictionary.
+    compared_fields = (
+        "crc",
+        "datalength",
+        "encoding",
+        "endtime",
+        "flags",
+        "formatversion",
+        "numsamples",
+        "pubversion",
+        "reclen",
+        "samplecnt",
+        "sampletype",
+        "samprate",
+        "sourceid",
+        "starttime",
+        "swapflag",
+        "datasamples",
+    )
+    match_dict = {
+        name: getattr(record1, name) == getattr(record2, name)
+        for name in compared_fields
+    }
+    return all(match_dict.values()), match_dict
diff --git a/src/mseedlib/msrecord_buffer_compare.py b/src/mseedlib/msrecord_buffer_compare.py
@@ -0,0 +1,69 @@
+from .msrecord_buffer_reader import MS3RecordBufferReader
+from .msrecord import binary_compare_records, logically_compare_records
+
+
+def sort_mseed_content(content: bytes) -> bytes:
+    """
+    Reorder the records in miniSEED content by source ID, then start time
+
+    The content is read record by record with `MS3RecordBufferReader`, the
+    records are sorted, and their `record` values are joined back together.
+
+    Args:
+        content (bytes): The miniSEED content to sort.
+
+    Returns:
+        bytes: The same records, concatenated in sorted order.
+    """
+    # The reader is given a mutable copy of the input
+    buffer = bytearray(content)
+    with MS3RecordBufferReader(buffer) as reader:
+        # Pull the needed properties out of each record while iterating
+        keyed_records = [(msr.sourceid, msr.starttime, msr.record) for msr in reader]
+
+    # Sort on the leading (source_id, starttime) pair of each entry
+    keyed_records.sort(key=lambda entry: (entry[0], entry[1]))
+
+    return b"".join(entry[2] for entry in keyed_records)
+
+
+def compare_miniseed_content(
+    content1: bytes, content2: bytes, ignore_order: bool = True
+) -> tuple[list[bool], list[tuple[bool, dict]]]:
+    """
+    Compare two miniSEED content bytes record by record
+
+    Records from the two inputs are paired positionally and each pair is
+    checked with both `binary_compare_records` and
+    `logically_compare_records`. When the inputs hold different numbers of
+    records, every unpaired record is reported as a mismatch (`False` in the
+    binary list, `(False, {})` in the logical list) instead of being
+    silently dropped.
+
+    Note if doing exact binary comparison, use `content1 == content2`.
+
+    Args:
+        content1 (bytes): First miniSEED content.
+        content2 (bytes): Second miniSEED content.
+        ignore_order (bool): If True (the default), both inputs are passed
+            through `sort_mseed_content` before their records are paired.
+
+    Returns:
+        tuple: `(binary_compare_list, logical_compare_list)` with one entry
+        per record pair. `binary_compare_list` holds the results of
+        `binary_compare_records`; `logical_compare_list` holds the
+        `(match, match_dict)` tuples from `logically_compare_records`.
+    """
+    # Imported here so the function brings its own dependency with it.
+    from itertools import zip_longest
+
+    if ignore_order:
+        content1 = sort_mseed_content(content1)
+        content2 = sort_mseed_content(content2)
+
+    # Fill value marking "no record on this side" when record counts differ.
+    unpaired = object()
+
+    binary_compare_list = []
+    logical_compare_list = []
+    with (
+        MS3RecordBufferReader(
+            bytearray(content1), unpack_data=True
+        ) as content1_msreader,
+        MS3RecordBufferReader(
+            bytearray(content2), unpack_data=True
+        ) as content2_msreader,
+    ):
+        for msr1, msr2 in zip_longest(
+            content1_msreader, content2_msreader, fillvalue=unpaired
+        ):
+            if msr1 is unpaired or msr2 is unpaired:
+                # A record with no counterpart can never match.
+                binary_compare_list.append(False)
+                logical_compare_list.append((False, {}))
+                continue
+            binary_compare_list.append(binary_compare_records(msr1, msr2))
+            logical_compare_list.append(logically_compare_records(msr1, msr2))
+    return binary_compare_list, logical_compare_list
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,6 @@
+import pytest
+import math
+
+
+# 500 integer samples of a sine wave with amplitude 500, one sample per
+# degree for 0..499 degrees; shared by the test modules.
+sine_500 = [int(math.sin(math.radians(degree)) * 500) for degree in range(500)]
diff --git a/tests/test_msrecord.py b/tests/test_msrecord.py
@@ -1,17 +1,14 @@
 import pytest
 import os
 import json
-import math
 import ctypes as ct
 from mseedlib import MS3Record, DataEncoding
+from .conftest import sine_500
 
 test_dir = os.path.abspath(os.path.dirname(__file__))
 test_pack3 = os.path.join(test_dir, "data", "packtest_sine500.mseed3")
 test_pack2 = os.path.join(test_dir, "data", "packtest_sine500.mseed2")
 
-# A sine wave of 500 samples
-sine_500 = list(map(lambda x: int(math.sin(math.radians(x)) * 500), range(0, 500)))
-
 # A global record buffer
 record_buffer = b""
 

diff --git a/tests/test_msrecord_buffer_compare.py b/tests/test_msrecord_buffer_compare.py
@@ -0,0 +1,70 @@
+import pytest
+from mseedlib import MS3Record
+from mseedlib.msrecord_buffer_compare import (
+    sort_mseed_content,
+    compare_miniseed_content,
+)
+
+from .conftest import sine_500
+
+# A global record buffer
+record_buffer = b""
+
+
+@pytest.fixture(scope="module")
+def msr_XX_TEST__B_S_X_bytes() -> tuple[MS3Record, bytes]:
+    """Pack `sine_500` under source ID ``FDSN:XX_TEST__B_S_X``.
+
+    Returns:
+        The MS3Record used for packing and a `bytes` copy of the module-level
+        `record_buffer` taken after `pack()` has run with `record_handler`.
+    """
+    msr1 = MS3Record()
+    msr1.set_starttime_str("2023-01-02T01:02:03.123456789Z")
+    msr1.sourceid = "FDSN:XX_TEST__B_S_X"
+    msr1.pack(record_handler, datasamples=sine_500, sampletype="i")
+    msr1_bytes = bytes(record_buffer)  # Make a copy of the packed record
+    return msr1, msr1_bytes
+
+
+@pytest.fixture(scope="module")
+def msr_XX_TEST__B_S_Y_bytes() -> tuple[MS3Record, bytes]:
+    """Pack `sine_500` for the ``_Y`` source ID fixture and return (record, bytes)."""
+    msr1 = MS3Record()
+    msr1.set_starttime_str("2023-01-02T01:02:03.123456789Z")
+    msr1.sourceid = "FDSN:XX_TEST__B_S_X"
-    msr1.sourceid = "FDSN:XX_TEST__B_S_X"
+    msr1.sourceid = "FDSN:XX_TEST__B_S_Y"
-    msr1.sourceid = "FDSN:XX_TEST__B_S_X"
+    msr1.sourceid = "FDSN:XX_TEST__B_S_Y"
+    msr1.pack(record_handler, datasamples=sine_500, sampletype="i")
+    msr1_bytes = bytes(record_buffer)  # Make a copy of the packed record
+    return msr1, msr1_bytes
+
+
+def record_handler(record, handler_data):
+    """Record callback passed to ``MS3Record.pack()`` in this module.
+
+    Prints the record length and stores a ``bytes`` copy of ``record`` in the
+    module-level ``record_buffer``, replacing whatever was stored before.
+    ``handler_data`` is accepted but not used.
+    """
+    print("Record handler called, record length: %d" % len(record))
+    global record_buffer
+    record_buffer = bytes(record)
-    msr1.pack(record_handler, datasamples=sine_500, sampletype="i")
-    msr1_bytes = bytes(record_buffer)  # Make a copy of the packed record
-    return msr1, msr1_bytes
-
-
-def record_handler(record, handler_data):
-    """A callback function for MS3Record.set_record_handler()
-    Stores the record in a global buffer for testing
-    """
-    print("Record handler called, record length: %d" % len(record))
-    global record_buffer
-    record_buffer = bytes(record)
+    handler_data = {}
+    msr1.pack(record_handler, handler_data=handler_data, datasamples=sine_500, sampletype="i")
+    msr1_bytes = handler_data["record_buffer"]  # Make a copy of the packed record
+    return msr1, msr1_bytes
+
+
+def record_handler(record, handler_data):
+    """Record callback that hands the packed record back through ``handler_data``.
+
+    Prints the record length and stores a ``bytes`` copy of ``record`` under
+    the ``"record_buffer"`` key of ``handler_data``, so the caller must pass
+    an object that supports item assignment (for example a dict).
+    """
+    print("Record handler called, record length: %d" % len(record))
+    handler_data["record_buffer"] = bytes(record)
-    msr1.pack(record_handler, datasamples=sine_500, sampletype="i")
-    msr1_bytes = bytes(record_buffer)  # Make a copy of the packed record
-    return msr1, msr1_bytes
-
-
-def record_handler(record, handler_data):
-    """A callback function for MS3Record.set_record_handler()
-    Stores the record in a global buffer for testing
-    """
-    print("Record handler called, record length: %d" % len(record))
-    global record_buffer
-    record_buffer = bytes(record)
+    handler_data = {}
+    msr1.pack(record_handler, handler_data=handler_data, datasamples=sine_500, sampletype="i")
+    msr1_bytes = handler_data["record_buffer"]  # Make a copy of the packed record
+    return msr1, msr1_bytes
+
+
+def record_handler(record, handler_data):
+    """Record callback that hands the packed record back through ``handler_data``.
+
+    Prints the record length and stores a ``bytes`` copy of ``record`` under
+    the ``"record_buffer"`` key of ``handler_data``, so the caller must pass
+    an object that supports item assignment (for example a dict).
+    """
+    print("Record handler called, record length: %d" % len(record))
+    handler_data["record_buffer"] = bytes(record)
+
+
+def test_sort_mseed_content():
+    """Test sorting of miniSEED content
+
+    Packs the same samples under two source IDs, concatenates them in
+    reverse source ID order, and checks that `sort_mseed_content` restores
+    source ID order.
+    """
+
+    def pack_for_source(sourceid: str) -> bytes:
+        """Pack `sine_500` under *sourceid* and return the packed bytes."""
+        packed = []
+        msr = MS3Record()
+        msr.set_starttime_str("2023-01-02T01:02:03.123456789Z")
+        msr.sourceid = sourceid
+        # Collect into a local list instead of the module-level
+        # record_buffer, so the test does not depend on shared global state
+        # or on which record_handler definition the module ends up with.
+        msr.pack(
+            lambda record, handler_data: packed.append(bytes(record)),
+            datasamples=sine_500,
+            sampletype="i",
+        )
+        return b"".join(packed)
+
+    msr1_bytes = pack_for_source("FDSN:XX_TEST__B_S_X")
+    msr2_bytes = pack_for_source("FDSN:XX_TEST__B_S_Y")
+
+    expected_sorted_bytes = msr1_bytes + msr2_bytes
+    unsorted_bytes = msr2_bytes + msr1_bytes
+
+    sorted_bytes = sort_mseed_content(unsorted_bytes)
+
+    assert (
+        sorted_bytes == expected_sorted_bytes
+    ), "Sorted bytes do not match expected order"
+
+
+class TestCompareMiniseed:
+    """Placeholder test class; it currently contains no assertions."""
+
+    def test_sorting(self):
+        """Placeholder test; takes ``self`` because pytest calls it on an instance."""
+        pass