Skip to content

Commit ec80c9c

Browse files
authored
Test behavior against BytesIO reference (#24)
1 parent 8adbe93 commit ec80c9c

File tree

2 files changed

+294
-8
lines changed

2 files changed

+294
-8
lines changed

src/obspec_utils/obspec.py

Lines changed: 21 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -236,15 +236,19 @@ def read(self, size: int = -1, /) -> bytes:
236236
Parameters
237237
----------
238238
size
239-
Number of bytes to read. If -1, read the entire file.
239+
Number of bytes to read. If -1, read from current position to end.
240240
241241
Returns
242242
-------
243243
bytes
244244
The data read from the file.
245245
"""
246246
if size == -1:
247-
return self.readall()
247+
# Read from current position to end
248+
file_size = self._get_size()
249+
size = file_size - self._position
250+
if size <= 0:
251+
return b""
248252

249253
# Check if we can satisfy from buffer
250254
buffer_end = self._buffer_start + len(self._buffer)
@@ -258,8 +262,14 @@ def read(self, size: int = -1, /) -> bytes:
258262
self._position += len(data)
259263
return data
260264

261-
# Need to fetch from store
262-
fetch_size = max(size, self._buffer_size)
265+
# Check if we're at or past EOF
266+
file_size = self._get_size()
267+
if self._position >= file_size:
268+
return b""
269+
270+
# Need to fetch from store - clamp to remaining bytes
271+
remaining = file_size - self._position
272+
fetch_size = min(max(size, self._buffer_size), remaining)
263273
data = bytes(
264274
self._store.get_range(self._path, start=self._position, length=fetch_size)
265275
)
@@ -601,18 +611,21 @@ def read(self, size: int = -1, /) -> bytes:
601611
Parameters
602612
----------
603613
size
604-
Number of bytes to read. If -1, read the entire file.
614+
Number of bytes to read. If -1, read from current position to end.
605615
606616
Returns
607617
-------
608618
bytes
609619
The data read from the file.
610620
"""
611-
if size == -1:
612-
return self.readall()
613-
614621
file_size = self._get_size()
615622

623+
if size == -1:
624+
# Read from current position to end
625+
size = file_size - self._position
626+
if size <= 0:
627+
return b""
628+
616629
# Clamp to remaining bytes
617630
remaining = file_size - self._position
618631
if size > remaining:

tests/test_registry.py

Lines changed: 273 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from io import BytesIO
12
import pytest
23
from obstore.store import MemoryStore
34

@@ -256,6 +257,27 @@ def test_reader_read_past_end(ReaderClass):
256257
assert data == b"short"
257258

258259

260+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
261+
def test_reader_read_minus_one(ReaderClass):
262+
"""Test read(-1) reads entire file for all readers."""
263+
memstore = MemoryStore()
264+
memstore.put("test.txt", b"hello world")
265+
266+
reader = ReaderClass(memstore, "test.txt")
267+
assert reader.read(-1) == b"hello world"
268+
269+
270+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
271+
def test_reader_read_minus_one_from_middle(ReaderClass):
272+
"""Test read(-1) reads from current position to end."""
273+
memstore = MemoryStore()
274+
memstore.put("test.txt", b"hello world")
275+
276+
reader = ReaderClass(memstore, "test.txt")
277+
reader.seek(6)
278+
assert reader.read(-1) == b"world"
279+
280+
259281
def test_buffered_reader_buffering():
260282
"""Test that BufferedStoreReader buffering works correctly."""
261283
memstore = MemoryStore()
@@ -1039,3 +1061,254 @@ def test_iter_stores_multiple():
10391061
assert store1 in stores
10401062
assert store2 in stores
10411063
assert store3 in stores
1064+
1065+
1066+
# --- BytesIO Consistency Tests ---
1067+
# These tests verify that readers behave consistently with Python's BytesIO
1068+
1069+
1070+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1071+
def test_reader_read_matches_bytesio(ReaderClass):
1072+
"""Reader read(n) matches BytesIO behavior."""
1073+
data = b"hello world test data"
1074+
1075+
ref = BytesIO(data)
1076+
memstore = MemoryStore()
1077+
memstore.put("test.txt", data)
1078+
reader = ReaderClass(memstore, "test.txt")
1079+
1080+
assert reader.read(5) == ref.read(5)
1081+
assert reader.tell() == ref.tell()
1082+
1083+
1084+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1085+
def test_reader_read_zero_matches_bytesio(ReaderClass):
1086+
"""Reader read(0) returns empty bytes like BytesIO."""
1087+
data = b"hello world"
1088+
1089+
ref = BytesIO(data)
1090+
memstore = MemoryStore()
1091+
memstore.put("test.txt", data)
1092+
reader = ReaderClass(memstore, "test.txt")
1093+
1094+
assert reader.read(0) == ref.read(0)
1095+
assert reader.read(0) == b""
1096+
assert reader.tell() == ref.tell()
1097+
1098+
1099+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1100+
def test_reader_read_all_matches_bytesio(ReaderClass):
1101+
"""Reader read(-1) matches BytesIO.read(-1)."""
1102+
data = b"hello world test data"
1103+
1104+
ref = BytesIO(data)
1105+
memstore = MemoryStore()
1106+
memstore.put("test.txt", data)
1107+
reader = ReaderClass(memstore, "test.txt")
1108+
1109+
assert reader.read(-1) == ref.read(-1)
1110+
assert reader.tell() == ref.tell()
1111+
1112+
1113+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1114+
def test_reader_read_no_arg_matches_bytesio(ReaderClass):
1115+
"""Reader read() with no argument matches BytesIO."""
1116+
data = b"hello world test data"
1117+
1118+
ref = BytesIO(data)
1119+
memstore = MemoryStore()
1120+
memstore.put("test.txt", data)
1121+
reader = ReaderClass(memstore, "test.txt")
1122+
1123+
assert reader.read() == ref.read()
1124+
assert reader.tell() == ref.tell()
1125+
1126+
1127+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1128+
def test_reader_sequential_reads_match_bytesio(ReaderClass):
1129+
"""Multiple consecutive reads match BytesIO behavior."""
1130+
data = b"0123456789ABCDEF"
1131+
1132+
ref = BytesIO(data)
1133+
memstore = MemoryStore()
1134+
memstore.put("test.txt", data)
1135+
reader = ReaderClass(memstore, "test.txt")
1136+
1137+
for _ in range(4):
1138+
assert reader.read(4) == ref.read(4)
1139+
assert reader.tell() == ref.tell()
1140+
1141+
1142+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1143+
def test_reader_seek_set_matches_bytesio(ReaderClass):
1144+
"""Reader seek(n, SEEK_SET) matches BytesIO."""
1145+
data = b"hello world test data"
1146+
1147+
ref = BytesIO(data)
1148+
memstore = MemoryStore()
1149+
memstore.put("test.txt", data)
1150+
reader = ReaderClass(memstore, "test.txt")
1151+
1152+
assert reader.seek(5) == ref.seek(5)
1153+
assert reader.tell() == ref.tell()
1154+
assert reader.read(5) == ref.read(5)
1155+
1156+
1157+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1158+
def test_reader_seek_cur_matches_bytesio(ReaderClass):
1159+
"""Reader seek(n, SEEK_CUR) matches BytesIO."""
1160+
data = b"hello world test data"
1161+
1162+
ref = BytesIO(data)
1163+
memstore = MemoryStore()
1164+
memstore.put("test.txt", data)
1165+
reader = ReaderClass(memstore, "test.txt")
1166+
1167+
# Move forward first
1168+
reader.read(5)
1169+
ref.read(5)
1170+
1171+
# Then seek relative
1172+
assert reader.seek(3, 1) == ref.seek(3, 1)
1173+
assert reader.tell() == ref.tell()
1174+
assert reader.read(5) == ref.read(5)
1175+
1176+
1177+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1178+
def test_reader_seek_end_matches_bytesio(ReaderClass):
1179+
"""Reader seek(n, SEEK_END) matches BytesIO."""
1180+
data = b"hello world test data"
1181+
1182+
ref = BytesIO(data)
1183+
memstore = MemoryStore()
1184+
memstore.put("test.txt", data)
1185+
reader = ReaderClass(memstore, "test.txt")
1186+
1187+
assert reader.seek(-5, 2) == ref.seek(-5, 2)
1188+
assert reader.tell() == ref.tell()
1189+
assert reader.read() == ref.read()
1190+
1191+
1192+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1193+
def test_reader_seek_returns_position_matches_bytesio(ReaderClass):
1194+
"""Reader seek() return value matches BytesIO."""
1195+
data = b"hello world test data"
1196+
1197+
ref = BytesIO(data)
1198+
memstore = MemoryStore()
1199+
memstore.put("test.txt", data)
1200+
reader = ReaderClass(memstore, "test.txt")
1201+
1202+
assert reader.seek(10) == ref.seek(10)
1203+
assert reader.seek(5, 1) == ref.seek(5, 1)
1204+
assert reader.seek(-3, 2) == ref.seek(-3, 2)
1205+
1206+
1207+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1208+
def test_reader_tell_matches_bytesio(ReaderClass):
1209+
"""Reader tell() matches BytesIO after various operations."""
1210+
data = b"hello world test data"
1211+
1212+
ref = BytesIO(data)
1213+
memstore = MemoryStore()
1214+
memstore.put("test.txt", data)
1215+
reader = ReaderClass(memstore, "test.txt")
1216+
1217+
assert reader.tell() == ref.tell()
1218+
reader.read(5)
1219+
ref.read(5)
1220+
assert reader.tell() == ref.tell()
1221+
reader.seek(10)
1222+
ref.seek(10)
1223+
assert reader.tell() == ref.tell()
1224+
1225+
1226+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1227+
def test_reader_read_past_eof_matches_bytesio(ReaderClass):
1228+
"""Reading past EOF matches BytesIO behavior."""
1229+
data = b"short"
1230+
1231+
ref = BytesIO(data)
1232+
memstore = MemoryStore()
1233+
memstore.put("test.txt", data)
1234+
reader = ReaderClass(memstore, "test.txt")
1235+
1236+
assert reader.read(100) == ref.read(100)
1237+
assert reader.tell() == ref.tell()
1238+
# Reading again at EOF should return empty
1239+
assert reader.read(10) == ref.read(10)
1240+
1241+
1242+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1243+
def test_reader_seek_negative_cur_matches_bytesio(ReaderClass):
1244+
"""Reader seek(-n, SEEK_CUR) matches BytesIO."""
1245+
data = b"hello world test data"
1246+
1247+
ref = BytesIO(data)
1248+
memstore = MemoryStore()
1249+
memstore.put("test.txt", data)
1250+
reader = ReaderClass(memstore, "test.txt")
1251+
1252+
# Move forward first
1253+
reader.read(10)
1254+
ref.read(10)
1255+
1256+
# Then seek backward
1257+
assert reader.seek(-5, 1) == ref.seek(-5, 1)
1258+
assert reader.tell() == ref.tell()
1259+
assert reader.read(5) == ref.read(5)
1260+
1261+
1262+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1263+
def test_reader_empty_file_matches_bytesio(ReaderClass):
1264+
"""Empty file behavior matches BytesIO."""
1265+
data = b""
1266+
1267+
ref = BytesIO(data)
1268+
memstore = MemoryStore()
1269+
memstore.put("test.txt", data)
1270+
reader = ReaderClass(memstore, "test.txt")
1271+
1272+
assert reader.read() == ref.read()
1273+
assert reader.tell() == ref.tell()
1274+
assert reader.read(10) == ref.read(10)
1275+
1276+
1277+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1278+
def test_reader_seek_read_sequence_matches_bytesio(ReaderClass):
1279+
"""Interleaved seek/read operations match BytesIO."""
1280+
data = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
1281+
1282+
ref = BytesIO(data)
1283+
memstore = MemoryStore()
1284+
memstore.put("test.txt", data)
1285+
reader = ReaderClass(memstore, "test.txt")
1286+
1287+
# Complex sequence of operations
1288+
assert reader.read(10) == ref.read(10)
1289+
assert reader.seek(5) == ref.seek(5)
1290+
assert reader.read(5) == ref.read(5)
1291+
assert reader.seek(-3, 1) == ref.seek(-3, 1)
1292+
assert reader.read(10) == ref.read(10)
1293+
assert reader.seek(-5, 2) == ref.seek(-5, 2)
1294+
assert reader.read() == ref.read()
1295+
assert reader.tell() == ref.tell()
1296+
1297+
1298+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
1299+
def test_reader_seek_invalid_whence_raises(ReaderClass):
1300+
"""Reader raises ValueError for invalid whence like BytesIO."""
1301+
data = b"hello world"
1302+
1303+
ref = BytesIO(data)
1304+
memstore = MemoryStore()
1305+
memstore.put("test.txt", data)
1306+
reader = ReaderClass(memstore, "test.txt")
1307+
1308+
# Verify BytesIO raises ValueError for invalid whence
1309+
with pytest.raises(ValueError):
1310+
ref.seek(0, 3)
1311+
1312+
# Reader should match
1313+
with pytest.raises(ValueError):
1314+
reader.seek(0, 3)

0 commit comments

Comments
 (0)