Skip to content

Commit 190bfe9

Browse files
author
Alexis Simon
committed
python/tskit/util.py: Add a file name to FileFormatError
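Closes #2467. Uses the `add_note` method of exceptions, introduced in Python 3.11, to attach the file name to the existing FileFormatError. Also includes the file name in the messages of the exceptions raised (via `raise ... from`) for HDF5 or zip files. Until Python 3.10 reaches end-of-life (EOL), the `sys` module is needed for the version check.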
1 parent b7fd993 commit 190bfe9

File tree

4 files changed

+58
-18
lines changed

4 files changed

+58
-18
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@ python/benchmark/*.html
77
.env
88
.vscode
99
env
10+
# pixi environments
11+
.pixi/*
12+
!.pixi/config.toml

python/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@ build
55
.*.swp
66
.*.swo
77
*/.ipynb_checkpoints
8+
# pixi environments
9+
.pixi/*
10+
!.pixi/config.toml

python/tests/test_file_format.py

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -261,12 +261,12 @@ def test_format_too_old_raised_for_hdf5(self):
261261

262262
with pytest.raises(
263263
exceptions.FileFormatError,
264-
match="appears to be in HDF5 format",
264+
match=f"{filename}.*appears to be in HDF5 format",
265265
):
266266
tskit.load(path)
267267
with pytest.raises(
268268
exceptions.FileFormatError,
269-
match="appears to be in HDF5 format",
269+
match=f"{filename}.*appears to be in HDF5 format",
270270
):
271271
tskit.TableCollection.load(path)
272272

@@ -284,9 +284,15 @@ class TestErrors(TestFileFormat):
284284
def test_tszip_file(self):
285285
ts = msprime.simulate(5)
286286
tszip.compress(ts, self.temp_file)
287-
with pytest.raises(tskit.FileFormatError, match="appears to be in zip format"):
287+
with pytest.raises(
288+
tskit.FileFormatError,
289+
match=f"{self.temp_file}.*appears to be in zip format",
290+
):
288291
tskit.load(self.temp_file)
289-
with pytest.raises(tskit.FileFormatError, match="appears to be in zip format"):
292+
with pytest.raises(
293+
tskit.FileFormatError,
294+
match=f"{self.temp_file}.*appears to be in zip format",
295+
):
290296
tskit.TableCollection.load(self.temp_file)
291297

292298

@@ -781,7 +787,10 @@ def verify_equal_length_columns(self, ts, table):
781787
data = dict(all_data)
782788
data[col] = bad_val
783789
kastore.dump(data, self.temp_file)
784-
with pytest.raises(exceptions.FileFormatError):
790+
with pytest.raises(
791+
exceptions.FileFormatError,
792+
match=f"While trying to load {self.temp_file}",
793+
):
785794
tskit.load(self.temp_file)
786795

787796
def test_equal_length_columns(self):
@@ -805,7 +814,10 @@ def verify_offset_columns(self, ts):
805814
for bad_col_length in [[], range(2 * num_rows)]:
806815
data[offset_col] = bad_col_length
807816
kastore.dump(data, self.temp_file)
808-
with pytest.raises(exceptions.FileFormatError):
817+
with pytest.raises(
818+
exceptions.FileFormatError,
819+
match=f"While trying to load {self.temp_file}",
820+
):
809821
tskit.load(self.temp_file)
810822

811823
# Check for a bad offset
@@ -853,13 +865,17 @@ def test_index_columns(self):
853865
data = dict(all_data)
854866
data[edge_insertion_order] = data[edge_insertion_order][:1]
855867
kastore.dump(data, self.temp_file)
856-
with pytest.raises(exceptions.FileFormatError):
868+
with pytest.raises(
869+
exceptions.FileFormatError, match=f"While trying to load {self.temp_file}"
870+
):
857871
tskit.load(self.temp_file)
858872

859873
data = dict(all_data)
860874
data[edge_removal_order] = data[edge_removal_order][:1]
861875
kastore.dump(data, self.temp_file)
862-
with pytest.raises(exceptions.FileFormatError):
876+
with pytest.raises(
877+
exceptions.FileFormatError, match=f"While trying to load {self.temp_file}"
878+
):
863879
tskit.load(self.temp_file)
864880

865881
def test_load_empty_kastore(self):
@@ -897,7 +913,10 @@ def test_format_name_error(self):
897913
data = dict(store)
898914
data["format/name"] = np.array(bytearray(bad_name.encode()), dtype=np.int8)
899915
kastore.dump(data, self.temp_file)
900-
with pytest.raises(exceptions.FileFormatError):
916+
with pytest.raises(
917+
exceptions.FileFormatError,
918+
match=f"While trying to load {self.temp_file}",
919+
):
901920
tskit.load(self.temp_file)
902921

903922
def test_load_bad_formats(self):
@@ -908,12 +927,16 @@ def test_load_bad_formats(self):
908927
# Now some ascii text
909928
with open(self.temp_file, "wb") as f:
910929
f.write(b"Some ASCII text")
911-
with pytest.raises(exceptions.FileFormatError):
930+
with pytest.raises(
931+
exceptions.FileFormatError, match=f"While trying to load {self.temp_file}"
932+
):
912933
tskit.load(self.temp_file)
913934
# Now write 8k of random bytes
914935
with open(self.temp_file, "wb") as f:
915936
f.write(os.urandom(8192))
916-
with pytest.raises(exceptions.FileFormatError):
937+
with pytest.raises(
938+
exceptions.FileFormatError, match=f"While trying to load {self.temp_file}"
939+
):
917940
tskit.load(self.temp_file)
918941

919942
def test_load_bad_formats_fileobj(self):
@@ -925,7 +948,9 @@ def load():
925948
load()
926949
with open(self.temp_file, "wb") as f:
927950
f.write(b"Some ASCII text")
928-
with pytest.raises(exceptions.FileFormatError):
951+
with pytest.raises(
952+
exceptions.FileFormatError, match=f"While trying to load {self.temp_file}"
953+
):
929954
load()
930955

931956

@@ -978,7 +1003,9 @@ def test_ts_twofile_stream_fails(self, tmp_path, ts_fixture):
9781003
ts_fixture.dump(f)
9791004
with open(save_path, "rb") as f:
9801005
tskit.load(f, skip_tables=True)
981-
with pytest.raises(exceptions.FileFormatError):
1006+
with pytest.raises(
1007+
exceptions.FileFormatError, match=f"While trying to load {save_path}"
1008+
):
9821009
tskit.load(f)
9831010

9841011
def test_table_collection_load_path(self, tmp_path, ts_fixture):
@@ -1033,7 +1060,9 @@ def test_ts_twofile_stream_fails(self, tmp_path, ts_fixture):
10331060
ts_fixture.dump(f)
10341061
with open(save_path, "rb") as f:
10351062
tskit.load(f, skip_reference_sequence=True)
1036-
with pytest.raises(exceptions.FileFormatError):
1063+
with pytest.raises(
1064+
exceptions.FileFormatError, match=f"While trying to load {save_path}"
1065+
):
10371066
tskit.load(f)
10381067

10391068
def test_table_collection_load_path(self, tmp_path, ts_fixture):

python/tskit/util.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import json
3131
import numbers
3232
import os
33+
import sys
3334
import textwrap
3435
from typing import Union
3536

@@ -878,6 +879,10 @@ def raise_known_file_format_errors(open_file, existing_exception):
878879
Sniffs the file for pk-zip or hdf header bytes, then raises an exception
879880
if these are detected, if not raises the existing exception.
880881
"""
882+
if sys.version_info[0] >= 3 and sys.version_info[1] > 10:
883+
# add_note has been added in python 3.11
884+
# This condition can be removed once 3.10 is end-of-life
885+
existing_exception.add_note(f"While trying to load {open_file.name}")
881886
# Check for HDF5 header bytes
882887
try:
883888
open_file.seek(0)
@@ -887,16 +892,16 @@ def raise_known_file_format_errors(open_file, existing_exception):
887892
raise existing_exception
888893
if header == b"\x89HDF":
889894
raise tskit.FileFormatError(
890-
"The specified file appears to be in HDF5 format. This file "
895+
f"The file {open_file.name} appears to be in HDF5 format. This file "
891896
"may have been generated by msprime < 0.6.0 (June 2018) which "
892897
"can no longer be read directly. Please convert to the new "
893898
"kastore format using the ``tskit upgrade`` command from tskit version<0.6.2"
894899
) from existing_exception
895900
if header[:2] == b"\x50\x4b":
896901
raise tskit.FileFormatError(
897-
"The specified file appears to be in zip format, so may be a compressed "
898-
"tree sequence. Try using the tszip module to decompress this file before "
899-
"loading. `pip install tszip; tsunzip <filename>` or use "
902+
f"The file {open_file.name} appears to be in zip format, so may be a "
903+
"compressed tree sequence. Try using the tszip module to decompress this "
904+
"file before loading. `pip install tszip; tsunzip <filename>` or use "
900905
"`tszip.decompress` in Python code."
901906
) from existing_exception
902907
raise existing_exception

0 commit comments

Comments
 (0)