Skip to content

Commit bfff5be

Browse files
committed
orange-hdf5: Update .metadata handling to match latest proposed
1 parent 0826364 commit bfff5be

File tree

2 files changed

+94
-1
lines changed

2 files changed

+94
-1
lines changed

src/orangecontrib/protospec/data.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import pickle
23

34
from os import path
45

@@ -15,6 +16,7 @@
1516
Table,
1617
)
1718
from Orange.data.io import FileFormat
19+
from Orange.data.io_base import PICKLE_PROTOCOL
1820

1921
from Orange.version import short_version as ORANGE_VERSION # noqa N812
2022
from orangecontrib.spectroscopy.io import HDF5MetaReader
@@ -38,7 +40,7 @@ def read_domain(sub):
3840
if f'{sub}_args' in d
3941
else ['{}'] * len(subdomain)
4042
)
41-
for attr, args in zip(subdomain, subdomain_args, strict=False):
43+
for attr, args in zip(subdomain, subdomain_args): # noqa B905
4244
yield attr[0], attr[1], json.loads(args)
4345

4446
def make_var(name, header, args):
@@ -141,6 +143,48 @@ def parse(attr):
141143
f.create_dataset(f'metas/{i}', data=col_data, dtype=col_type)
142144
cls.write_table_metadata(filename, data)
143145

146+
@classmethod
def write_table_metadata(cls, filename, data):
    """Store ``data.attributes`` in the ``metadata`` group of an HDF5 file.

    Each attribute becomes one variable-length string dataset named after
    its key. The value is encoded as follows:

    * ``str`` values are stored unchanged;
    * other values are stored as JSON text when ``json.dumps`` accepts them;
    * anything else is pickled and stored as a lowercase hex string.

    Args:
        filename: Path of an existing HDF5 file; it is opened in ``'r+'``
            mode, so it must already exist.
        data: Object whose ``attributes`` mapping is written.
    """
    encoded = {}
    for key, value in data.attributes.items():
        if isinstance(value, str):
            encoded[key] = value
            continue
        try:
            encoded[key] = json.dumps(value)
        except (TypeError, ValueError):
            # TypeError: value is not JSON serializable.
            # ValueError: json refuses circular references.
            # pickle copes with both, so fall back to it.
            encoded[key] = pickle.dumps(value, protocol=PICKLE_PROTOCOL).hex()

    with h5py.File(filename, 'r+') as f:
        metadata_group = f.require_group('metadata')
        str_dtype = h5py.string_dtype()
        for key, value in encoded.items():
            # require_group may return a pre-existing group; create_dataset
            # raises if the name is already taken, so replace stale entries.
            if key in metadata_group:
                del metadata_group[key]
            metadata_group.create_dataset(key, data=value, dtype=str_dtype)
164+
165+
@classmethod
def set_table_metadata(cls, filename, data):
    """Populate ``data.attributes`` from the ``metadata`` group of an HDF5 file.

    Every dataset in the group is read as a scalar; ``bytes`` are decoded
    as UTF-8. String values are then decoded on a best-effort basis:

    * text starting with ``{`` or ``[`` is parsed as JSON;
    * text starting with the hex form of a pickle header for
      ``PICKLE_PROTOCOL`` is unpickled.

    If decoding fails, the raw string is kept. Every other string is
    stored as-is, so text that does not start with ``{`` or ``[`` is never
    JSON-decoded (``"2024"`` stays a string). When the file has no
    ``metadata`` group, the parent class implementation is used instead.

    Args:
        filename: Path of the HDF5 file to read.
        data: Object whose ``attributes`` mapping is updated in place.
    """
    # Pickles of protocol >= 2 start with byte 0x80 followed by the
    # protocol number; the writer stores them hex-encoded.
    pickle_prefix = f"80{PICKLE_PROTOCOL:02x}"

    with h5py.File(filename, 'r') as f:
        if 'metadata' not in f:
            super().set_table_metadata(filename, data)
            return

        metadata_group = f['metadata']
        for key in metadata_group:
            value = metadata_group[key][()]
            if isinstance(value, bytes):
                value = value.decode('utf-8')
            # Only strings can carry an encoded payload; datasets of other
            # types (e.g. numeric scalars) are passed through untouched.
            if isinstance(value, str):
                if value.startswith(('{', '[')):
                    try:
                        value = json.loads(value)
                    except json.JSONDecodeError:
                        # Plain text that merely looks like JSON: keep it.
                        pass
                elif value.startswith(pickle_prefix):
                    # NOTE(security): unpickling can execute arbitrary code;
                    # only open files that come from a trusted source.
                    try:
                        value = pickle.loads(bytes.fromhex(value))
                    except (
                        pickle.UnpicklingError,
                        ValueError,
                        AttributeError,
                        ImportError,
                        EOFError,
                        IndexError,
                    ):
                        # Not a usable pickle (bad hex, truncated data, or
                        # the pickled class is unavailable): keep the text.
                        pass
            data.attributes[key] = value
187+
144188

145189
class IRisF1HDF5Reader(FileFormat):
146190
"""Reader for IRsweep IRis-F1 HDF5 _processed_data files"""

src/orangecontrib/protospec/tests/test_io.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import os
12
import unittest
3+
from tempfile import NamedTemporaryFile
24

35
import numpy as np
46
from Orange.data import (
@@ -41,3 +43,50 @@ def test_roundtrip_hdf5(self):
4143
np.testing.assert_equal(data.metas[:2], self.data.metas[:2])
4244
self.assertEqual(data.metas[2, 0], "")
4345
np.testing.assert_equal(data.domain, self.data.domain)
46+
47+
48+
class Unserializable:
    """Plain object with a single ``name`` attribute.

    ``json.dumps`` rejects instances of arbitrary classes such as this one,
    so the tests use it as an attribute value that is not JSON serializable.
    """

    def __init__(self, name):
        # Keep the label so a test can tell instances apart.
        self.name = name
51+
52+
53+
class TestWriterMetadata(unittest.TestCase):
    """Round-trip tests for table attributes written by ``HDF5Reader``."""

    def setUp(self):
        # Borrow TestHDF5's fixture without inheriting its test methods;
        # it is expected to populate ``self.data``.
        TestHDF5.setUp(self)
        self.data.attributes.update(
            {
                "Name": "Test dataset",
                "Description": "This is a test dataset.",
                "Author": "Unit Tester",
                "Year": "2024",
                "Reference": "None",
            }
        )

    def _roundtrip(self, data):
        """Write *data* to a temporary HDF5 file and return it read back."""
        # delete=False: only the name is needed; the file is reopened by path.
        with NamedTemporaryFile(suffix=".hdf5", delete=False) as f:
            fname = f.name
        try:
            HDF5Reader.write(fname, data)
            return HDF5Reader(fname).read()
        finally:
            os.remove(fname)

    def test_metadata_hdf5(self):
        data = self.data.copy()
        data.attributes["CustomAttr"] = {"key1": "value1", "key2": 2}
        table = self._roundtrip(data)
        self.assertEqual(table.attributes, data.attributes)

    def test_metadata_hdf5_pickle(self):
        data = self.data.copy()
        data.attributes["Unserializable"] = Unserializable(name="test")
        table = self._roundtrip(data)
        # Compare key sets first so a silently dropped attribute fails.
        self.assertEqual(set(table.attributes), set(data.attributes))
        for key, expected in data.attributes.items():
            value = table.attributes[key]
            if isinstance(expected, Unserializable):
                # The class defines no __eq__, so compare type and payload.
                self.assertIsInstance(value, Unserializable)
                self.assertEqual(value.name, expected.name)
            else:
                self.assertEqual(value, expected)

0 commit comments

Comments
 (0)