Skip to content

Commit 576fde3

Browse files
committed
Refactor table metadata helpers
1 parent 99f03e2 commit 576fde3

File tree

2 files changed

+153
-295
lines changed

2 files changed

+153
-295
lines changed

python/tskit/metadata.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
import tskit
4444
import tskit.exceptions as exceptions
45+
import tskit.util as util
4546

4647
__builtins__object__setattr__ = builtins.object.__setattr__
4748

@@ -1041,3 +1042,106 @@ def assert_equals(self, other: MetadataProvider):
10411042
raise AssertionError(
10421043
f"Metadata differs: self={self.metadata} " f"other={other.metadata}"
10431044
)
1045+
1046+
1047+
NOTSET = object()  # Sentinel for unset default values


class TableMetadataReader:
    # Mixin for table classes that expose decoded metadata.
    #
    # The methods below rely on the host class providing:
    #   - ``self.ll_table``: an object with a ``metadata_schema`` attribute
    #   - iteration over rows, each row having a ``metadata`` attribute
    #   - ``self.row_class``: a callable accepting a ``metadata_decoder`` keyword

    @property
    def metadata_schema(self) -> MetadataSchema:
        """
        The :class:`tskit.MetadataSchema` for this table.
        """
        # This isn't as inefficient as it looks because we're using an LRU cache on
        # the parse_metadata_schema function. Thus, we're really only incurring the
        # cost of creating the unicode string from the low-level schema and looking
        # up the functools cache.
        return parse_metadata_schema(self.ll_table.metadata_schema)

    def metadata_vector(self, key, *, dtype=None, default_value=NOTSET):
        """
        Returns a numpy array of metadata values obtained by extracting ``key``
        from each metadata entry, and using ``default_value`` if the key is
        not present. ``key`` may be a list, in which case nested values are returned.
        For instance, ``key = ["a", "x"]`` will return an array of
        ``row.metadata["a"]["x"]`` values, iterated over rows in this table.

        :param str key: The name, or a list of names, of metadata entries.
        :param str dtype: The dtype of the result (can usually be omitted).
        :param object default_value: The value to be inserted if the metadata key
            is not present. Note that for numeric columns, a default value of None
            will result in a non-numeric array. The default behaviour is to raise
            ``KeyError`` on missing entries.
        :return: An array with one entry per row of this table.
        :rtype: numpy.ndarray
        :raises KeyError: If ``default_value`` is not given and ``key`` is missing
            from the metadata of some row.
        """
        from collections.abc import Mapping

        # The sentinel must be tested by identity. An equality test would call
        # the caller-supplied object's ``__eq__``: a numpy array default then
        # yields an elementwise result whose truth value is ambiguous, and an
        # object that compares equal to everything would be mistaken for "unset".
        if default_value is NOTSET:

            def getter(d, k):
                return d[k]

        else:

            def getter(d, k):
                # A non-mapping intermediate value (e.g. while walking a nested
                # key) cannot contain ``k``, so it also maps to the default.
                return (
                    d.get(k, default_value) if isinstance(d, Mapping) else default_value
                )

        if isinstance(key, list):
            # Walk the nested path: getter(getter(metadata, key[0]), key[1]) ...
            values = [functools.reduce(getter, key, row.metadata) for row in self]
        else:
            values = [getter(row.metadata, key) for row in self]
        return np.array(values, dtype=dtype)

    def _make_row(self, *args):
        # Build a row via ``row_class``, handing it the current schema's
        # ``decode_row`` as the ``metadata_decoder``.
        return self.row_class(*args, metadata_decoder=self.metadata_schema.decode_row)
1107+
1108+
1109+
class TableMetadataWriter(TableMetadataReader):
    # Mixin for tables writing metadata: extends the reader mixin with a
    # schema setter and helpers that rewrite the metadata columns in bulk.

    @TableMetadataReader.metadata_schema.setter
    def metadata_schema(self, schema: MetadataSchema) -> None:
        # Reject anything that is not a MetadataSchema before touching the
        # low-level table; the schema is stored as its ``repr`` string.
        if isinstance(schema, MetadataSchema):
            self.ll_table.metadata_schema = repr(schema)
            return
        raise TypeError(
            "Only instances of tskit.MetadataSchema can be assigned to "
            f"metadata_schema, not {type(schema)}"
        )

    def packset_metadata(self, metadatas):
        """
        Packs the given metadata values and replaces the ``metadata`` and
        ``metadata_offset`` columns of this table with the result. There must
        be exactly one entry in ``metadatas`` per row of the table.

        :param list metadatas: A list of metadata bytes values.
        """
        packed, offset = util.pack_bytes(metadatas)
        columns = self.asdict()
        columns.update(metadata=packed, metadata_offset=offset)
        self.set_columns(**columns)

    def drop_metadata(self, *, keep_schema=False):
        """
        Removes the metadata from every row of this table. Unless
        ``keep_schema`` is True, the schema is reset to the null schema as well.

        :param bool keep_schema: True if the current schema should be kept intact.
        """
        columns = self.asdict()
        columns["metadata"] = []
        # Zero the offsets in place so the column keeps its existing length.
        columns["metadata_offset"][:] = 0
        self.set_columns(**columns)
        if keep_schema:
            return
        self.metadata_schema = MetadataSchema.null()

0 commit comments

Comments
 (0)