|
42 | 42 |
|
43 | 43 | import tskit
|
44 | 44 | import tskit.exceptions as exceptions
|
| 45 | +import tskit.util as util |
45 | 46 |
|
46 | 47 | __builtins__object__setattr__ = builtins.object.__setattr__
|
47 | 48 |
|
@@ -1041,3 +1042,106 @@ def assert_equals(self, other: MetadataProvider):
|
1041 | 1042 | raise AssertionError(
|
1042 | 1043 | f"Metadata differs: self={self.metadata} " f"other={other.metadata}"
|
1043 | 1044 | )
|
| 1045 | + |
| 1046 | + |
# Sentinel for unset default values. It must only ever be tested by identity
# (``is NOTSET``): an equality test would dispatch to the ``__eq__`` of whatever
# object the caller passed in.
NOTSET = object()


class TableMetadataReader:
    # Mixin for table classes that expose decoded metadata

    @property
    def metadata_schema(self) -> MetadataSchema:
        """
        The :class:`tskit.MetadataSchema` for this table.
        """
        # This isn't as inefficient as it looks because we're using an LRU cache on
        # the parse_metadata_schema function. Thus, we're really only incurring the
        # cost of creating the unicode string from the low-level schema and looking
        # up the functools cache.
        return parse_metadata_schema(self.ll_table.metadata_schema)

    def metadata_vector(self, key, *, dtype=None, default_value=NOTSET):
        """
        Returns a numpy array of metadata values obtained by extracting ``key``
        from each metadata entry, and using ``default_value`` if the key is
        not present. ``key`` may be a list, in which case nested values are returned.
        For instance, ``key = ["a", "x"]`` will return an array of
        ``row.metadata["a"]["x"]`` values, iterated over rows in this table.

        :param key: The name, or a list of names, of metadata entries.
        :param str dtype: The dtype of the result (can usually be omitted).
        :param object default_value: The value to be inserted if the metadata key
            is not present. Note that for numeric columns, a default value of None
            will result in a non-numeric array. The default behaviour is to raise
            ``KeyError`` on missing entries.
        :return: A numpy array with one entry per row of this table.
        """
        from collections.abc import Mapping

        # Identity, not equality: ``default_value == NOTSET`` would call the
        # default's own ``__eq__``, which is ambiguous for numpy arrays and can
        # be spuriously true for objects with a permissive ``__eq__``.
        if default_value is NOTSET:

            def getter(d, k):
                return d[k]

        else:

            def getter(d, k):
                # A non-mapping intermediate value (e.g. a scalar where a nested
                # dict was expected) also counts as "key not present".
                return (
                    d.get(k, default_value) if isinstance(d, Mapping) else default_value
                )

        # A single key is just a path of length one, so both cases share the
        # same reduction over the row's metadata.
        path = key if isinstance(key, list) else [key]
        return np.array(
            [functools.reduce(getter, path, row.metadata) for row in self],
            dtype=dtype,
        )

    def _make_row(self, *args):
        # Build a row object whose metadata is decoded with this table's schema.
        return self.row_class(*args, metadata_decoder=self.metadata_schema.decode_row)
| 1108 | + |
class TableMetadataWriter(TableMetadataReader):
    # Mixin for table classes whose metadata and metadata schema can be modified

    @TableMetadataReader.metadata_schema.setter
    def metadata_schema(self, schema: MetadataSchema) -> None:
        # Accept only MetadataSchema instances; the low-level table stores the
        # schema as the string produced by repr().
        if isinstance(schema, MetadataSchema):
            self.ll_table.metadata_schema = repr(schema)
            return
        raise TypeError(
            "Only instances of tskit.MetadataSchema can be assigned to "
            f"metadata_schema, not {type(schema)}"
        )

    def packset_metadata(self, metadatas):
        """
        Packs the given metadata values and replaces this table's ``metadata``
        and ``metadata_offset`` columns with the result. There must be exactly
        one value in ``metadatas`` for each row of the table.

        :param list metadatas: A list of metadata bytes values.
        """
        # Pack first, so a packing failure happens before the columns are read.
        packed_bytes, packed_offsets = util.pack_bytes(metadatas)
        columns = self.asdict()
        columns["metadata"] = packed_bytes
        columns["metadata_offset"] = packed_offsets
        self.set_columns(**columns)

    def drop_metadata(self, *, keep_schema=False):
        """
        Removes all metadata from this table. Unless ``keep_schema`` is True,
        the schema is cleared as well.

        :param bool keep_schema: True if the current schema should be kept intact.
        """
        columns = self.asdict()
        # No metadata bytes remain, so every offset is reset to zero.
        columns["metadata"] = []
        columns["metadata_offset"][:] = 0
        self.set_columns(**columns)
        if keep_schema:
            return
        self.metadata_schema = MetadataSchema.null()
0 commit comments