diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c index 829963cbe4..acbc9ce6f0 100644 --- a/python/_tskitmodule.c +++ b/python/_tskitmodule.c @@ -11026,53 +11026,7 @@ TreeSequence_decode_ragged_string_column( } #endif -static PyObject * -TreeSequence_get_individuals_flags(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_individual_table_t individuals; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - individuals = self->tree_sequence->tables->individuals; - ret = TreeSequence_make_array( - self, individuals.num_rows, NPY_UINT32, individuals.flags); -out: - return ret; -} - -static PyObject * -TreeSequence_get_individuals_metadata(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_individual_table_t individuals; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - individuals = self->tree_sequence->tables->individuals; - ret = TreeSequence_make_array( - self, individuals.metadata_length, NPY_UINT8, individuals.metadata); -out: - return ret; -} -static PyObject * -TreeSequence_get_individuals_metadata_offset(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_individual_table_t individuals; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - individuals = self->tree_sequence->tables->individuals; - ret = TreeSequence_make_array( - self, individuals.num_rows + 1, NPY_UINT64, individuals.metadata_offset); -out: - return ret; -} static PyObject * TreeSequence_get_individuals_location(TreeSequence *self, void *closure) @@ -11140,7 +11094,7 @@ TreeSequence_get_individuals_parents_offset(TreeSequence *self, void *closure) #if HAVE_NUMPY_2 static PyObject * -TreeSequence_get_provenances_timestamp(TreeSequence *self, void *closure) +TreeSequence_get_provenances_timestamp_string(TreeSequence *self, void *closure) { PyObject *ret = NULL; tsk_provenance_table_t provenances; @@ -11156,7 +11110,7 @@ TreeSequence_get_provenances_timestamp(TreeSequence *self, void *closure) } static 
PyObject * -TreeSequence_get_provenances_record(TreeSequence *self, void *closure) +TreeSequence_get_provenances_record_string(TreeSequence *self, void *closure) { PyObject *ret = NULL; tsk_provenance_table_t provenances; @@ -11172,208 +11126,9 @@ TreeSequence_get_provenances_record(TreeSequence *self, void *closure) } #endif -static PyObject * -TreeSequence_get_nodes_time(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_node_table_t nodes; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - nodes = self->tree_sequence->tables->nodes; - ret = TreeSequence_make_array(self, nodes.num_rows, NPY_FLOAT64, nodes.time); -out: - return ret; -} - -static PyObject * -TreeSequence_get_nodes_flags(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_node_table_t nodes; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - nodes = self->tree_sequence->tables->nodes; - ret = TreeSequence_make_array(self, nodes.num_rows, NPY_UINT32, nodes.flags); -out: - return ret; -} - -static PyObject * -TreeSequence_get_nodes_population(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_node_table_t nodes; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - nodes = self->tree_sequence->tables->nodes; - ret = TreeSequence_make_array(self, nodes.num_rows, NPY_INT32, nodes.population); -out: - return ret; -} - -static PyObject * -TreeSequence_get_nodes_individual(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_node_table_t nodes; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - nodes = self->tree_sequence->tables->nodes; - ret = TreeSequence_make_array(self, nodes.num_rows, NPY_INT32, nodes.individual); -out: - return ret; -} - -static PyObject * -TreeSequence_get_nodes_metadata(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_node_table_t nodes; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - nodes = self->tree_sequence->tables->nodes; - 
ret = TreeSequence_make_array( - self, nodes.metadata_length, NPY_UINT8, nodes.metadata); -out: - return ret; -} - -static PyObject * -TreeSequence_get_nodes_metadata_offset(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_node_table_t nodes; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - nodes = self->tree_sequence->tables->nodes; - ret = TreeSequence_make_array( - self, nodes.num_rows + 1, NPY_UINT64, nodes.metadata_offset); -out: - return ret; -} - -static PyObject * -TreeSequence_get_edges_left(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_edge_table_t edges; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - edges = self->tree_sequence->tables->edges; - ret = TreeSequence_make_array(self, edges.num_rows, NPY_FLOAT64, edges.left); -out: - return ret; -} - -static PyObject * -TreeSequence_get_edges_right(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_edge_table_t edges; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - edges = self->tree_sequence->tables->edges; - ret = TreeSequence_make_array(self, edges.num_rows, NPY_FLOAT64, edges.right); -out: - return ret; -} - -static PyObject * -TreeSequence_get_edges_parent(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_edge_table_t edges; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - edges = self->tree_sequence->tables->edges; - ret = TreeSequence_make_array(self, edges.num_rows, NPY_INT32, edges.parent); -out: - return ret; -} - -static PyObject * -TreeSequence_get_edges_child(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_edge_table_t edges; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - edges = self->tree_sequence->tables->edges; - ret = TreeSequence_make_array(self, edges.num_rows, NPY_INT32, edges.child); -out: - return ret; -} - -static PyObject * -TreeSequence_get_edges_metadata(TreeSequence *self, void *closure) -{ - PyObject 
*ret = NULL; - tsk_edge_table_t edges; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - edges = self->tree_sequence->tables->edges; - ret = TreeSequence_make_array( - self, edges.metadata_length, NPY_UINT8, edges.metadata); -out: - return ret; -} - -static PyObject * -TreeSequence_get_edges_metadata_offset(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_edge_table_t edges; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - edges = self->tree_sequence->tables->edges; - ret = TreeSequence_make_array( - self, edges.num_rows + 1, NPY_UINT64, edges.metadata_offset); -out: - return ret; -} - -static PyObject * -TreeSequence_get_sites_position(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_site_table_t sites; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - sites = self->tree_sequence->tables->sites; - ret = TreeSequence_make_array(self, sites.num_rows, NPY_FLOAT64, sites.position); -out: - return ret; -} - #if HAVE_NUMPY_2 static PyObject * -TreeSequence_get_sites_ancestral_state(TreeSequence *self, void *closure) +TreeSequence_get_sites_ancestral_state_string(TreeSequence *self, void *closure) { PyObject *ret = NULL; tsk_site_table_t sites; @@ -11389,101 +11144,10 @@ TreeSequence_get_sites_ancestral_state(TreeSequence *self, void *closure) } #endif -static PyObject * -TreeSequence_get_sites_metadata(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_site_table_t sites; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - sites = self->tree_sequence->tables->sites; - ret = TreeSequence_make_array( - self, sites.metadata_length, NPY_UINT8, sites.metadata); -out: - return ret; -} - -static PyObject * -TreeSequence_get_sites_metadata_offset(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_site_table_t sites; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - sites = self->tree_sequence->tables->sites; - ret = 
TreeSequence_make_array( - self, sites.num_rows + 1, NPY_UINT64, sites.metadata_offset); -out: - return ret; -} - -static PyObject * -TreeSequence_get_mutations_site(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_mutation_table_t mutations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - mutations = self->tree_sequence->tables->mutations; - ret = TreeSequence_make_array(self, mutations.num_rows, NPY_INT32, mutations.site); -out: - return ret; -} - -static PyObject * -TreeSequence_get_mutations_node(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_mutation_table_t mutations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - mutations = self->tree_sequence->tables->mutations; - ret = TreeSequence_make_array(self, mutations.num_rows, NPY_INT32, mutations.node); -out: - return ret; -} - -static PyObject * -TreeSequence_get_mutations_parent(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_mutation_table_t mutations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - mutations = self->tree_sequence->tables->mutations; - ret = TreeSequence_make_array(self, mutations.num_rows, NPY_INT32, mutations.parent); -out: - return ret; -} - -static PyObject * -TreeSequence_get_mutations_time(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_mutation_table_t mutations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - mutations = self->tree_sequence->tables->mutations; - ret = TreeSequence_make_array(self, mutations.num_rows, NPY_FLOAT64, mutations.time); -out: - return ret; -} #if HAVE_NUMPY_2 static PyObject * -TreeSequence_get_mutations_derived_state(TreeSequence *self, void *closure) +TreeSequence_get_mutations_derived_state_string(TreeSequence *self, void *closure) { PyObject *ret = NULL; tsk_mutation_table_t mutations; @@ -11498,7 +11162,7 @@ TreeSequence_get_mutations_derived_state(TreeSequence *self, void *closure) return ret; } static 
PyObject * -TreeSequence_get_mutations_inherited_state(TreeSequence *self, void *closure) +TreeSequence_get_mutations_inherited_state_string(TreeSequence *self, void *closure) { PyObject *ret = NULL; tsk_treeseq_t *ts; @@ -11552,195 +11216,91 @@ TreeSequence_get_mutations_inherited_state(TreeSequence *self, void *closure) } #endif -static PyObject * -TreeSequence_get_mutations_metadata(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_mutation_table_t mutations; - if (TreeSequence_check_state(self) != 0) { - goto out; - } - mutations = self->tree_sequence->tables->mutations; - ret = TreeSequence_make_array( - self, mutations.metadata_length, NPY_UINT8, mutations.metadata); -out: - return ret; -} +/* Universal macro for all array accessors */ +#define DEFINE_ARRAY_ACCESSOR( \ + table_name, field_name, table_type, numpy_type, count_expr) \ + static PyObject *TreeSequence_get_##table_name##_##field_name( \ + TreeSequence *self, void *closure) \ + { \ + PyObject *ret = NULL; \ + tsk_##table_type##_table_t table; \ + \ + if (TreeSequence_check_state(self) != 0) { \ + goto out; \ + } \ + table = self->tree_sequence->tables->table_name; \ + ret = TreeSequence_make_array(self, count_expr, numpy_type, table.field_name); \ + out: \ + return ret; \ + } + +/* Generate string column accessors using the universal macro */ +DEFINE_ARRAY_ACCESSOR( + sites, ancestral_state, site, NPY_INT8, table.ancestral_state_length) +DEFINE_ARRAY_ACCESSOR( + sites, ancestral_state_offset, site, NPY_UINT64, table.num_rows + 1) +DEFINE_ARRAY_ACCESSOR( + mutations, derived_state, mutation, NPY_INT8, table.derived_state_length) +DEFINE_ARRAY_ACCESSOR( + mutations, derived_state_offset, mutation, NPY_UINT64, table.num_rows + 1) +DEFINE_ARRAY_ACCESSOR(provenances, record, provenance, NPY_INT8, table.record_length) +DEFINE_ARRAY_ACCESSOR( + provenances, record_offset, provenance, NPY_UINT64, table.num_rows + 1) +DEFINE_ARRAY_ACCESSOR( + provenances, timestamp, provenance, NPY_INT8, 
table.timestamp_length) +DEFINE_ARRAY_ACCESSOR( + provenances, timestamp_offset, provenance, NPY_UINT64, table.num_rows + 1) + +/* Generate array accessors using the universal macro */ +DEFINE_ARRAY_ACCESSOR(nodes, time, node, NPY_FLOAT64, table.num_rows) +DEFINE_ARRAY_ACCESSOR(nodes, flags, node, NPY_UINT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(nodes, population, node, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(nodes, individual, node, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(nodes, metadata, node, NPY_UINT8, table.metadata_length) +DEFINE_ARRAY_ACCESSOR(nodes, metadata_offset, node, NPY_UINT64, table.num_rows + 1) + +DEFINE_ARRAY_ACCESSOR(edges, left, edge, NPY_FLOAT64, table.num_rows) +DEFINE_ARRAY_ACCESSOR(edges, right, edge, NPY_FLOAT64, table.num_rows) +DEFINE_ARRAY_ACCESSOR(edges, parent, edge, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(edges, child, edge, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(edges, metadata, edge, NPY_UINT8, table.metadata_length) +DEFINE_ARRAY_ACCESSOR(edges, metadata_offset, edge, NPY_UINT64, table.num_rows + 1) + +DEFINE_ARRAY_ACCESSOR(sites, position, site, NPY_FLOAT64, table.num_rows) +DEFINE_ARRAY_ACCESSOR(sites, metadata, site, NPY_UINT8, table.metadata_length) +DEFINE_ARRAY_ACCESSOR(sites, metadata_offset, site, NPY_UINT64, table.num_rows + 1) + +DEFINE_ARRAY_ACCESSOR(mutations, site, mutation, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(mutations, node, mutation, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(mutations, parent, mutation, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(mutations, time, mutation, NPY_FLOAT64, table.num_rows) +DEFINE_ARRAY_ACCESSOR(mutations, metadata, mutation, NPY_UINT8, table.metadata_length) +DEFINE_ARRAY_ACCESSOR( + mutations, metadata_offset, mutation, NPY_UINT64, table.num_rows + 1) + +DEFINE_ARRAY_ACCESSOR(migrations, left, migration, NPY_FLOAT64, table.num_rows) +DEFINE_ARRAY_ACCESSOR(migrations, right, migration, NPY_FLOAT64, table.num_rows) 
+DEFINE_ARRAY_ACCESSOR(migrations, node, migration, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(migrations, source, migration, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(migrations, dest, migration, NPY_INT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR(migrations, time, migration, NPY_FLOAT64, table.num_rows) +DEFINE_ARRAY_ACCESSOR(migrations, metadata, migration, NPY_UINT8, table.metadata_length) +DEFINE_ARRAY_ACCESSOR( + migrations, metadata_offset, migration, NPY_UINT64, table.num_rows + 1) + +DEFINE_ARRAY_ACCESSOR( + populations, metadata, population, NPY_UINT8, table.metadata_length) +DEFINE_ARRAY_ACCESSOR( + populations, metadata_offset, population, NPY_UINT64, table.num_rows + 1) + +DEFINE_ARRAY_ACCESSOR(individuals, flags, individual, NPY_UINT32, table.num_rows) +DEFINE_ARRAY_ACCESSOR( + individuals, metadata, individual, NPY_UINT8, table.metadata_length) +DEFINE_ARRAY_ACCESSOR( + individuals, metadata_offset, individual, NPY_UINT64, table.num_rows + 1) -static PyObject * -TreeSequence_get_mutations_metadata_offset(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_mutation_table_t mutations; - if (TreeSequence_check_state(self) != 0) { - goto out; - } - mutations = self->tree_sequence->tables->mutations; - ret = TreeSequence_make_array( - self, mutations.num_rows + 1, NPY_UINT64, mutations.metadata_offset); -out: - return ret; -} - -static PyObject * -TreeSequence_get_migrations_left(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_migration_table_t migrations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - migrations = self->tree_sequence->tables->migrations; - ret = TreeSequence_make_array( - self, migrations.num_rows, NPY_FLOAT64, migrations.left); -out: - return ret; -} - -static PyObject * -TreeSequence_get_migrations_right(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_migration_table_t migrations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - 
migrations = self->tree_sequence->tables->migrations; - ret = TreeSequence_make_array( - self, migrations.num_rows, NPY_FLOAT64, migrations.right); -out: - return ret; -} - -static PyObject * -TreeSequence_get_migrations_node(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_migration_table_t migrations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - migrations = self->tree_sequence->tables->migrations; - ret = TreeSequence_make_array(self, migrations.num_rows, NPY_INT32, migrations.node); -out: - return ret; -} - -static PyObject * -TreeSequence_get_migrations_source(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_migration_table_t migrations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - migrations = self->tree_sequence->tables->migrations; - ret = TreeSequence_make_array( - self, migrations.num_rows, NPY_INT32, migrations.source); -out: - return ret; -} - -static PyObject * -TreeSequence_get_migrations_dest(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_migration_table_t migrations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - migrations = self->tree_sequence->tables->migrations; - ret = TreeSequence_make_array(self, migrations.num_rows, NPY_INT32, migrations.dest); -out: - return ret; -} - -static PyObject * -TreeSequence_get_migrations_time(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_migration_table_t migrations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - migrations = self->tree_sequence->tables->migrations; - ret = TreeSequence_make_array( - self, migrations.num_rows, NPY_FLOAT64, migrations.time); -out: - return ret; -} - -static PyObject * -TreeSequence_get_migrations_metadata(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_migration_table_t migrations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - migrations = self->tree_sequence->tables->migrations; - ret = 
TreeSequence_make_array( - self, migrations.metadata_length, NPY_UINT8, migrations.metadata); -out: - return ret; -} - -static PyObject * -TreeSequence_get_migrations_metadata_offset(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_migration_table_t migrations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - migrations = self->tree_sequence->tables->migrations; - ret = TreeSequence_make_array( - self, migrations.num_rows + 1, NPY_UINT64, migrations.metadata_offset); -out: - return ret; -} - -static PyObject * -TreeSequence_get_populations_metadata(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_population_table_t populations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - populations = self->tree_sequence->tables->populations; - ret = TreeSequence_make_array( - self, populations.metadata_length, NPY_UINT8, populations.metadata); -out: - return ret; -} - -static PyObject * -TreeSequence_get_populations_metadata_offset(TreeSequence *self, void *closure) -{ - PyObject *ret = NULL; - tsk_population_table_t populations; - - if (TreeSequence_check_state(self) != 0) { - goto out; - } - populations = self->tree_sequence->tables->populations; - ret = TreeSequence_make_array( - self, populations.num_rows + 1, NPY_UINT64, populations.metadata_offset); -out: - return ret; -} static PyObject * TreeSequence_get_indexes_edge_insertion_order(TreeSequence *self, void *closure) @@ -12155,8 +11715,8 @@ static PyGetSetDef TreeSequence_getsetters[] = { .get = (getter) TreeSequence_get_sites_position, .doc = "The site position array" }, #if HAVE_NUMPY_2 - { .name = "sites_ancestral_state", - .get = (getter) TreeSequence_get_sites_ancestral_state, + { .name = "sites_ancestral_state_string", + .get = (getter) TreeSequence_get_sites_ancestral_state_string, .doc = "The site ancestral state array" }, #endif { .name = "sites_metadata", @@ -12178,11 +11738,11 @@ static PyGetSetDef TreeSequence_getsetters[] = { .get = 
(getter) TreeSequence_get_mutations_time, .doc = "The mutation time array" }, #if HAVE_NUMPY_2 - { .name = "mutations_derived_state", - .get = (getter) TreeSequence_get_mutations_derived_state, + { .name = "mutations_derived_state_string", + .get = (getter) TreeSequence_get_mutations_derived_state_string, .doc = "The mutation derived state array" }, - { .name = "mutations_inherited_state", - .get = (getter) TreeSequence_get_mutations_inherited_state, + { .name = "mutations_inherited_state_string", + .get = (getter) TreeSequence_get_mutations_inherited_state_string, .doc = "The mutation inherited state array" }, #endif { .name = "mutations_metadata", @@ -12191,6 +11751,30 @@ static PyGetSetDef TreeSequence_getsetters[] = { { .name = "mutations_metadata_offset", .get = (getter) TreeSequence_get_mutations_metadata_offset, .doc = "The mutation metadata offset array" }, + { .name = "sites_ancestral_state", + .get = (getter) TreeSequence_get_sites_ancestral_state, + .doc = "The site ancestral state data array (numpy1 style)" }, + { .name = "sites_ancestral_state_offset", + .get = (getter) TreeSequence_get_sites_ancestral_state_offset, + .doc = "The site ancestral state offset array (numpy1 style)" }, + { .name = "mutations_derived_state", + .get = (getter) TreeSequence_get_mutations_derived_state, + .doc = "The mutation derived state data array (numpy1 style)" }, + { .name = "mutations_derived_state_offset", + .get = (getter) TreeSequence_get_mutations_derived_state_offset, + .doc = "The mutation derived state offset array (numpy1 style)" }, + { .name = "provenances_record", + .get = (getter) TreeSequence_get_provenances_record, + .doc = "The provenance record data array (numpy1 style)" }, + { .name = "provenances_record_offset", + .get = (getter) TreeSequence_get_provenances_record_offset, + .doc = "The provenance record offset array (numpy1 style)" }, + { .name = "provenances_timestamp", + .get = (getter) TreeSequence_get_provenances_timestamp, + .doc = "The provenance 
timestamp data array (numpy1 style)" }, + { .name = "provenances_timestamp_offset", + .get = (getter) TreeSequence_get_provenances_timestamp_offset, + .doc = "The provenance timestamp offset array (numpy1 style)" }, { .name = "migrations_left", .get = (getter) TreeSequence_get_migrations_left, .doc = "The migration left array" }, @@ -12228,11 +11812,11 @@ static PyGetSetDef TreeSequence_getsetters[] = { .get = (getter) TreeSequence_get_indexes_edge_removal_order, .doc = "The edge removal order array" }, #if HAVE_NUMPY_2 - { .name = "provenances_timestamp", - .get = (getter) TreeSequence_get_provenances_timestamp, + { .name = "provenances_timestamp_string", + .get = (getter) TreeSequence_get_provenances_timestamp_string, .doc = "The provenance timestamp array" }, - { .name = "provenances_record", - .get = (getter) TreeSequence_get_provenances_record, + { .name = "provenances_record_string", + .get = (getter) TreeSequence_get_provenances_record_string, .doc = "The provenance record array" }, #endif { NULL } /* Sentinel */ diff --git a/python/lwt_interface/dict_encoding_testlib.py b/python/lwt_interface/dict_encoding_testlib.py index 01ed9a865c..86c2418923 100644 --- a/python/lwt_interface/dict_encoding_testlib.py +++ b/python/lwt_interface/dict_encoding_testlib.py @@ -132,7 +132,7 @@ def verify(self, tables): def test_simple(self): ts = msprime.simulate(10, mutation_rate=1, random_seed=2) - self.verify(ts.tables) + self.verify(ts.dump_tables()) def test_empty(self): tables = tskit.TableCollection(sequence_length=1) @@ -152,7 +152,7 @@ def test_sequence_length(self): ts = msprime.simulate( 10, recombination_rate=0.1, mutation_rate=1, length=0.99, random_seed=2 ) - self.verify(ts.tables) + self.verify(ts.dump_tables()) def test_migration(self): pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)] @@ -164,7 +164,7 @@ def test_migration(self): record_migrations=True, random_seed=1, ) - self.verify(ts.tables) + self.verify(ts.dump_tables()) def 
test_example(self, tables): tables.metadata_schema = tskit.MetadataSchema( diff --git a/python/tests/test_genotypes.py b/python/tests/test_genotypes.py index 583fe45bb7..e8af9841a1 100644 --- a/python/tests/test_genotypes.py +++ b/python/tests/test_genotypes.py @@ -863,7 +863,7 @@ def test_nonbinary_trees(self): def test_acgt_mutations(self): ts = msprime.simulate(10, mutation_rate=10) assert ts.num_sites > 0 - tables = ts.tables + tables = ts.dump_tables() sites = tables.sites mutations = tables.mutations sites.set_columns( @@ -883,7 +883,7 @@ def test_acgt_mutations(self): def test_fails_multiletter_mutations(self): ts = msprime.simulate(10, random_seed=2) - tables = ts.tables + tables = ts.dump_tables() tables.sites.add_row(0, "ACTG") tsp = tables.tree_sequence() with pytest.raises(TypeError): @@ -891,7 +891,7 @@ def test_fails_multiletter_mutations(self): def test_fails_deletion_mutations(self): ts = msprime.simulate(10, random_seed=2) - tables = ts.tables + tables = ts.dump_tables() tables.sites.add_row(0, "") tsp = tables.tree_sequence() with pytest.raises(TypeError): @@ -899,7 +899,7 @@ def test_fails_deletion_mutations(self): def test_nonascii_mutations(self): ts = msprime.simulate(10, random_seed=2) - tables = ts.tables + tables = ts.dump_tables() tables.sites.add_row(0, chr(169)) # Copyright symbol tsp = tables.tree_sequence() with pytest.raises(TypeError): diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index 96d2947ff4..585f68b937 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -1292,7 +1292,7 @@ def verify_edgesets(self, ts): tskit.Edge(edgeset.left, edgeset.right, edgeset.parent, child) ) # squash the edges. 
- t = ts.dump_tables().nodes.time + t = ts.tables.nodes.time new_edges.sort(key=lambda e: (t[e.parent], e.parent, e.child, e.left)) squashed = [] @@ -1916,24 +1916,20 @@ def test_load_tables(self, ts): with pytest.raises( _tskit.LibraryError, match="Table collection must be indexed" ): - assert tskit.TreeSequence.load_tables(tables).dump_tables().has_index() + assert tskit.TreeSequence.load_tables(tables).tables.has_index() # Tables not in tc, but rebuilt - assert ( - tskit.TreeSequence.load_tables(tables, build_indexes=True) - .dump_tables() - .has_index() - ) + assert tskit.TreeSequence.load_tables( + tables, build_indexes=True + ).tables.has_index() tables.build_index() # Tables in tc, not rebuilt - assert ( - tskit.TreeSequence.load_tables(tables, build_indexes=False) - .dump_tables() - .has_index() - ) + assert tskit.TreeSequence.load_tables( + tables, build_indexes=False + ).tables.has_index() # Tables in tc, and rebuilt - assert tskit.TreeSequence.load_tables(tables).dump_tables().has_index() + assert tskit.TreeSequence.load_tables(tables).tables.has_index() @pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences()) def test_html_repr(self, ts): @@ -1958,14 +1954,14 @@ def test_bad_provenance(self, ts_fixture): assert "Could not parse provenance" in ts._repr_html_() def test_provenance_summary_html(self, ts_fixture): - tables = ts_fixture.tables + tables = ts_fixture.dump_tables() for _ in range(20): # Add a row with isotimestamp tables.provenances.add_row("foo", "bar") assert "... 
15 more" in tables.tree_sequence()._repr_html_() def test_html_repr_limit(self, ts_fixture): - tables = ts_fixture.tables + tables = ts_fixture.dump_tables() d = {n: n for n in range(50)} d[0] = "N" * 200 tables.metadata = d @@ -2656,7 +2652,8 @@ def verify_tables_api_equality(self, ts): tables = ts.dump_tables() tables.simplify(samples=samples) tables.assert_equals( - ts.simplify(samples=samples).tables, ignore_timestamps=True + ts.simplify(samples=samples).dump_tables(), + ignore_timestamps=True, ) @pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences()) @@ -4012,7 +4009,7 @@ def test_first_last(self): def test_eq_different_tree_sequence(self): ts = msprime.simulate(4, recombination_rate=1, length=2, random_seed=42) - copy = ts.tables.tree_sequence() + copy = ts.dump_tables().tree_sequence() for tree1, tree2 in zip(ts.aslist(), copy.aslist()): assert tree1 != tree2 diff --git a/python/tests/test_ibd.py b/python/tests/test_ibd.py index 4b13ad6db9..5e5c466a19 100644 --- a/python/tests/test_ibd.py +++ b/python/tests/test_ibd.py @@ -275,7 +275,7 @@ def test_within_between_mutually_exclusive(self, ts): @pytest.mark.parametrize("ts", example_ts()) def test_tables_interface(self, ts): - ibd_tab = ts.tables.ibd_segments(store_segments=True) + ibd_tab = ts.dump_tables().ibd_segments(store_segments=True) ibd_ts = ts.ibd_segments(store_segments=True) assert ibd_tab == ibd_ts diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index 68ba729f4e..ab003ffd7a 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -253,7 +253,7 @@ def test_dump_equality(self, tmp_path): def test_reference_deletion(self): ts = msprime.simulate(10, mutation_rate=1, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables # Get references to all the tables tables = [ tc.individuals, @@ -334,7 +334,7 @@ def test_set_metadata_schema(self): def test_simplify_bad_args(self): ts = msprime.simulate(10, random_seed=1) - tc = 
ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables with pytest.raises(TypeError): tc.simplify() with pytest.raises(ValueError): @@ -375,7 +375,7 @@ def test_simplify_flags(self, flag, value): def test_link_ancestors_bad_args(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables with pytest.raises(TypeError): tc.link_ancestors() with pytest.raises(TypeError): @@ -391,7 +391,7 @@ def test_link_ancestors_bad_args(self): def test_link_ancestors(self): ts = msprime.simulate(2, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables edges = tc.link_ancestors([0, 1], [3]) assert isinstance(edges, _tskit.EdgeTable) del edges @@ -399,7 +399,7 @@ def test_link_ancestors(self): def test_subset_bad_args(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables with pytest.raises(TypeError): tc.subset(np.array(["a"])) with pytest.raises(ValueError): @@ -411,7 +411,7 @@ def test_subset_bad_args(self): def test_union_bad_args(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables tc2 = tc with pytest.raises(TypeError): tc.union(tc2, np.array(["a"])) @@ -435,7 +435,7 @@ def test_union_bad_args(self): def test_equals_bad_args(self): ts = msprime.simulate(10, random_seed=1242) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables with pytest.raises(TypeError): tc.equals() with pytest.raises(TypeError): @@ -477,7 +477,7 @@ def test_fromdict(self): def test_asdict_bad_args(self): ts = msprime.simulate(10, random_seed=1242) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables for bad_type in [None, 0.1, "str"]: with pytest.raises(TypeError): tc.asdict(force_offset_64=bad_type) @@ -517,14 +517,14 @@ def test_uninitialised(self): def test_get_keys(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables pairs = [[0, 1], [0, 
2], [1, 2]] result = tc.ibd_segments_within([0, 1, 2], store_pairs=True) np.testing.assert_array_equal(result.get_keys(), pairs) def test_store_pairs(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables # By default we can't get any information about pairs. result = tc.ibd_segments_within() with pytest.raises(_tskit.IdentityPairsNotStoredError): @@ -551,7 +551,7 @@ def test_store_pairs(self): def test_within_all_pairs(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables num_pairs = ts.num_samples * (ts.num_samples - 1) / 2 result = tc.ibd_segments_within(store_pairs=True) assert result.num_pairs == num_pairs @@ -560,7 +560,7 @@ def test_within_all_pairs(self): def test_between_all_pairs(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables result = tc.ibd_segments_between([5, 5], range(10), store_pairs=True) assert result.num_pairs == 25 pairs = np.array(list(itertools.product(range(5), range(5, 10)))) @@ -568,7 +568,7 @@ def test_between_all_pairs(self): def test_within_bad_args(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables for bad_samples in ["sdf", {}]: with pytest.raises(ValueError): tc.ibd_segments_within(bad_samples) @@ -587,7 +587,7 @@ def test_within_bad_args(self): def test_between_bad_args(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables with pytest.raises(TypeError): tc.ibd_segments_between() with pytest.raises(TypeError): @@ -615,7 +615,7 @@ def test_between_bad_args(self): def test_get_output(self): ts = msprime.simulate(5, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables pairs = [(0, 1), (2, 3)] result = tc.ibd_segments_within([0, 1, 2, 3], store_segments=True) assert isinstance(result, _tskit.IdentitySegments) @@ -634,7 +634,7 
@@ def test_get_output(self): def test_get_bad_args(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables result = tc.ibd_segments_within([0, 1, 2], store_segments=True) with pytest.raises(TypeError): result.get() @@ -651,7 +651,7 @@ def test_get_bad_args(self): def test_print_state(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables result = tc.ibd_segments_within() with pytest.raises(TypeError): result.print_state() @@ -687,7 +687,7 @@ def test_direct_instantiation(self): def test_memory_management_within(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables result = tc.ibd_segments_within(store_segments=True) del ts, tc lst = result.get(0, 1) @@ -702,7 +702,7 @@ def test_memory_management_within(self): def test_memory_management_between(self): ts = msprime.simulate(10, random_seed=1) - tc = ts.tables._ll_tables + tc = ts.dump_tables()._ll_tables result = tc.ibd_segments_between([2, 2], range(4), store_segments=True) del ts, tc lst = result.get(0, 2) @@ -723,7 +723,7 @@ class TestTableMethods: @pytest.mark.parametrize("table_name", tskit.TABLE_NAMES) def test_table_extend(self, table_name, ts_fixture): - table = getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) assert len(table) >= 5 ll_table = table.ll_table table_copy = table.copy() @@ -753,7 +753,7 @@ def test_table_extend(self, table_name, ts_fixture): def test_table_extend_types( self, ts_fixture, table_name, row_indexes, expected_rows ): - table = getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) assert len(table) >= 5 ll_table = table.ll_table table_copy = table.copy() @@ -765,7 +765,7 @@ def test_table_extend_types( @pytest.mark.parametrize("table_name", tskit.TABLE_NAMES) def test_table_keep_rows_errors(self, table_name, ts_fixture): - table = 
getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) n = len(table) ll_table = table.ll_table with pytest.raises(ValueError, match="must be of length"): @@ -777,7 +777,7 @@ def test_table_keep_rows_errors(self, table_name, ts_fixture): @pytest.mark.parametrize("table_name", tskit.TABLE_NAMES) def test_table_keep_rows_all(self, table_name, ts_fixture): - table = getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) n = len(table) ll_table = table.ll_table a = ll_table.keep_rows(np.ones(n, dtype=bool)) @@ -788,7 +788,7 @@ def test_table_keep_rows_all(self, table_name, ts_fixture): @pytest.mark.parametrize("table_name", tskit.TABLE_NAMES) def test_table_keep_rows_none(self, table_name, ts_fixture): - table = getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) n = len(table) ll_table = table.ll_table a = ll_table.keep_rows(np.zeros(n, dtype=bool)) @@ -821,7 +821,7 @@ def test_individual_table_keep_rows_ref_error(self): ], ) def test_table_update(self, ts_fixture, table_name, column_name): - table = getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) copy = table.copy() ll_table = table.ll_table @@ -985,7 +985,7 @@ class TestTableMethodsErrors: """ def yield_tables(self, ts): - for table in ts.tables.table_name_map.values(): + for table in ts.dump_tables().table_name_map.values(): yield table.ll_table @pytest.mark.parametrize( @@ -993,7 +993,7 @@ def yield_tables(self, ts): tskit.TABLE_NAMES, ) def test_table_extend_bad_args(self, ts_fixture, table_name): - table = getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) ll_table = table.ll_table ll_table_copy = table.copy().ll_table @@ -1026,7 +1026,7 @@ def test_table_extend_bad_args(self, ts_fixture, table_name): @pytest.mark.parametrize("table_name", tskit.TABLE_NAMES) def test_update_bad_row_index(self, 
ts_fixture, table_name): - table = getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) ll_table = table.ll_table row_data = ll_table.get_row(0) with pytest.raises(_tskit.LibraryError, match="out of bounds"): @@ -1077,7 +1077,7 @@ def test_flag_underflow_overflow(self, table): table.add_row(flags=-1) def test_index(self): - tc = msprime.simulate(10, random_seed=42).tables._ll_tables + tc = msprime.simulate(10, random_seed=42).dump_tables()._ll_tables assert tc.indexes["edge_insertion_order"].dtype == np.int32 assert tc.indexes["edge_removal_order"].dtype == np.int32 assert np.array_equal( @@ -1110,12 +1110,12 @@ def test_index(self): ) def test_no_indexes(self): - tc = msprime.simulate(10, random_seed=42).tables._ll_tables + tc = msprime.simulate(10, random_seed=42).dump_tables()._ll_tables tc.drop_index() assert tc.indexes == {} def test_bad_indexes(self): - tc = msprime.simulate(10, random_seed=42).tables._ll_tables + tc = msprime.simulate(10, random_seed=42).dump_tables()._ll_tables for col in ("insertion", "removal"): d = tc.indexes d[f"edge_{col}_order"] = d[f"edge_{col}_order"][:-1] @@ -1148,9 +1148,11 @@ def test_bad_indexes(self): ): tc.indexes = d - tc = msprime.simulate( - 10, recombination_rate=10, random_seed=42 - ).tables._ll_tables + tc = ( + msprime.simulate(10, recombination_rate=10, random_seed=42) + .dump_tables() + ._ll_tables + ) modify_indexes = tc.indexes shape = modify_indexes["edge_insertion_order"].shape modify_indexes["edge_insertion_order"] = np.zeros(shape, dtype=np.int32) @@ -1205,6 +1207,14 @@ class TestTreeSequence(LowLevelTestCase, MetadataTestMixin): "mutations_time", "mutations_metadata", "mutations_metadata_offset", + "sites_ancestral_state", + "sites_ancestral_state_offset", + "mutations_derived_state", + "mutations_derived_state_offset", + "provenances_record", + "provenances_record_offset", + "provenances_timestamp", + "provenances_timestamp_offset", "migrations_left", 
"migrations_right", "migrations_node", @@ -2198,11 +2208,11 @@ def test_generated_columns(self, ts_fixture, name): @pytest.mark.parametrize( "string_array", [ - "sites_ancestral_state", - "mutations_derived_state", - "mutations_inherited_state", - "provenances_timestamp", - "provenances_record", + "sites_ancestral_state_string", + "mutations_derived_state_string", + "mutations_inherited_state_string", + "provenances_timestamp_string", + "provenances_record_string", ], ) @pytest.mark.parametrize( @@ -2215,19 +2225,19 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): else: if str_lengths == "all-1": ts = ts_fixture - if string_array == "sites_ancestral_state": + if string_array == "sites_ancestral_state_string": assert ts.num_sites > 0 assert {len(site.ancestral_state) for site in ts.sites()} == {1} - elif string_array == "mutations_derived_state": + elif string_array == "mutations_derived_state_string": assert ts.num_mutations > 0 assert {len(mut.derived_state) for mut in ts.mutations()} == {1} - elif string_array == "mutations_inherited_state": + elif string_array == "mutations_inherited_state_string": assert ts.num_mutations > 0 assert {len(mut.inherited_state) for mut in ts.mutations()} == {1} - elif string_array == "provenances_timestamp": + elif string_array == "provenances_timestamp_string": assert ts.num_provenances > 0 assert len(ts.provenance(3).timestamp) == 1 - elif string_array == "provenances_record": + elif string_array == "provenances_record_string": assert ts.num_provenances > 0 assert len(ts.provenance(3).record) == 1 else: @@ -2241,7 +2251,7 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): "unicode": lambda i, item: "🧬" * (i + 1), } - if string_array == "sites_ancestral_state": + if string_array == "sites_ancestral_state_string": sites = tables.sites.copy() tables.sites.clear() get_ancestral_state = str_map[str_lengths] @@ -2249,7 +2259,7 @@ def test_string_arrays(self, ts_fixture, str_lengths, 
string_array): tables.sites.append( site.replace(ancestral_state=get_ancestral_state(i, site)) ) - elif string_array == "mutations_derived_state": + elif string_array == "mutations_derived_state_string": mutations = tables.mutations.copy() tables.mutations.clear() get_derived_state = str_map[str_lengths] @@ -2259,7 +2269,7 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): derived_state=get_derived_state(i, mutation) ) ) - elif string_array == "mutations_inherited_state": + elif string_array == "mutations_inherited_state_string": # For inherited state, we modify sites and mutations to create # varied lengths sites = tables.sites.copy() @@ -2278,7 +2288,7 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): derived_state=get_derived_state(i, mutation) ) ) - elif string_array == "provenances_timestamp": + elif string_array == "provenances_timestamp_string": provenances = tables.provenances.copy() tables.provenances.clear() get_timestamp = str_map[str_lengths] @@ -2286,7 +2296,7 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): tables.provenances.append( provenance.replace(timestamp=get_timestamp(i, provenance)) ) - elif string_array == "provenances_record": + elif string_array == "provenances_record_string": provenances = tables.provenances.copy() tables.provenances.clear() get_record = str_map[str_lengths] @@ -2304,19 +2314,19 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): if str_lengths == "none": assert a.size == 0 else: - if string_array == "sites_ancestral_state": + if string_array == "sites_ancestral_state_string": for site in ts.sites(): assert a[site.id] == site.ancestral_state - elif string_array == "mutations_derived_state": + elif string_array == "mutations_derived_state_string": for mutation in ts.mutations(): assert a[mutation.id] == mutation.derived_state - elif string_array == "mutations_inherited_state": + elif string_array == "mutations_inherited_state_string": for 
mutation in ts.mutations(): assert a[mutation.id] == mutation.inherited_state - elif string_array == "provenances_timestamp": + elif string_array == "provenances_timestamp_string": for provenance in ts.provenances(): assert a[provenance.id] == provenance.timestamp - elif string_array == "provenances_record": + elif string_array == "provenances_record_string": for provenance in ts.provenances(): assert a[provenance.id] == provenance.record diff --git a/python/tests/test_tables.py b/python/tests/test_tables.py index 5c59653c7b..8672b03201 100644 --- a/python/tests/test_tables.py +++ b/python/tests/test_tables.py @@ -860,7 +860,7 @@ def test_bad_offsets(self): def test_replace_with_wrong_class(self): t = self.table_class() with pytest.raises(TypeError, match="is required"): - t.replace_with(tskit.BaseTable(None, None)) + t.replace_with(tskit.MutableBaseTable(None, None)) class MetadataTestsMixin: @@ -2443,41 +2443,41 @@ def test_wf_example(self): def test_single_tree_no_mutations(self): ts = msprime.simulate(10, random_seed=self.random_seed) self.verify_sort_offset(ts) - self.verify_sort(ts.tables, 432) + self.verify_sort(ts.dump_tables(), 432) def test_single_tree_no_mutations_metadata(self): ts = msprime.simulate(10, random_seed=self.random_seed) ts = tsutil.add_random_metadata(ts, self.random_seed) - self.verify_sort(ts.tables, 12) + self.verify_sort(ts.dump_tables(), 12) def test_many_trees_no_mutations(self): ts = msprime.simulate(10, recombination_rate=2, random_seed=self.random_seed) assert ts.num_trees > 2 self.verify_sort_offset(ts) - self.verify_sort(ts.tables, 31) + self.verify_sort(ts.dump_tables(), 31) def test_single_tree_mutations(self): ts = msprime.simulate(10, mutation_rate=2, random_seed=self.random_seed) assert ts.num_sites > 2 self.verify_sort_offset(ts) - self.verify_sort(ts.tables, 83) + self.verify_sort(ts.dump_tables(), 83) def test_single_tree_mutations_metadata(self): ts = msprime.simulate(10, mutation_rate=2, random_seed=self.random_seed) 
assert ts.num_sites > 2 ts = tsutil.add_random_metadata(ts, self.random_seed) - self.verify_sort(ts.tables, 384) + self.verify_sort(ts.dump_tables(), 384) def test_single_tree_multichar_mutations(self): ts = msprime.simulate(10, random_seed=self.random_seed) ts = tsutil.insert_multichar_mutations(ts, self.random_seed) - self.verify_sort(ts.tables, 185) + self.verify_sort(ts.dump_tables(), 185) def test_single_tree_multichar_mutations_metadata(self): ts = msprime.simulate(10, random_seed=self.random_seed) ts = tsutil.insert_multichar_mutations(ts, self.random_seed) ts = tsutil.add_random_metadata(ts, self.random_seed) - self.verify_sort(ts.tables, 2175) + self.verify_sort(ts.dump_tables(), 2175) def test_many_trees_mutations(self): ts = msprime.simulate( @@ -2486,20 +2486,20 @@ def test_many_trees_mutations(self): assert ts.num_trees > 2 assert ts.num_sites > 2 self.verify_sort_offset(ts) - self.verify_sort(ts.tables, 173) + self.verify_sort(ts.dump_tables(), 173) def test_many_trees_multichar_mutations(self): ts = msprime.simulate(10, recombination_rate=2, random_seed=self.random_seed) assert ts.num_trees > 2 ts = tsutil.insert_multichar_mutations(ts, self.random_seed) - self.verify_sort(ts.tables, 16) + self.verify_sort(ts.dump_tables(), 16) def test_many_trees_multichar_mutations_metadata(self): ts = msprime.simulate(10, recombination_rate=2, random_seed=self.random_seed) assert ts.num_trees > 2 ts = tsutil.insert_multichar_mutations(ts, self.random_seed) ts = tsutil.add_random_metadata(ts, self.random_seed) - self.verify_sort(ts.tables, 91) + self.verify_sort(ts.dump_tables(), 91) def get_nonbinary_example(self, mutation_rate): ts = msprime.simulate( @@ -2524,20 +2524,20 @@ def get_nonbinary_example(self, mutation_rate): def test_nonbinary_trees(self): ts = self.get_nonbinary_example(mutation_rate=0) self.verify_sort_offset(ts) - self.verify_sort(ts.tables, 9182) + self.verify_sort(ts.dump_tables(), 9182) def test_nonbinary_trees_mutations(self): ts = 
self.get_nonbinary_example(mutation_rate=2) assert ts.num_trees > 2 assert ts.num_sites > 2 self.verify_sort_offset(ts) - self.verify_sort(ts.tables, 44) + self.verify_sort(ts.dump_tables(), 44) def test_unknown_times(self): ts = self.get_wf_example(seed=486) ts = tsutil.insert_branch_mutations(ts, mutations_per_branch=2) ts = tsutil.remove_mutation_times(ts) - self.verify_sort(ts.tables, 9182) + self.verify_sort(ts.dump_tables(), 9182) def test_stable_individual_order(self): # canonical should retain individual order lacking any other information @@ -2551,7 +2551,7 @@ def test_stable_individual_order(self): def test_discrete_times(self): ts = self.get_wf_example(seed=623) ts = tsutil.insert_discrete_time_mutations(ts) - self.verify_sort(ts.tables, 9183) + self.verify_sort(ts.dump_tables(), 9183) def test_incompatible_edges(self): ts1 = msprime.simulate(10, random_seed=self.random_seed) @@ -2833,7 +2833,7 @@ def test_sort_mutations_time(self): assert list(mutations.site) == [0, 0, 0, 1, 1, 1, 2, 2, 2] assert list(mutations.node) == [2, 1, 0, 0, 1, 2, 2, 0, 1] # Nans are not equal so swap in -1 - times = mutations.time + times = mutations.time.copy() times[np.isnan(times)] = -1 assert list(times) == [3.0, 2.0, 1.0, 0.5, 0.5, 0.5, 6.0, 4.0, -5.0] assert list(mutations.derived_state) == list( @@ -3613,12 +3613,12 @@ def test_from_dict(self, ts_fixture): t1.assert_equals(t2) def test_roundtrip_dict(self, ts_fixture): - t1 = ts_fixture.tables + t1 = ts_fixture.dump_tables() t2 = tskit.TableCollection.fromdict(t1.asdict()) t1.assert_equals(t2) def test_table_name_map(self, ts_fixture): - tables = ts_fixture.tables + tables = ts_fixture.dump_tables() td1 = { "individuals": tables.individuals, "populations": tables.populations, @@ -3904,7 +3904,7 @@ def test_sequence_length_longer_than_edges(self, ts_fixture): def test_indexes(self, simple_degree1_ts_fixture): tc = tskit.TableCollection(sequence_length=1) assert tc.indexes == tskit.TableCollectionIndexes() - tc = 
simple_degree1_ts_fixture.tables + tc = simple_degree1_ts_fixture.dump_tables() assert np.array_equal( tc.indexes.edge_insertion_order, np.arange(18, dtype=np.int32) ) @@ -4558,18 +4558,18 @@ def test_multichar_metadata(self): assert site.metadata == site.id * b"A" -class TestBaseTable: +class TestMutableBaseTable: """ Tests of the table superclass. """ def test_set_columns_not_implemented(self): - t = tskit.BaseTable(None, None) + t = tskit.MutableBaseTable(None, None) with pytest.raises(NotImplementedError): t.set_columns() def test_replace_with(self, ts_fixture): - # Although replace_with is a BaseTable method, it is simpler to test it + # Although replace_with is a MutableBaseTable method, it is simpler to test it # on the subclasses directly, as some differ e.g. in having metadata schemas original_tables = ts_fixture.dump_tables() original_tables.nodes.metadata_schema = tskit.MetadataSchema.permissive_json() @@ -4606,7 +4606,7 @@ def get_msprime_example(self, sample_size=10, seed=1234): # adding metadata and locations ts = tsutil.add_random_metadata(ts, seed) ts = tsutil.insert_random_ploidy_individuals(ts, max_ploidy=1) - return ts.tables + return ts.dump_tables() def get_wf_example(self, N=5, ngens=2, seed=1249): tables = wf.wf_sim(N, N, num_pops=2, seed=seed) @@ -4615,7 +4615,7 @@ def get_wf_example(self, N=5, ngens=2, seed=1249): ts = tsutil.jukes_cantor(ts, 1, 10, seed=seed) ts = tsutil.add_random_metadata(ts, seed) ts = tsutil.insert_random_ploidy_individuals(ts, max_ploidy=2) - return ts.tables + return ts.dump_tables() def get_examples(self, seed): yield self.get_msprime_example(seed=seed) @@ -4827,7 +4827,7 @@ def test_no_remove_unreferenced(self): assert tables.populations == sub_tables.populations assert tables.individuals == sub_tables.individuals ts = tables.tree_sequence() - sub_tables = ts.subset([], remove_unreferenced=False).tables + sub_tables = ts.subset([], remove_unreferenced=False).dump_tables() assert tables.sites == sub_tables.sites 
assert tables.populations == sub_tables.populations assert tables.individuals == sub_tables.individuals @@ -4911,8 +4911,12 @@ def split_example(self, ts, T): shared_nodes = [n.id for n in ts.nodes() if n.time >= T] pop1 = list(ts.samples(population=0)) pop2 = list(ts.samples(population=1)) - tables1 = ts.simplify(shared_nodes + pop1, record_provenance=False).tables - tables2 = ts.simplify(shared_nodes + pop2, record_provenance=False).tables + tables1 = ts.simplify( + shared_nodes + pop1, record_provenance=False + ).dump_tables() + tables2 = ts.simplify( + shared_nodes + pop2, record_provenance=False + ).dump_tables() node_mapping = [ i if i < len(shared_nodes) else tskit.NULL for i in range(tables2.nodes.num_rows) @@ -5085,9 +5089,10 @@ def verify_union_consistency(self, tables, other, node_mapping): assert s2.position == su.position assert s2.ancestral_state == su.ancestral_state # check mutation parents - tables_union = tsu.tables + expected_tables = tsu.dump_tables() + tables_union = expected_tables.copy() tables_union.compute_mutation_parents() - assert tables_union.mutations == tsu.tables.mutations + assert tables_union.mutations == expected_tables.mutations def test_union_empty(self): tables = self.get_msprime_example(sample_size=3, T=2, seed=9328).dump_tables() @@ -5112,7 +5117,9 @@ def test_noshared_example(self): node_mapping = np.full(ts2.num_nodes, tskit.NULL, dtype="int32") uni1 = ts1.union(ts2, node_mapping, record_provenance=False) uni2_tables = ts1.dump_tables() - tsutil.py_union(uni2_tables, ts2.tables, node_mapping, record_provenance=False) + tsutil.py_union( + uni2_tables, ts2.dump_tables(), node_mapping, record_provenance=False + ) assert uni1.tables == uni2_tables def test_all_shared_example(self): @@ -5161,13 +5168,13 @@ def test_examples(self): with self.subTest(N=N, T=T): ts = self.get_msprime_example(N, T=T, seed=888) if mut_times: - tables = ts.tables + tables = ts.dump_tables() tables.compute_mutation_times() ts = tables.tree_sequence() 
self.verify_union(*self.split_example(ts, T)) ts = self.get_wf_example(N=N, T=T, seed=827) if mut_times: - tables = ts.tables + tables = ts.dump_tables() tables.compute_mutation_times() ts = tables.tree_sequence() self.verify_union(*self.split_example(ts, T)) @@ -5176,7 +5183,7 @@ def test_examples(self): class TestTableSetitemMetadata: @pytest.mark.parametrize("table_name", tskit.TABLE_NAMES) def test_setitem_metadata(self, ts_fixture, table_name): - table = getattr(ts_fixture.tables, table_name) + table = getattr(ts_fixture.dump_tables(), table_name) if hasattr(table, "metadata_schema"): assert table.metadata_schema == tskit.MetadataSchema({"codec": "json"}) assert table[0].metadata != table[1].metadata diff --git a/python/tests/test_threads.py b/python/tests/test_threads.py index ad96cb3d52..6d835f5d01 100644 --- a/python/tests/test_threads.py +++ b/python/tests/test_threads.py @@ -156,7 +156,7 @@ def get_tables(self): ts = msprime.simulate( 100, mutation_rate=10, recombination_rate=10, random_seed=8 ) - return ts.tables + return ts.dump_tables() def run_multiple_writers(self, writer, num_writers=32): barrier = threading.Barrier(num_writers) diff --git a/python/tests/test_topology.py b/python/tests/test_topology.py index dd1eb9f23e..e19c957e9c 100644 --- a/python/tests/test_topology.py +++ b/python/tests/test_topology.py @@ -862,7 +862,7 @@ def test_single_record(self): ts = tskit.load_text(nodes, edges, strict=False) tss, node_map = ts.simplify(map_nodes=True) assert list(node_map) == [0, 1] - assert tss.dump_tables().nodes == ts.dump_tables().nodes + assert tss.tables.nodes == ts.tables.nodes simplified_edges = list(tss.edges()) assert len(simplified_edges) == 1 e = simplified_edges[0] @@ -873,16 +873,16 @@ def test_single_tree(self): ts = msprime.simulate(10, random_seed=self.random_seed) ts_redundant = tsutil.insert_redundant_breakpoints(ts) tss = ts_redundant.simplify() - assert tss.dump_tables().nodes == ts.dump_tables().nodes - assert 
tss.dump_tables().edges == ts.dump_tables().edges + assert tss.tables.nodes == ts.tables.nodes + assert tss.tables.edges == ts.tables.edges def test_many_trees(self): ts = msprime.simulate(20, recombination_rate=5, random_seed=self.random_seed) assert ts.num_trees > 2 ts_redundant = tsutil.insert_redundant_breakpoints(ts) tss = ts_redundant.simplify() - assert tss.dump_tables().nodes == ts.dump_tables().nodes - assert tss.dump_tables().edges == ts.dump_tables().edges + assert tss.tables.nodes == ts.tables.nodes + assert tss.tables.edges == ts.tables.edges class TestRedundantBreakpoints(TopologyTestCase): @@ -4899,7 +4899,7 @@ def do_map(self, ts, ancestors, samples=None, compare_lib=True): s = tests.AncestorMap(ts, samples, ancestors) ancestor_table = s.link_ancestors() if compare_lib: - lib_result = ts.tables.link_ancestors(samples, ancestors) + lib_result = ts.dump_tables().link_ancestors(samples, ancestors) assert ancestor_table == lib_result return ancestor_table @@ -4912,7 +4912,7 @@ def test_deprecated_name(self): ancestors = [8] s = tests.AncestorMap(ts, samples, ancestors) tss = s.link_ancestors() - lib_result = ts.tables.map_ancestors(samples, ancestors) + lib_result = ts.dump_tables().map_ancestors(samples, ancestors) assert tss == lib_result assert list(tss.parent) == [8, 8, 8, 8, 8] assert list(tss.child) == [0, 1, 2, 3, 4] @@ -5141,7 +5141,7 @@ class TestMutationParent: def verify_parents(self, ts): parent = tsutil.compute_mutation_parent(ts) - tables = ts.tables + tables = ts.dump_tables() assert np.array_equal(parent, tables.mutations.parent) tables.mutations.parent = np.zeros_like(tables.mutations.parent) - 1 assert np.all(tables.mutations.parent == tskit.NULL) @@ -5334,7 +5334,7 @@ class TestMutationTime: seed = 42 def verify_times(self, ts): - tables = ts.tables + tables = ts.dump_tables() # Clear out the existing mutations as they come from msprime tables.mutations.time = np.full( tables.mutations.time.shape, -1, dtype=np.float64 @@ -5397,7 
+5397,7 @@ def test_example(self): ) # ts.dump_text(mutations=sys.stdout) # self.assertFalse(True) - tables = ts.tables + tables = ts.dump_tables() python_time = tsutil.compute_mutation_times(ts) assert np.allclose(python_time, tables.mutations.time, rtol=1e-15, atol=1e-15) tables.mutations.time = np.full( @@ -5775,7 +5775,7 @@ class TestSquashEdges: """ def do_squash(self, ts, compare_lib=True): - squashed = ts.tables.edges + squashed = ts.dump_tables().edges squashed.squash() if compare_lib: squashed_list = squash_edges(ts) @@ -6604,7 +6604,7 @@ def test_migration_error(self): random_seed=1, ) with pytest.raises(tskit.LibraryError): - ts.tables.keep_intervals([[0, 1]]) + ts.dump_tables().keep_intervals([[0, 1]]) def test_bad_intervals(self): tables = tskit.TableCollection(10) @@ -6619,7 +6619,7 @@ def test_one_interval(self): ts = msprime.simulate( 10, random_seed=self.random_seed, recombination_rate=2, mutation_rate=2 ) - tables = ts.tables + tables = ts.dump_tables() intervals = [(0.3, 0.7)] for simplify in (True, False): for rec_prov in (True, False): @@ -6629,7 +6629,7 @@ def test_two_intervals(self): ts = msprime.simulate( 10, random_seed=self.random_seed, recombination_rate=2, mutation_rate=2 ) - tables = ts.tables + tables = ts.dump_tables() intervals = [(0.1, 0.2), (0.8, 0.9)] for simplify in (True, False): for rec_prov in (True, False): @@ -6639,7 +6639,7 @@ def test_ten_intervals(self): ts = msprime.simulate( 10, random_seed=self.random_seed, recombination_rate=2, mutation_rate=2 ) - tables = ts.tables + tables = ts.dump_tables() intervals = [(x, x + 0.05) for x in np.arange(0.0, 1.0, 0.1)] for simplify in (True, False): for rec_prov in (True, False): @@ -6649,7 +6649,7 @@ def test_hundred_intervals(self): ts = msprime.simulate( 10, random_seed=self.random_seed, recombination_rate=2, mutation_rate=2 ) - tables = ts.tables + tables = ts.dump_tables() intervals = [(x, x + 0.005) for x in np.arange(0.0, 1.0, 0.01)] for simplify in (True, False): for 
rec_prov in (True, False): @@ -6659,7 +6659,7 @@ def test_regular_intervals(self): ts = msprime.simulate( 3, random_seed=1234, recombination_rate=2, mutation_rate=2 ) - tables = ts.tables + tables = ts.dump_tables() eps = 0.0125 for num_intervals in range(2, 10): breaks = np.linspace(0, ts.sequence_length, num=num_intervals) diff --git a/python/tests/test_wright_fisher.py b/python/tests/test_wright_fisher.py index 94245c1fb6..b2f58735ac 100644 --- a/python/tests/test_wright_fisher.py +++ b/python/tests/test_wright_fisher.py @@ -400,7 +400,7 @@ def test_with_mutations(self): tables.sort() ts = tables.tree_sequence() ts = tsutil.jukes_cantor(ts, 10, 0.1, seed=self.random_seed) - tables = ts.tables + tables = ts.dump_tables() assert tables.sites.num_rows > 0 assert tables.mutations.num_rows > 0 samples = np.where(tables.nodes.flags == tskit.NODE_IS_SAMPLE)[0].astype( @@ -427,7 +427,7 @@ def test_with_recurrent_mutations(self): tables.sort() ts = tables.tree_sequence() ts = tsutil.jukes_cantor(ts, 1, 10, seed=self.random_seed) - tables = ts.tables + tables = ts.dump_tables() assert tables.sites.num_rows == 1 assert tables.mutations.num_rows > 0 # before simplify diff --git a/python/tests/tsutil.py b/python/tests/tsutil.py index 242139ac02..dc501834f6 100644 --- a/python/tests/tsutil.py +++ b/python/tests/tsutil.py @@ -117,7 +117,7 @@ def insert_branch_mutations(ts, mutations_per_branch=1): def remove_mutation_times(ts): - tables = ts.tables + tables = ts.dump_tables() tables.mutations.time = np.full_like(tables.mutations.time, tskit.UNKNOWN_TIME) return tables.tree_sequence() @@ -128,7 +128,7 @@ def insert_discrete_time_mutations(ts, num_times=4, num_sites=10): positions, at only a discrete set of times (the same for all trees): at num_times times evenly spaced between 0 and the maximum time. 
""" - tables = ts.tables + tables = ts.dump_tables() tables.sites.clear() tables.mutations.clear() height = max(t.time(t.roots[0]) for t in ts.trees()) diff --git a/python/tskit/exceptions.py b/python/tskit/exceptions.py index ed0e7d0791..087c7cbec9 100644 --- a/python/tskit/exceptions.py +++ b/python/tskit/exceptions.py @@ -60,3 +60,11 @@ class MetadataEncodingError(TskitException): """ A metadata object was of a type that could not be encoded """ + + +class ImmutableTableError(ValueError): + """ + Raised when attempting to modify an immutable table view. + + Use TreeSequence.dump_tables() to get a mutable copy. + """ diff --git a/python/tskit/metadata.py b/python/tskit/metadata.py index d40c3df262..c447debab2 100644 --- a/python/tskit/metadata.py +++ b/python/tskit/metadata.py @@ -42,6 +42,7 @@ import tskit import tskit.exceptions as exceptions +import tskit.util as util __builtins__object__setattr__ = builtins.object.__setattr__ @@ -1041,3 +1042,106 @@ def assert_equals(self, other: MetadataProvider): raise AssertionError( f"Metadata differs: self={self.metadata} " f"other={other.metadata}" ) + + +NOTSET = object() # Sentinel for unset default values + + +class TableMetadataReader: + # Mixin for table classes that expose decoded metadata + + @property + def metadata_schema(self) -> MetadataSchema: + """ + The :class:`tskit.MetadataSchema` for this table. + """ + # This isn't as inefficient as it looks because we're using an LRU cache on + # the parse_metadata_schema function. Thus, we're really only incurring the + # cost of creating the unicode string from the low-level schema and looking + # up the functools cache. + return parse_metadata_schema(self.ll_table.metadata_schema) + + def metadata_vector(self, key, *, dtype=None, default_value=NOTSET): + """ + Returns a numpy array of metadata values obtained by extracting ``key`` + from each metadata entry, and using ``default_value`` if the key is + not present. 
``key`` may be a list, in which case nested values are returned. + For instance, ``key = ["a", "x"]`` will return an array of + ``row.metadata["a"]["x"]`` values, iterated over rows in this table. + + :param str key: The name, or a list of names, of metadata entries. + :param str dtype: The dtype of the result (can usually be omitted). + :param object default_value: The value to be inserted if the metadata key + is not present. Note that for numeric columns, a default value of None + will result in a non-numeric array. The default behaviour is to raise + ``KeyError`` on missing entries. + """ + from collections.abc import Mapping + + if default_value == NOTSET: + + def getter(d, k): + return d[k] + + else: + + def getter(d, k): + return ( + d.get(k, default_value) if isinstance(d, Mapping) else default_value + ) + + if isinstance(key, list): + out = np.array( + [functools.reduce(getter, key, row.metadata) for row in self], + dtype=dtype, + ) + else: + out = np.array( + [getter(row.metadata, key) for row in self], + dtype=dtype, + ) + return out + + def _make_row(self, *args): + return self.row_class(*args, metadata_decoder=self.metadata_schema.decode_row) + + +class TableMetadataWriter(TableMetadataReader): + # Mixin for tables writing metadata + + @TableMetadataReader.metadata_schema.setter + def metadata_schema(self, schema: MetadataSchema) -> None: + if not isinstance(schema, MetadataSchema): + raise TypeError( + "Only instances of tskit.MetadataSchema can be assigned to " + f"metadata_schema, not {type(schema)}" + ) + self.ll_table.metadata_schema = repr(schema) + + def packset_metadata(self, metadatas): + """ + Packs the specified list of metadata values and updates the ``metadata`` + and ``metadata_offset`` columns. The length of the metadatas array + must be equal to the number of rows in the table. + + :param list metadatas: A list of metadata bytes values. 
+ """ + packed, offset = util.pack_bytes(metadatas) + data = self.asdict() + data["metadata"] = packed + data["metadata_offset"] = offset + self.set_columns(**data) + + def drop_metadata(self, *, keep_schema=False): + """ + Drops all metadata in this table. By default, the schema is also cleared, + except if ``keep_schema`` is True. + + :param bool keep_schema: True if the current schema should be kept intact. + """ + data = self.asdict() + data["metadata"] = [] + data["metadata_offset"][:] = 0 + self.set_columns(**data) + if not keep_schema: + self.metadata_schema = MetadataSchema.null() diff --git a/python/tskit/tables.py b/python/tskit/tables.py index 09e22e4443..409fdcea4e 100644 --- a/python/tskit/tables.py +++ b/python/tskit/tables.py @@ -24,15 +24,13 @@ """ Tree sequence IO via the tables API. """ -import collections.abc +import collections import dataclasses import datetime import json import numbers import warnings -from collections.abc import Mapping from dataclasses import dataclass -from functools import reduce from typing import Dict from typing import Optional from typing import Union @@ -45,21 +43,11 @@ import tskit.provenance as provenance import tskit.util as util from tskit import UNKNOWN_TIME +from tskit.exceptions import ImmutableTableError dataclass_options = {"frozen": True} -# Needed for cases where `None` can be an appropriate kwarg value, -# we override the meta so that it looks good in the docs. -class NotSetMeta(type): - def __repr__(cls): - return "Not set" - - -class NOTSET(metaclass=NotSetMeta): - pass - - @metadata.lazy_decode() @dataclass(**dataclass_options) class IndividualTableRow(util.Dataclass): @@ -341,34 +329,17 @@ def keep_with_offset(keep, data, offset): class BaseTable: - """ - Superclass of high-level tables. Not intended for direct instantiation. - """ + # Base class for all tables, with only immutable methods # The list of columns in the table. Must be set by subclasses. 
column_names = [] - - def __init__(self, ll_table, row_class): - self.ll_table = ll_table - self.row_class = row_class + mutable = None def _check_required_args(self, **kwargs): for k, v in kwargs.items(): if v is None: raise TypeError(f"{k} is required") - @property - def num_rows(self) -> int: - return self.ll_table.num_rows - - @property - def max_rows(self) -> int: - return self.ll_table.max_rows - - @property - def max_rows_increment(self) -> int: - return self.ll_table.max_rows_increment - @property def nbytes(self) -> int: """ @@ -403,13 +374,27 @@ def equals(self, other, ignore_metadata=False): :return: True if other is equal to this table; False otherwise. :rtype: bool """ - # Note: most tables support ignore_metadata, we can override for those that don't - ret = False - if type(other) is type(self): - ret = bool( - self.ll_table.equals(other.ll_table, ignore_metadata=ignore_metadata) - ) - return ret + if self is other: + return True + + if not isinstance(other, BaseTable): + return False + + if self.mutable and type(self) is type(other): + return self._fast_equals(other, ignore_metadata=ignore_metadata) + + # Both mutable but different types + if self.mutable and other.mutable: + return False + + # Swap so that self is immutable + if self.mutable: + (self, other) = (other, self) + + if self.table_name != other.table_name: + return False + + return self._fast_equals(other, ignore_metadata=ignore_metadata) def assert_equals(self, other, *, ignore_metadata=False): """ @@ -420,13 +405,30 @@ def assert_equals(self, other, *, ignore_metadata=False): :param bool ignore_metadata: If True exclude metadata and metadata schemas from the comparison. 
""" - if type(other) is not type(self): + if self is other: + return + + if not isinstance(other, BaseTable): raise AssertionError(f"Types differ: self={type(self)} other={type(other)}") - # Check using the low-level method to avoid slowly going through everything - if self.equals(other, ignore_metadata=ignore_metadata): + if self.mutable and type(self) is type(other): + if not self._fast_equals(other, ignore_metadata=ignore_metadata): + self._assert_equals(other, ignore_metadata=ignore_metadata) return + if self.mutable and other.mutable: + raise AssertionError(f"Types differ: self={type(self)} other={type(other)}") + + if self.mutable: + (self, other) = (other, self) + + if self.table_name != other.table_name: + raise AssertionError(f"Types differ: self={type(self)} other={type(other)}") + + if not self._fast_equals(other, ignore_metadata=ignore_metadata): + self._assert_equals(other, ignore_metadata=ignore_metadata) + + def _assert_equals(self, other, *, ignore_metadata=False): if not ignore_metadata and self.metadata_schema != other.metadata_schema: raise AssertionError( f"{type(self).__name__} metadata schemas differ: " @@ -466,6 +468,8 @@ def assert_equals(self, other, *, ignore_metadata=False): # differ when the decoded schema is the same if ( not ignore_metadata + and hasattr(self, "ll_table") + and hasattr(other, "ll_table") and self.ll_table.metadata_schema != other.ll_table.metadata_schema and self.metadata_schema == other.metadata_schema ): @@ -482,6 +486,190 @@ def __eq__(self, other): def __len__(self): return self.num_rows + def asdict(self): + """ + Returns a dictionary mapping the names of the columns in this table + to the corresponding numpy arrays. 
+ """ + ret = {col: getattr(self, col) for col in self.column_names} + # Not all tables have metadata + try: + ret["metadata_schema"] = repr(self.metadata_schema) + except AttributeError: + pass + return ret + + def __str__(self): + headers, rows = self._text_header_and_rows( + limit=tskit._print_options["max_lines"] + ) + return util.unicode_table(rows, header=headers, row_separator=False) + + def _repr_html_(self): + """ + Called e.g. by jupyter notebooks to render tables + """ + headers, rows = self._text_header_and_rows( + limit=tskit._print_options["max_lines"] + ) + return util.html_table(rows, header=headers) + + def _columns_all_integer(self, *colnames): + # For displaying floating point values without loads of decimal places + return all( + np.all(getattr(self, col) == np.floor(getattr(self, col))) + for col in colnames + ) + + def _text_header_and_rows(self, limit=None): + """ + Returns headers and rows for table display. + """ + # Generate headers: "id" + column names (excluding offset columns) + display_columns = [ + col for col in self.column_names if not col.endswith("_offset") + ] + headers = ("id",) + tuple(display_columns) + + rows = [] + row_indexes = util.truncate_rows(self.num_rows, limit) + + float_columns = {} + for col in display_columns: + arr = getattr(self, col) + if np.issubdtype(arr.dtype, np.floating): + float_columns[col] = 0 if self._columns_all_integer(col) else 8 + + for j in row_indexes: + if j == -1: + rows.append(f"__skipped__{self.num_rows - limit}") + else: + row = self[j] + formatted_values = [f"{j:,}"] # ID column + for col in display_columns: + value = getattr(row, col) + if col == "metadata": + formatted_values.append(util.render_metadata(value)) + elif col in ["location", "parents"]: + # Array columns - join with commas + if col == "parents": + formatted_values.append( + ", ".join([f"{p:,}" for p in value]) + ) + else: + formatted_values.append(", ".join(map(str, value))) + elif col in float_columns: + dp = 
float_columns[col] + formatted_values.append(f"{value:,.{dp}f}") + elif isinstance(value, (int, np.integer)): + formatted_values.append(f"{value:,}") + else: + formatted_values.append(str(value)) + rows.append(formatted_values) + return headers, rows + + +def _assert_table_collections_equal(tc1, tc2, **kwargs): + # This is shared between TableCollection and ImmutableTableCollection, + # could go in a base class, but there's not much else in common + ignore_metadata = kwargs.get("ignore_metadata", False) + ignore_ts_metadata = kwargs.get("ignore_ts_metadata", False) + ignore_provenance = kwargs.get("ignore_provenance", False) + ignore_timestamps = kwargs.get("ignore_timestamps", False) + ignore_reference_sequence = kwargs.get("ignore_reference_sequence", False) + ignore_tables = kwargs.get("ignore_tables", False) + + if not (ignore_metadata or ignore_ts_metadata): + if hasattr(tc1, "metadata_schema") and hasattr(tc2, "metadata_schema"): + if tc1.metadata_schema != tc2.metadata_schema: + raise AssertionError( + f"Metadata schemas differ: self={tc1.metadata_schema} " + f"other={tc2.metadata_schema}" + ) + if hasattr(tc1, "metadata") and hasattr(tc2, "metadata"): + if tc1.metadata != tc2.metadata: + raise AssertionError( + f"Metadata differs: self={tc1.metadata} other={tc2.metadata}" + ) + + if not ignore_reference_sequence: + tc1.reference_sequence.assert_equals( + tc2.reference_sequence, ignore_metadata=ignore_metadata + ) + + if tc1.time_units != tc2.time_units: + raise AssertionError( + f"Time units differs: self={tc1.time_units} other={tc2.time_units}" + ) + + if tc1.sequence_length != tc2.sequence_length: + raise AssertionError( + f"Sequence Length differs: self={tc1.sequence_length} " + f"other={tc2.sequence_length}" + ) + + if not ignore_tables: + for table_name, table in tc1.table_name_map.items(): + if table_name == "provenances": + continue + other_table = getattr(tc2, table_name) + if isinstance(table, ImmutableBaseTable): + table.assert_equals(other_table, 
ignore_metadata=ignore_metadata) + elif isinstance(other_table, ImmutableBaseTable): + other_table.assert_equals(table, ignore_metadata=ignore_metadata) + else: + table.assert_equals(other_table, ignore_metadata=ignore_metadata) + + if not ignore_provenance and not ignore_tables: + prov1 = tc1.provenances + prov2 = tc2.provenances + if isinstance(prov1, ImmutableProvenanceTable): + prov1.assert_equals(prov2, ignore_timestamps=ignore_timestamps) + elif isinstance(prov2, ImmutableProvenanceTable): + prov2.assert_equals(prov1, ignore_timestamps=ignore_timestamps) + else: + prov1.assert_equals(prov2, ignore_timestamps=ignore_timestamps) + + if ( + not ignore_metadata + and hasattr(tc1, "_ll_object") + and hasattr(tc2, "_ll_object") + and hasattr(tc1._ll_object, "metadata_schema") + and hasattr(tc2._ll_object, "metadata_schema") + and tc1._ll_object.metadata_schema != tc2._ll_object.metadata_schema + and tc1.metadata_schema == tc2.metadata_schema + ): + # Schemas differ in byte representation but are equivalent when decoded + return + + # If we reach here, all comparisons matched; treat collections as equal. + return + + +class MutableBaseTable(BaseTable): + # Abstract base class for mutable tables that use the low-level table implementation. 
+ + mutable = True + + def __init__(self, ll_table, row_class): + self.ll_table = ll_table + self.row_class = row_class + + def _fast_equals(self, other, *, ignore_metadata=False): + return self.ll_table.equals(other.ll_table, ignore_metadata=ignore_metadata) + + @property + def num_rows(self) -> int: + return self.ll_table.num_rows + + @property + def max_rows(self) -> int: + return self.ll_table.max_rows + + @property + def max_rows_increment(self) -> int: + return self.ll_table.max_rows_increment + def __getattr__(self, name): if name in self.column_names: return getattr(self.ll_table, name) @@ -499,7 +687,13 @@ def __setattr__(self, name, value): object.__setattr__(self, name, value) def _make_row(self, *args): - return self.row_class(*args) + try: + return self.row_class( + *args, metadata_decoder=self.metadata_schema.decode_row + ) + except AttributeError: + # No metadata schema + return self.row_class(*args) def __getitem__(self, index): """ @@ -662,19 +856,6 @@ def copy(self): copy.set_columns(**self.asdict()) return copy - def asdict(self): - """ - Returns a dictionary mapping the names of the columns in this table - to the corresponding numpy arrays. - """ - ret = {col: getattr(self, col) for col in self.column_names} - # Not all tables have metadata - try: - ret["metadata_schema"] = repr(self.metadata_schema) - except AttributeError: - pass - return ret - def set_columns(self, **kwargs): """ Sets the values for each column in this :class:`Table` using values @@ -682,143 +863,199 @@ def set_columns(self, **kwargs): """ raise NotImplementedError() - def __str__(self): - headers, rows = self._text_header_and_rows( - limit=tskit._print_options["max_lines"] - ) - return util.unicode_table(rows, header=headers, row_separator=False) - - def _repr_html_(self): - """ - Called e.g. 
by jupyter notebooks to render tables - """ - headers, rows = self._text_header_and_rows( - limit=tskit._print_options["max_lines"] - ) - return util.html_table(rows, header=headers) - - def _columns_all_integer(self, *colnames): - # For displaying floating point values without loads of decimal places - return all( - np.all(getattr(self, col) == np.floor(getattr(self, col))) - for col in colnames - ) +class ImmutableBaseTable(BaseTable): + # List of all mutation methods that should give a nice error + _MUTATION_METHODS = { + "add_row", + "clear", + "set_columns", + "truncate", + "replace_with", + "append_columns", + "keep_rows", + "append", + "reset", + # Table-specific packset methods + "packset_location", + "packset_parents", + "packset_ancestral_state", + "packset_derived_state", + "packset_record", + "packset_timestamp", + # EdgeTable-specific methods + "squash", + } + + # These are set by subclasses. + table_name = None + mutable_class = None + mutable = False + + # String columns use numpy1-style raw data and offset arrays directly + + def __init__(self, ll_tree_sequence, row_indices=None): + self._llts = ll_tree_sequence + # Subsets of immutable tables are done with views + # based on the row indices here + self._row_indices = row_indices + + if row_indices is None: + self.num_rows = getattr(self._llts, f"get_num_{self.table_name}")() + + # Optimization: directly assign array properties when no row_indices + # This avoids the `if self._row_indices is None` check on every + # property access + for column_name in self.column_names: + if not column_name.endswith("_schema"): + # Get array from low-level tree sequence + array = getattr(self._llts, f"{self.table_name}_{column_name}") + if column_name == "metadata": + array = np.asarray(array) + if array.dtype == np.uint8: + array = array.view(np.int8) + setattr(self, column_name, array) + else: + self.num_rows = len(row_indices) -class MetadataTable(BaseTable): - """ - Base class for tables that have a metadata 
column. - """ + def copy(self): + # Get a mutatable copy of this table + mutable_table = self.mutable_class() + column_data = self.asdict() + mutable_table.set_columns(**column_data) + return mutable_table - # TODO this class has some overlap with the MetadataProvider base class - # and also the TreeSequence class. These all have methods to deal with - # schemas and essentially do the same thing (provide a facade for the - # low-level get/set metadata schemas functionality). We should refactor - # this so we're only doing it in one place. - # https://github.com/tskit-dev/tskit/issues/1957 - def __init__(self, ll_table, row_class): - super().__init__(ll_table, row_class) + def __len__(self): + return self.num_rows - def _make_row(self, *args): - return self.row_class(*args, metadata_decoder=self.metadata_schema.decode_row) + def _fast_equals(self, other, *, ignore_metadata=False): + if self.num_rows != other.num_rows: + return False + if not ignore_metadata: + if self.metadata_schema != other.metadata_schema: + return False + for column_name in self.column_names: + if ignore_metadata and column_name.startswith("metadata"): + continue + if not np.array_equal( + getattr(self, column_name), getattr(other, column_name), equal_nan=True + ): + return False - def packset_metadata(self, metadatas): - """ - Packs the specified list of metadata values and updates the ``metadata`` - and ``metadata_offset`` columns. The length of the metadatas array - must be equal to the number of rows in the table. + return True - :param list metadatas: A list of metadata bytes values. - """ - packed, offset = util.pack_bytes(metadatas) - d = self.asdict() - d["metadata"] = packed - d["metadata_offset"] = offset - self.set_columns(**d) + def __getattr__(self, name): + # Handle attribute access. This method is only called when an attribute + # is not found through normal lookup. 
- @property - def metadata_schema(self) -> metadata.MetadataSchema: - """ - The :class:`tskit.MetadataSchema` for this table. - """ - # This isn't as inefficient as it looks because we're using an LRU cache on - # the parse_metadata_schema function. Thus, we're really only incurring the - # cost of creating the unicode string from the low-level schema and looking - # up the functools cache. - return metadata.parse_metadata_schema(self.ll_table.metadata_schema) + # If we are a subset with row_indices, we need to provide indexed access + # to array properties. + # we don't get to this code if we're a full table as we set the arrays + # directly in __init__. + if name in self.column_names: + # Get the full array and index it + full_array = getattr(self._llts, f"{self.table_name}_{name}") + # We have to view as int8 here to maintain compatibility with + # the LWT, and equality to data from low level tables. + # We can't update the LWT now as it would break + # backwards compatibility hard. + # https://github.com/tskit-dev/tskit/issues/3284 + if name == "metadata": + full_array = np.asarray(full_array) + if full_array.dtype == np.uint8: + full_array = full_array.view(np.int8) + + # Handle ragged arrays specially as we have to rebuild to give a view. 
+ offset_column_name = f"{name}_offset" + if offset_column_name in self.column_names: + offset_array = getattr( + self._llts, f"{self.table_name}_{offset_column_name}" + ) + indexed_offsets = offset_array[self._row_indices] + next_offsets = offset_array[self._row_indices + 1] + return np.concatenate( + [ + full_array[start:end] + for start, end in zip(indexed_offsets, next_offsets) + ], + dtype=full_array.dtype, + ) + elif name.endswith("_offset"): + # Recompute offsets for the indexed data + offset_array = full_array + indexed_offsets = offset_array[self._row_indices] + next_offsets = offset_array[self._row_indices + 1] + lengths = next_offsets - indexed_offsets + return np.concatenate([[0], np.cumsum(lengths)]).astype( + offset_array.dtype + ) + else: + return full_array[self._row_indices] - @metadata_schema.setter - def metadata_schema(self, schema: metadata.MetadataSchema) -> None: - if not isinstance(schema, metadata.MetadataSchema): - raise TypeError( - "Only instances of tskit.MetadataSchema can be assigned to " - f"metadata_schema, not {type(schema)}" + if name in self._MUTATION_METHODS: + raise ImmutableTableError( + f"Cannot call {name}() on immutable {self.table_name} table. " + f"Use TreeSequence.dump_tables() for mutable copy." ) - self.ll_table.metadata_schema = repr(schema) - - def metadata_vector(self, key, *, dtype=None, default_value=NOTSET): - """ - Returns a numpy array of metadata values obtained by extracting ``key`` - from each metadata entry, and using ``default_value`` if the key is - not present. ``key`` may be a list, in which case nested values are returned. - For instance, ``key = ["a", "x"]`` will return an array of - ``row.metadata["a"]["x"]`` values, iterated over rows in this table. - - :param str key: The name, or a list of names, of metadata entries. - :param str dtype: The dtype of the result (can usually be omitted). - :param object default_value: The value to be inserted if the metadata key - is not present. 
Note that for numeric columns, a default value of None - will result in a non-numeric array. The default behaviour is to raise - ``KeyError`` on missing entries. - """ - if default_value == NOTSET: + # If it's not a blocked method or column, delegate to parent classes + # This allows metadata mixins to handle metadata_schema and other attributes + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'" + ) - def getter(d, k): - return d[k] + def __getitem__(self, index): + if isinstance(index, numbers.Integral): + # A single row + if index < 0: + index += self.num_rows + if index < 0 or index >= self.num_rows: + raise IndexError("Index out of bounds") + if self._row_indices is None: + actual_index = index + else: + actual_index = self._row_indices[index] + return self._create_row_object(actual_index) else: + # Create a view from a slice or fancy index + if self._row_indices is None: + current_indices = np.arange(self.num_rows) + else: + current_indices = np.asarray(self._row_indices) + if isinstance(index, slice): + new_indices = current_indices[index] + else: + index = np.asarray(index) + if index.dtype == np.bool_: + if len(index) != len(current_indices): + raise IndexError("Boolean index must be same length as table") + new_indices = current_indices[index] + else: + new_indices = current_indices[index] + return self.__class__(self._llts, new_indices) - def getter(d, k): - return ( - d.get(k, default_value) if isinstance(d, Mapping) else default_value - ) - if isinstance(key, list): - out = np.array( - [ - reduce( - getter, - key, - row.metadata, - ) - for row in self - ], - dtype=dtype, - ) - else: - out = np.array( - [getter(row.metadata, key) for row in self], - dtype=dtype, - ) - return out +class MutableMetadataTable(MutableBaseTable, metadata.TableMetadataWriter): + pass - def drop_metadata(self, *, keep_schema=False): - """ - Drops all metadata in this table. 
By default, the schema is also cleared, - except if ``keep_schema`` is True. - :param bool keep_schema: True if the current schema should be kept intact. +class ImmutableMetadataTable(ImmutableBaseTable, metadata.TableMetadataReader): + @property + def metadata_schema(self): """ - data = self.asdict() - data["metadata"] = [] - data["metadata_offset"][:] = 0 - self.set_columns(**data) - if not keep_schema: - self.metadata_schema = metadata.MetadataSchema.null() + The :class:`tskit.MetadataSchema` for this table. + Overrides the base implementation to access schema from tree sequence. + """ + # Get table metadata schemas from tree sequence + schemas = self._llts.get_table_metadata_schemas() + # Use singular form for table name (individuals -> individual, etc.) + table_name_singular = self.table_name.rstrip("s") + schema_str = getattr(schemas, table_name_singular) + return metadata.parse_metadata_schema(schema_str) -class IndividualTable(MetadataTable): +class IndividualTable(MutableMetadataTable): """ A table defining the individuals in a tree sequence. 
Note that although each Individual has associated nodes, reference to these is not stored in @@ -852,6 +1089,7 @@ class IndividualTable(MetadataTable): :vartype metadata_schema: tskit.MetadataSchema """ + table_name = "individuals" column_names = [ "flags", "location", @@ -867,28 +1105,6 @@ def __init__(self, max_rows_increment=0, ll_table=None): ll_table = _tskit.IndividualTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, IndividualTableRow) - def _text_header_and_rows(self, limit=None): - headers = ("id", "flags", "location", "parents", "metadata") - rows = [] - row_indexes = util.truncate_rows(self.num_rows, limit) - for j in row_indexes: - if j == -1: - rows.append(f"__skipped__{self.num_rows - limit}") - else: - row = self[j] - location_str = ", ".join(map(str, row.location)) - parents_str = ", ".join([f"{p:,}" for p in row.parents]) - rows.append( - "{:,}\t{}\t{}\t{}\t{}".format( - j, - row.flags, - location_str, - parents_str, - util.render_metadata(row.metadata), - ).split("\t") - ) - return headers, rows - def add_row(self, flags=0, location=None, parents=None, metadata=None): """ Adds a new row to this :class:`IndividualTable` and returns the ID of the @@ -1098,7 +1314,7 @@ def keep_rows(self, keep): return super().keep_rows(keep) -class NodeTable(MetadataTable): +class NodeTable(MutableMetadataTable): """ A table defining the nodes in a tree sequence. 
See the :ref:`definitions ` for details on the columns @@ -1126,6 +1342,7 @@ class NodeTable(MetadataTable): :vartype metadata_schema: tskit.MetadataSchema """ + table_name = "nodes" column_names = [ "time", "flags", @@ -1140,29 +1357,6 @@ def __init__(self, max_rows_increment=0, ll_table=None): ll_table = _tskit.NodeTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, NodeTableRow) - def _text_header_and_rows(self, limit=None): - headers = ("id", "flags", "population", "individual", "time", "metadata") - rows = [] - row_indexes = util.truncate_rows(self.num_rows, limit) - decimal_places_times = 0 if self._columns_all_integer("time") else 8 - for j in row_indexes: - row = self[j] - if j == -1: - rows.append(f"__skipped__{self.num_rows - limit}") - else: - rows.append( - "{:,}\t{}\t{:,}\t{:,}\t{:,.{dp}f}\t{}".format( - j, - row.flags, - row.population, - row.individual, - row.time, - util.render_metadata(row.metadata), - dp=decimal_places_times, - ).split("\t") - ) - return headers, rows - def add_row(self, flags=0, time=0, population=-1, individual=-1, metadata=None): """ Adds a new row to this :class:`NodeTable` and returns the ID of the @@ -1292,7 +1486,7 @@ def append_columns( ) -class EdgeTable(MetadataTable): +class EdgeTable(MutableMetadataTable): """ A table defining the edges in a tree sequence. 
See the :ref:`definitions ` for details on the columns @@ -1320,6 +1514,7 @@ class EdgeTable(MetadataTable): :vartype metadata_schema: tskit.MetadataSchema """ + table_name = "edges" column_names = [ "left", "right", @@ -1334,29 +1529,6 @@ def __init__(self, max_rows_increment=0, ll_table=None): ll_table = _tskit.EdgeTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, EdgeTableRow) - def _text_header_and_rows(self, limit=None): - headers = ("id", "left", "right", "parent", "child", "metadata") - rows = [] - row_indexes = util.truncate_rows(self.num_rows, limit) - decimal_places = 0 if self._columns_all_integer("left", "right") else 8 - for j in row_indexes: - if j == -1: - rows.append(f"__skipped__{self.num_rows - limit}") - else: - row = self[j] - rows.append( - "{:,}\t{:,.{dp}f}\t{:,.{dp}f}\t{:,}\t{:,}\t{}".format( - j, - row.left, - row.right, - row.parent, - row.child, - util.render_metadata(row.metadata), - dp=decimal_places, - ).split("\t") - ) - return headers, rows - def add_row(self, left, right, parent, child, metadata=None): """ Adds a new row to this :class:`EdgeTable` and returns the ID of the @@ -1500,7 +1672,7 @@ def squash(self): self.ll_table.squash() -class MigrationTable(MetadataTable): +class MigrationTable(MutableMetadataTable): """ A table defining the migrations in a tree sequence. 
See the :ref:`definitions ` for details on the columns @@ -1533,6 +1705,7 @@ class MigrationTable(MetadataTable): :vartype metadata_schema: tskit.MetadataSchema """ + table_name = "migrations" column_names = [ "left", "right", @@ -1549,35 +1722,6 @@ def __init__(self, max_rows_increment=0, ll_table=None): ll_table = _tskit.MigrationTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, MigrationTableRow) - def _text_header_and_rows(self, limit=None): - headers = ("id", "left", "right", "node", "source", "dest", "time", "metadata") - rows = [] - row_indexes = util.truncate_rows(self.num_rows, limit) - decimal_places_coords = 0 if self._columns_all_integer("left", "right") else 8 - decimal_places_times = 0 if self._columns_all_integer("time") else 8 - for j in row_indexes: - if j == -1: - rows.append(f"__skipped__{self.num_rows - limit}") - else: - row = self[j] - rows.append( - "{:,}\t{:,.{dp_c}f}\t{:,.{dp_c}f}\t{:,}\t{:,}\t{:,}\t{:,.{dp_t}f}\t{}".format( - j, - row.left, - row.right, - row.node, - row.source, - row.dest, - row.time, - util.render_metadata(row.metadata), - dp_c=decimal_places_coords, - dp_t=decimal_places_times, - ).split( - "\t" - ) - ) - return headers, rows - def add_row(self, left, right, node, source, dest, time, metadata=None): """ Adds a new row to this :class:`MigrationTable` and returns the ID of the @@ -1722,7 +1866,7 @@ def append_columns( ) -class SiteTable(MetadataTable): +class SiteTable(MutableMetadataTable): """ A table defining the sites in a tree sequence. 
See the :ref:`definitions ` for details on the columns @@ -1751,6 +1895,7 @@ class SiteTable(MetadataTable): :vartype metadata_schema: tskit.MetadataSchema """ + table_name = "sites" column_names = [ "position", "ancestral_state", @@ -1764,27 +1909,6 @@ def __init__(self, max_rows_increment=0, ll_table=None): ll_table = _tskit.SiteTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, SiteTableRow) - def _text_header_and_rows(self, limit=None): - headers = ("id", "position", "ancestral_state", "metadata") - rows = [] - row_indexes = util.truncate_rows(self.num_rows, limit) - decimal_places = 0 if self._columns_all_integer("position") else 8 - for j in row_indexes: - if j == -1: - rows.append(f"__skipped__{self.num_rows - limit}") - else: - row = self[j] - rows.append( - "{:,}\t{:,.{dp}f}\t{}\t{}".format( - j, - row.position, - row.ancestral_state, - util.render_metadata(row.metadata), - dp=decimal_places, - ).split("\t") - ) - return headers, rows - def add_row(self, position, ancestral_state, metadata=None): """ Adds a new row to this :class:`SiteTable` and returns the ID of the @@ -1929,7 +2053,7 @@ def packset_ancestral_state(self, ancestral_states): self.set_columns(**d) -class MutationTable(MetadataTable): +class MutationTable(MutableMetadataTable): """ A table defining the mutations in a tree sequence. 
See the :ref:`definitions ` for details on the columns @@ -1964,6 +2088,7 @@ class MutationTable(MetadataTable): :vartype metadata_schema: tskit.MetadataSchema """ + table_name = "mutations" column_names = [ "site", "node", @@ -1980,31 +2105,6 @@ def __init__(self, max_rows_increment=0, ll_table=None): ll_table = _tskit.MutationTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, MutationTableRow) - def _text_header_and_rows(self, limit=None): - headers = ("id", "site", "node", "time", "derived_state", "parent", "metadata") - rows = [] - row_indexes = util.truncate_rows(self.num_rows, limit) - # Currently mutations do not have discretised times: this for consistency - decimal_places_times = 0 if self._columns_all_integer("time") else 8 - for j in row_indexes: - if j == -1: - rows.append(f"__skipped__{self.num_rows - limit}") - else: - row = self[j] - rows.append( - "{:,}\t{:,}\t{:,}\t{:,.{dp}f}\t{}\t{:,}\t{}".format( - j, - row.site, - row.node, - row.time, - row.derived_state, - row.parent, - util.render_metadata(row.metadata), - dp=decimal_places_times, - ).split("\t") - ) - return headers, rows - def add_row(self, site, node, derived_state, parent=-1, metadata=None, time=None): """ Adds a new row to this :class:`MutationTable` and returns the ID of the @@ -2215,7 +2315,7 @@ def keep_rows(self, keep): return super().keep_rows(keep) -class PopulationTable(MetadataTable): +class PopulationTable(MutableMetadataTable): """ A table defining the populations referred to in a tree sequence. 
The PopulationTable stores metadata for populations that may be referred to @@ -2236,6 +2336,7 @@ class PopulationTable(MetadataTable): :vartype metadata_schema: tskit.MetadataSchema """ + table_name = "populations" column_names = ["metadata", "metadata_offset"] def __init__(self, max_rows_increment=0, ll_table=None): @@ -2261,17 +2362,6 @@ def add_row(self, metadata=None): metadata = self.metadata_schema.validate_and_encode_row(metadata) return self.ll_table.add_row(metadata=metadata) - def _text_header_and_rows(self, limit=None): - headers = ("id", "metadata") - rows = [] - row_indexes = util.truncate_rows(self.num_rows, limit) - for j in row_indexes: - if j == -1: - rows.append(f"__skipped__{self.num_rows - limit}") - else: - rows.append((str(j), util.render_metadata(self[j].metadata, length=70))) - return headers, rows - def set_columns(self, metadata=None, metadata_offset=None, metadata_schema=None): """ Sets the values for each column in this :class:`PopulationTable` using the @@ -2326,7 +2416,7 @@ def append_columns(self, metadata=None, metadata_offset=None): ) -class ProvenanceTable(BaseTable): +class ProvenanceTable(MutableBaseTable): """ A table recording the provenance (i.e., history) of this table, so that the origin of the underlying data and sequence of subsequent operations can be @@ -2351,6 +2441,7 @@ class ProvenanceTable(BaseTable): :vartype timestamp_offset: numpy.ndarray, dtype=np.uint32 """ + table_name = "provenances" column_names = ["record", "record_offset", "timestamp", "timestamp_offset"] def __init__(self, max_rows_increment=0, ll_table=None): @@ -2512,24 +2603,6 @@ def append_columns( ) ) - def _text_header_and_rows(self, limit=None): - headers = ("id", "timestamp", "record") - rows = [] - row_indexes = util.truncate_rows(self.num_rows, limit) - for j in row_indexes: - if j == -1: - rows.append(f"__skipped__{self.num_rows - limit}") - else: - row = self[j] - rows.append( - ( - str(j), - str(row.timestamp), - 
util.truncate_string_end(str(row.record), length=60), - ) - ) - return headers, rows - def packset_record(self, records): """ Packs the specified list of record values and updates the @@ -3193,6 +3266,9 @@ def equals( :return: True if other is equal to this table collection; False otherwise. :rtype: bool """ + if self is other: + return True + ret = False if type(other) is type(self): ret = bool( @@ -3206,6 +3282,16 @@ def equals( ignore_reference_sequence=bool(ignore_reference_sequence), ) ) + elif hasattr(other, "_llts") and not hasattr(other, "_ll_tables"): + ret = other.equals( + self, + ignore_metadata=ignore_metadata, + ignore_ts_metadata=ignore_ts_metadata, + ignore_provenance=ignore_provenance, + ignore_timestamps=ignore_timestamps, + ignore_tables=ignore_tables, + ignore_reference_sequence=ignore_reference_sequence, + ) return ret def assert_equals( @@ -3223,7 +3309,8 @@ def assert_equals( Raise an AssertionError for the first found difference between this and another table collection. Note that table indexes are not checked. - :param TableCollection other: Another table collection. + :param other: Another table collection (TableCollection or + ImmutableTableCollection). :param bool ignore_metadata: If True *all* metadata and metadata schemas will be excluded from the comparison. This includes the top-level tree sequence and constituent table metadata (default=False). @@ -3240,11 +3327,8 @@ def assert_equals( :param bool ignore_reference_sequence: If True the reference sequence is not included in the comparison. 
""" - if type(other) is not type(self): - raise AssertionError(f"Types differ: self={type(self)} other={type(other)}") - # Check using the low-level method to avoid slowly going through everything - if self.equals( + if type(other) is type(self) and self.equals( other, ignore_metadata=ignore_metadata, ignore_ts_metadata=ignore_ts_metadata, @@ -3255,50 +3339,20 @@ def assert_equals( ): return - if not (ignore_metadata or ignore_ts_metadata): - super().assert_equals(other) - - if not ignore_reference_sequence: - self.reference_sequence.assert_equals( - other.reference_sequence, ignore_metadata=ignore_metadata - ) - - if self.time_units != other.time_units: - raise AssertionError( - f"Time units differs: self={self.time_units} " - f"other={other.time_units}" - ) - - if self.sequence_length != other.sequence_length: - raise AssertionError( - f"Sequence Length" - f" differs: self={self.sequence_length} other={other.sequence_length}" - ) - - for table_name, table in self.table_name_map.items(): - if table_name != "provenances": - table.assert_equals( - getattr(other, table_name), ignore_metadata=ignore_metadata - ) - - if not ignore_provenance: - self.provenances.assert_equals( - other.provenances, ignore_timestamps=ignore_timestamps - ) - - # We can reach this point if the metadata schemas byte representations - # differ when the decoded schema is the same - if ( - not ignore_metadata - and self._ll_object.metadata_schema != other._ll_object.metadata_schema - and self.metadata_schema == other.metadata_schema - ): - return + valid_types = (TableCollection, ImmutableTableCollection) + if not isinstance(other, valid_types): + raise AssertionError(f"Types differ: self={type(self)} other={type(other)}") - raise AssertionError( - "TableCollections differ in an undetected way - " - "this is a bug, please report an issue on github" - ) # pragma: no cover + _assert_table_collections_equal( + self, + other, + ignore_metadata=ignore_metadata, + 
ignore_ts_metadata=ignore_ts_metadata, + ignore_provenance=ignore_provenance, + ignore_timestamps=ignore_timestamps, + ignore_tables=ignore_tables, + ignore_reference_sequence=ignore_reference_sequence, + ) def __eq__(self, other): return self.equals(other) @@ -4306,3 +4360,494 @@ def ibd_segments( store_pairs=store_pairs, store_segments=store_segments, ) + + +class ImmutableNodeTable(ImmutableMetadataTable): + table_name = "nodes" + mutable_class = NodeTable + + column_names = [ + "time", + "flags", + "population", + "individual", + "metadata", + "metadata_offset", + ] + + def _create_row_object(self, index): + metadata_bytes = self.metadata[ + self.metadata_offset[index] : self.metadata_offset[index + 1] + ].tobytes() + return NodeTableRow( + flags=self.flags[index], + time=self.time[index], + population=self.population[index], + individual=self.individual[index], + metadata=metadata_bytes, + metadata_decoder=self.metadata_schema.decode_row, + ) + + +class ImmutableIndividualTable(ImmutableMetadataTable): + table_name = "individuals" + mutable_class = IndividualTable + + column_names = [ + "flags", + "location", + "location_offset", + "parents", + "parents_offset", + "metadata", + "metadata_offset", + ] + + def _create_row_object(self, index): + location_array = self.location[ + self.location_offset[index] : self.location_offset[index + 1] + ] + parents_array = self.parents[ + self.parents_offset[index] : self.parents_offset[index + 1] + ] + metadata_bytes = self.metadata[ + self.metadata_offset[index] : self.metadata_offset[index + 1] + ].tobytes() + return IndividualTableRow( + flags=self.flags[index], + location=location_array, + parents=parents_array, + metadata=metadata_bytes, + metadata_decoder=self.metadata_schema.decode_row, + ) + + +class ImmutableEdgeTable(ImmutableMetadataTable): + table_name = "edges" + mutable_class = EdgeTable + + column_names = [ + "left", + "right", + "parent", + "child", + "metadata", + "metadata_offset", + ] + + def 
_create_row_object(self, index): + metadata_bytes = self.metadata[ + self.metadata_offset[index] : self.metadata_offset[index + 1] + ].tobytes() + return EdgeTableRow( + left=self.left[index], + right=self.right[index], + parent=self.parent[index], + child=self.child[index], + metadata=metadata_bytes, + metadata_decoder=self.metadata_schema.decode_row, + ) + + +class ImmutableMigrationTable(ImmutableMetadataTable): + table_name = "migrations" + mutable_class = MigrationTable + + column_names = [ + "left", + "right", + "node", + "source", + "dest", + "time", + "metadata", + "metadata_offset", + ] + + def _create_row_object(self, index): + metadata_bytes = self.metadata[ + self.metadata_offset[index] : self.metadata_offset[index + 1] + ].tobytes() + return MigrationTableRow( + left=self.left[index], + right=self.right[index], + node=self.node[index], + source=self.source[index], + dest=self.dest[index], + time=self.time[index], + metadata=metadata_bytes, + metadata_decoder=self.metadata_schema.decode_row, + ) + + +class ImmutableSiteTable(ImmutableMetadataTable): + table_name = "sites" + mutable_class = SiteTable + + column_names = [ + "position", + "ancestral_state", + "ancestral_state_offset", + "metadata", + "metadata_offset", + ] + + def __init__(self, ll_tree_sequence, row_indices=None): + super().__init__(ll_tree_sequence, row_indices=row_indices) + + def _create_row_object(self, index): + ancestral_state_str = self._llts.sites_ancestral_state_string[index] + metadata_bytes = self.metadata[ + self.metadata_offset[index] : self.metadata_offset[index + 1] + ].tobytes() + + return SiteTableRow( + position=self.position[index], + ancestral_state=ancestral_state_str, + metadata=metadata_bytes, + metadata_decoder=self.metadata_schema.decode_row, + ) + + +class ImmutableMutationTable(ImmutableMetadataTable): + table_name = "mutations" + mutable_class = MutationTable + + column_names = [ + "site", + "node", + "time", + "derived_state", + "derived_state_offset", + 
"parent", + "metadata", + "metadata_offset", + ] + + def __init__(self, ll_tree_sequence, row_indices=None): + super().__init__(ll_tree_sequence, row_indices=row_indices) + + def _create_row_object(self, index): + derived_state_str = self._llts.mutations_derived_state_string[index] + metadata_bytes = self.metadata[ + self.metadata_offset[index] : self.metadata_offset[index + 1] + ].tobytes() + return MutationTableRow( + site=self.site[index], + node=self.node[index], + time=self.time[index], + derived_state=derived_state_str, + parent=self.parent[index], + metadata=metadata_bytes, + metadata_decoder=self.metadata_schema.decode_row, + ) + + +class ImmutablePopulationTable(ImmutableMetadataTable): + table_name = "populations" + mutable_class = PopulationTable + + column_names = ["metadata", "metadata_offset"] + + def _create_row_object(self, index): + metadata_bytes = self.metadata[ + self.metadata_offset[index] : self.metadata_offset[index + 1] + ].tobytes() + return PopulationTableRow( + metadata=metadata_bytes, + metadata_decoder=self.metadata_schema.decode_row, + ) + + +class ImmutableProvenanceTable(ImmutableBaseTable): + table_name = "provenances" + mutable_class = ProvenanceTable + + column_names = [ + "record", + "record_offset", + "timestamp", + "timestamp_offset", + ] + + def __init__(self, ll_tree_sequence, row_indices=None): + super().__init__(ll_tree_sequence, row_indices=row_indices) + + def equals(self, other, ignore_timestamps=False): + try: + self.assert_equals(other, ignore_timestamps=ignore_timestamps) + return True + except AssertionError: + return False + + def assert_equals(self, other, *, ignore_timestamps=False): + if not isinstance(other, BaseTable): + raise AssertionError(f"Types differ: self={type(self)} other={type(other)}") + table_name_other = getattr(other, "table_name", None) + if self.table_name != table_name_other: + raise AssertionError(f"Types differ: self={type(self)} other={type(other)}") + if self.num_rows != other.num_rows: + 
raise AssertionError( + f"ProvenanceTable number of rows differ: " + f"self={self.num_rows} other={other.num_rows}" + ) + columns_to_compare = ["record", "record_offset"] + if not ignore_timestamps: + columns_to_compare.extend(["timestamp", "timestamp_offset"]) + for column_name in columns_to_compare: + if not np.array_equal( + getattr(self, column_name), getattr(other, column_name), equal_nan=True + ): + raise AssertionError(f"ProvenanceTable column '{column_name}' differs") + + def _create_row_object(self, index): + record_str = self._llts.provenances_record_string[index] + timestamp_str = self._llts.provenances_timestamp_string[index] + return ProvenanceTableRow( + record=record_str, + timestamp=timestamp_str, + ) + + +class ImmutableTableCollection(metadata.MetadataProvider): + # An immutable view of tables backed by a low level TreeSequence instance. + # Provides zero-copy read access to all table data without allowing + # mutation of state. + + def __init__(self, ll_tree_sequence): + self._llts = ll_tree_sequence + super().__init__(ll_tree_sequence) + + # Create immutable table views - lazy initialization could be added later + self.individuals = ImmutableIndividualTable(ll_tree_sequence) + self.nodes = ImmutableNodeTable(ll_tree_sequence) + self.edges = ImmutableEdgeTable(ll_tree_sequence) + self.migrations = ImmutableMigrationTable(ll_tree_sequence) + self.sites = ImmutableSiteTable(ll_tree_sequence) + self.mutations = ImmutableMutationTable(ll_tree_sequence) + self.populations = ImmutablePopulationTable(ll_tree_sequence) + self.provenances = ImmutableProvenanceTable(ll_tree_sequence) + + @property + def sequence_length(self): + return self._llts.get_sequence_length() + + @property + def file_uuid(self): + return self._llts.get_file_uuid() + + @property + def time_units(self): + return self._llts.get_time_units() + + @property + def reference_sequence(self): + return ReferenceSequence(self._llts.reference_sequence) + + @property + def metadata_schema(self): 
+ return metadata.parse_metadata_schema(self._llts.get_metadata_schema()) + + @metadata_schema.setter + def metadata_schema(self, schema): + raise ImmutableTableError( + "Cannot modify metadata schema on immutable table collection. " + "Use TreeSequence.dump_tables() for a mutable copy." + ) + + @property + def metadata(self): + return self.metadata_schema.decode_row(self.metadata_bytes) + + @metadata.setter + def metadata(self, value): + raise ImmutableTableError( + "Cannot modify metadata on immutable table collection. " + "Use TreeSequence.dump_tables() for a mutable copy." + ) + + @property + def metadata_bytes(self): + return self._llts.get_metadata() + + @metadata_bytes.setter + def metadata_bytes(self, value): + raise ImmutableTableError( + "Cannot modify metadata on immutable table collection. " + "Use TreeSequence.dump_tables() for a mutable copy." + ) + + @property + def table_name_map(self): + return { + "edges": self.edges, + "individuals": self.individuals, + "migrations": self.migrations, + "mutations": self.mutations, + "nodes": self.nodes, + "populations": self.populations, + "provenances": self.provenances, + "sites": self.sites, + } + + @property + def indexes(self) -> TableCollectionIndexes: + return TableCollectionIndexes( + **{ + "edge_insertion_order": self._llts.indexes_edge_insertion_order, + "edge_removal_order": self._llts.indexes_edge_removal_order, + } + ) + + def has_index(self): + return ( + self._llts.indexes_edge_insertion_order is not None + and self._llts.indexes_edge_removal_order is not None + ) + + def asdict(self): + # TODO Could avoid the copy here + return self.copy().asdict() + + def equals( + self, + other, + *, + ignore_metadata=False, + ignore_ts_metadata=False, + ignore_provenance=False, + ignore_timestamps=False, + ignore_tables=False, + ignore_reference_sequence=False, + ): + if self is other: + return True + try: + self.assert_equals( + other, + ignore_metadata=ignore_metadata, + ignore_ts_metadata=ignore_ts_metadata, 
+ ignore_provenance=ignore_provenance, + ignore_timestamps=ignore_timestamps, + ignore_tables=ignore_tables, + ignore_reference_sequence=ignore_reference_sequence, + ) + return True + except AssertionError: + return False + + def assert_equals( + self, + other, + *, + ignore_metadata=False, + ignore_ts_metadata=False, + ignore_provenance=False, + ignore_timestamps=False, + ignore_tables=False, + ignore_reference_sequence=False, + ): + _assert_table_collections_equal( + self, + other, + ignore_metadata=ignore_metadata, + ignore_ts_metadata=ignore_ts_metadata, + ignore_provenance=ignore_provenance, + ignore_timestamps=ignore_timestamps, + ignore_tables=ignore_tables, + ignore_reference_sequence=ignore_reference_sequence, + ) + + @property + def nbytes(self): + return sum( + ( + 8, # sequence length + len(self.metadata_bytes) + len(self._llts.get_metadata_schema()), + len(self.time_units.encode()), + self.indexes.nbytes, + self.reference_sequence.nbytes, + sum(table.nbytes for table in self.table_name_map.values()), + ) + ) + + def __eq__(self, other): + return self.equals(other) + + def __str__(self): + return "\n".join( + [ + "ImmutableTableCollection", + "", + f"Sequence Length: {self.sequence_length}", + f"Time units: {self.time_units}", + "", + "Individuals", + str(self.individuals), + "Nodes", + str(self.nodes), + "Edges", + str(self.edges), + "Sites", + str(self.sites), + "Mutations", + str(self.mutations), + "Migrations", + str(self.migrations), + "Populations", + str(self.populations), + "Provenances", + str(self.provenances), + ] + ) + + _MUTATOR_METHODS = { + "clear", + "sort", + "sort_individuals", + "canonicalise", + "compute_mutation_parents", + "compute_mutation_times", + "deduplicate_sites", + "delete_sites", + "delete_intervals", + "keep_intervals", + "ltrim", + "rtrim", + "trim", + "shift", + "delete_older", + "build_index", + "drop_index", + "subset", + "union", + "ibd_segments", + "fromdict", + "simplify", + "link_ancestors", + "map_ancestors", + } 
+ + def copy(self): + ll_tables = _tskit.TableCollection(self.sequence_length) + self._llts.dump_tables(ll_tables) + return TableCollection(ll_tables=ll_tables) + + def dump(self, file_or_path): + return self.copy().dump(file_or_path) + + def tree_sequence(self): + return self.copy().tree_sequence() + + def has_reference_sequence(self): + return self._llts.has_reference_sequence() + + def __getattr__(self, name): + if name in self._MUTATOR_METHODS: + raise ImmutableTableError( + f"Cannot call {name}() on immutable table collection. " + f"Use TreeSequence.dump_tables() for mutable copy." + ) + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'" + ) diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 81c49c3224..bf68e1695b 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -4136,6 +4136,7 @@ class TreeSequence: def __init__(self, ll_tree_sequence): self._ll_tree_sequence = ll_tree_sequence + self._immutable_tables = None metadata_schema_strings = self._ll_tree_sequence.get_table_metadata_schemas() metadata_schema_instances = { name: metadata_module.parse_metadata_schema( @@ -4321,21 +4322,23 @@ def tables_dict(self): @property def tables(self): """ - Returns the :class:`tables` underlying this tree - sequence, intended for read-only access. See :meth:`.dump_tables` if you wish - to modify the tables. + Returns an immutable view of the tables underlying this tree sequence. - .. warning:: This property currently returns a copy of the tables - underlying a tree sequence but it may return a read-only - **view** in the future. Thus, if the tables will subsequently be - updated, please use the :meth:`.dump_tables` method instead as - this will always return a new copy of the TableCollection. + This view shares the same data as the TreeSequence (zero-copy). + Use :meth:`.dump_tables` for a modifiable copy. - :return: A :class:`TableCollection` containing all a copy of the - tables underlying this tree sequence. 
- :rtype: TableCollection + Note that if tskit was built with Numpy 1, this method acts as + :meth:`.dump_tables` and returns a mutable TableCollection. + + :return: An immutable view of the TableCollection underlying this tree sequence. """ - return self.dump_tables() + if not _tskit.HAS_NUMPY_2: + return self.dump_tables() + if self._immutable_tables is None: + self._immutable_tables = tables.ImmutableTableCollection( + self._ll_tree_sequence + ) + return self._immutable_tables @property def nbytes(self): @@ -5978,7 +5981,9 @@ def sites_ancestral_state(self): "The sites_ancestral_state property requires numpy 2.0 or later." ) if self._sites_ancestral_state is None: - self._sites_ancestral_state = self._ll_tree_sequence.sites_ancestral_state + self._sites_ancestral_state = ( + self._ll_tree_sequence.sites_ancestral_state_string + ) return self._sites_ancestral_state @property @@ -6050,7 +6055,7 @@ def mutations_derived_state(self): ) if self._mutations_derived_state is None: self._mutations_derived_state = ( - self._ll_tree_sequence.mutations_derived_state + self._ll_tree_sequence.mutations_derived_state_string ) return self._mutations_derived_state @@ -6098,7 +6103,7 @@ def mutations_inherited_state(self): ) if self._mutations_inherited_state is None: self._mutations_inherited_state = ( - self._ll_tree_sequence.mutations_inherited_state + self._ll_tree_sequence.mutations_inherited_state_string ) return self._mutations_inherited_state @@ -10449,7 +10454,7 @@ def ibd_segments( IBD information. :rtype: IdentitySegments """ - return self.tables.ibd_segments( + return self.dump_tables().ibd_segments( within=within, between=between, max_time=max_time,