Skip to content

Commit dab5732

Browse files
committed
Remove dead code
1 parent 1330b43 commit dab5732

File tree

4 files changed

+51
-104
lines changed

4 files changed

+51
-104
lines changed

c/tskit/genotypes.c

Lines changed: 1 addition & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -127,13 +127,6 @@ tsk_haplotype_build_child_index(tsk_haplotype_t *self)
127127
tsk_size_t num_edges = edges->num_rows;
128128
tsk_haplotype_edge_sort_t *sorted = NULL;
129129

130-
self->parent_edge_counts
131-
= tsk_calloc(self->num_nodes, sizeof(*self->parent_edge_counts));
132-
if (self->num_nodes > 0 && self->parent_edge_counts == NULL) {
133-
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
134-
goto out;
135-
}
136-
137130
if (num_edges == 0) {
138131
self->child_order = NULL;
139132
self->child_offsets
@@ -157,14 +150,6 @@ tsk_haplotype_build_child_index(tsk_haplotype_t *self)
157150
sorted[j].edge_id = (tsk_id_t) j;
158151
sorted[j].child = edges->child[j];
159152
sorted[j].left = edges->left[j];
160-
tsk_id_t parent = edges->parent[j];
161-
if (parent >= 0 && parent < (tsk_id_t) self->num_nodes) {
162-
if (self->parent_edge_counts[parent] == INT32_MAX) {
163-
ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
164-
goto out;
165-
}
166-
self->parent_edge_counts[parent]++;
167-
}
168153
}
169154
qsort(sorted, num_edges, sizeof(*sorted), tsk_haplotype_edge_sort_cmp);
170155

@@ -268,10 +253,6 @@ tsk_haplotype_build_mutation_index(tsk_haplotype_t *self)
268253
ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
269254
goto out;
270255
}
271-
if (allele == (uint8_t) self->missing_char) {
272-
ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
273-
goto out;
274-
}
275256
self->node_mutation_sites[counts[node]] = (int32_t)(site - site_start);
276257
self->node_mutation_states[counts[node]] = allele;
277258
counts[node]++;
@@ -290,7 +271,6 @@ tsk_haplotype_build_ancestral_states(tsk_haplotype_t *self)
290271
const tsk_table_collection_t *tables = self->tree_sequence->tables;
291272
const tsk_site_table_t *sites = &tables->sites;
292273
tsk_id_t site_start = self->site_start;
293-
tsk_id_t site_stop = self->site_stop;
294274
tsk_size_t j;
295275

296276
if (self->num_sites == 0) {
@@ -319,10 +299,6 @@ tsk_haplotype_build_ancestral_states(tsk_haplotype_t *self)
319299
ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
320300
goto out;
321301
}
322-
if (allele == (uint8_t) self->missing_char) {
323-
ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
324-
goto out;
325-
}
326302
self->ancestral_states[j] = allele;
327303
}
328304

@@ -421,7 +397,7 @@ tsk_haplotype_alloc_bitset(tsk_haplotype_t *self)
421397

422398
int
423399
tsk_haplotype_init(tsk_haplotype_t *self, const tsk_treeseq_t *tree_sequence,
424-
tsk_id_t site_start, tsk_id_t site_stop, int8_t missing_char, tsk_flags_t options)
400+
tsk_id_t site_start, tsk_id_t site_stop)
425401
{
426402
int ret = 0;
427403
const tsk_table_collection_t *tables;
@@ -434,8 +410,6 @@ tsk_haplotype_init(tsk_haplotype_t *self, const tsk_treeseq_t *tree_sequence,
434410

435411
tsk_memset(self, 0, sizeof(*self));
436412
self->tree_sequence = tree_sequence;
437-
self->missing_char = missing_char;
438-
self->isolated_as_missing = !(options & TSK_ISOLATED_NOT_MISSING);
439413

440414
tables = tree_sequence->tables;
441415
sites = &tables->sites;
@@ -445,17 +419,12 @@ tsk_haplotype_init(tsk_haplotype_t *self, const tsk_treeseq_t *tree_sequence,
445419
ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
446420
goto out;
447421
}
448-
if ((unsigned char) missing_char > 0x7F) {
449-
ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
450-
goto out;
451-
}
452422

453423
self->site_start = (int32_t) site_start;
454424
self->site_stop = (int32_t) site_stop;
455425
self->num_sites = (int32_t)(site_stop - site_start);
456426
self->num_nodes = tables->nodes.num_rows;
457427
self->num_edges = tables->edges.num_rows;
458-
self->node_flags = tables->nodes.flags;
459428
self->site_positions = sites->position + site_start;
460429

461430
ret = tsk_haplotype_build_child_index(self);
@@ -527,9 +496,6 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
527496
if (node < 0 || node >= (tsk_id_t) self->num_nodes) {
528497
return tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
529498
}
530-
if (self->isolated_as_missing && !(self->node_flags[node] & TSK_NODE_IS_SAMPLE)) {
531-
return tsk_trace_error(TSK_ERR_MUST_IMPUTE_NON_SAMPLES);
532-
}
533499
if (self->num_sites == 0) {
534500
return 0;
535501
}
@@ -651,16 +617,9 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
651617
}
652618
}
653619

654-
bool has_incoming = self->child_offsets[node + 1] != self->child_offsets[node];
655-
bool has_outgoing
656-
= self->parent_edge_counts != NULL && self->parent_edge_counts[node] > 0;
657-
bool mark_missing = self->isolated_as_missing && !(has_incoming || has_outgoing);
658620
idx = tsk_haplotype_bitset_next(
659621
bits, self->num_bit_words, 0, (tsk_size_t) self->num_sites);
660622
while (idx < (tsk_size_t) self->num_sites) {
661-
if (mark_missing) {
662-
haplotype[idx] = self->missing_char;
663-
}
664623
tsk_haplotype_bitset_clear(bits, idx);
665624
idx = tsk_haplotype_bitset_next(
666625
bits, self->num_bit_words, idx, (tsk_size_t) self->num_sites);
@@ -681,7 +640,6 @@ tsk_haplotype_free(tsk_haplotype_t *self)
681640
tsk_safe_free(self->node_mutation_states);
682641
tsk_safe_free(self->child_order);
683642
tsk_safe_free(self->child_offsets);
684-
tsk_safe_free(self->parent_edge_counts);
685643
tsk_safe_free(self->edge_start_index);
686644
tsk_safe_free(self->edge_end_index);
687645
tsk_safe_free(self->edge_stack);
@@ -692,7 +650,6 @@ tsk_haplotype_free(tsk_haplotype_t *self)
692650
tsk_safe_free(self->unresolved_bits);
693651
tsk_safe_free(self->initial_bits);
694652
self->tree_sequence = NULL;
695-
self->node_flags = NULL;
696653
self->site_positions = NULL;
697654
self->initialised = false;
698655
return 0;

c/tskit/genotypes.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,13 @@ typedef struct {
9393
int32_t site_start;
9494
int32_t site_stop;
9595
int32_t num_sites;
96-
int8_t missing_char;
97-
bool isolated_as_missing;
98-
const tsk_flags_t *node_flags;
9996
const double *site_positions;
10097
uint8_t *ancestral_states;
10198
int32_t *node_mutation_offsets;
10299
int32_t *node_mutation_sites;
103100
uint8_t *node_mutation_states;
104101
tsk_id_t *child_order;
105102
int32_t *child_offsets;
106-
int32_t *parent_edge_counts;
107103
int32_t *edge_start_index;
108104
int32_t *edge_end_index;
109105
tsk_id_t *edge_stack;
@@ -211,7 +207,7 @@ void tsk_variant_print_state(const tsk_variant_t *self, FILE *out);
211207

212208
/* Deprecated vargen methods (since C API v1.0) */
213209
int tsk_haplotype_init(tsk_haplotype_t *self, const tsk_treeseq_t *tree_sequence,
214-
tsk_id_t site_start, tsk_id_t site_stop, int8_t missing_char, tsk_flags_t options);
210+
tsk_id_t site_start, tsk_id_t site_stop);
215211
int tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype);
216212
int tsk_haplotype_free(tsk_haplotype_t *self);
217213
int tsk_vargen_init(tsk_vargen_t *self, const tsk_treeseq_t *tree_sequence,

python/_tskitmodule.c

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10636,45 +10636,16 @@ Haplotype_init(Haplotype *self, PyObject *args, PyObject *kwds)
1063610636
{
1063710637
int ret = -1;
1063810638
int err;
10639-
static char *kwlist[] = { "tree_sequence", "site_start", "site_stop",
10640-
"isolated_as_missing", "missing_data_character", NULL };
10639+
static char *kwlist[] = { "tree_sequence", "site_start", "site_stop", NULL };
1064110640
TreeSequence *tree_sequence = NULL;
1064210641
Py_ssize_t site_start;
1064310642
Py_ssize_t site_stop;
10644-
int isolated_as_missing = 1;
10645-
PyObject *missing_obj = NULL;
10646-
PyObject *missing_bytes = NULL;
10647-
const char *missing_ptr = NULL;
10648-
Py_ssize_t missing_length = 0;
10649-
char missing_char = 'N';
10650-
tsk_flags_t options = 0;
1065110643

10652-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!nn|pO", kwlist, &TreeSequenceType,
10653-
&tree_sequence, &site_start, &site_stop, &isolated_as_missing,
10654-
&missing_obj)) {
10644+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!nn", kwlist, &TreeSequenceType,
10645+
&tree_sequence, &site_start, &site_stop)) {
1065510646
goto out;
1065610647
}
1065710648

10658-
if (missing_obj != NULL && missing_obj != Py_None) {
10659-
if (PyBytes_Check(missing_obj)) {
10660-
missing_ptr = PyBytes_AS_STRING(missing_obj);
10661-
missing_length = PyBytes_GET_SIZE(missing_obj);
10662-
} else {
10663-
missing_bytes = PyUnicode_AsASCIIString(missing_obj);
10664-
if (missing_bytes == NULL) {
10665-
goto out;
10666-
}
10667-
missing_ptr = PyBytes_AS_STRING(missing_bytes);
10668-
missing_length = PyBytes_GET_SIZE(missing_bytes);
10669-
}
10670-
if (missing_length != 1) {
10671-
PyErr_SetString(PyExc_ValueError,
10672-
"missing_data_character must be a single ASCII character");
10673-
goto out;
10674-
}
10675-
missing_char = missing_ptr[0];
10676-
}
10677-
1067810649
self->haplotype = PyMem_Malloc(sizeof(*self->haplotype));
1067910650
if (self->haplotype == NULL) {
1068010651
PyErr_NoMemory();
@@ -10684,12 +10655,8 @@ Haplotype_init(Haplotype *self, PyObject *args, PyObject *kwds)
1068410655
self->tree_sequence = tree_sequence;
1068510656
Py_INCREF(tree_sequence);
1068610657

10687-
if (!isolated_as_missing) {
10688-
options |= TSK_ISOLATED_NOT_MISSING;
10689-
}
10690-
1069110658
err = tsk_haplotype_init(self->haplotype, tree_sequence->tree_sequence,
10692-
(tsk_id_t) site_start, (tsk_id_t) site_stop, (int8_t) missing_char, options);
10659+
(tsk_id_t) site_start, (tsk_id_t) site_stop);
1069310660
if (err != 0) {
1069410661
handle_library_error(err);
1069510662
goto out;
@@ -10706,7 +10673,6 @@ Haplotype_init(Haplotype *self, PyObject *args, PyObject *kwds)
1070610673
Py_XDECREF(self->tree_sequence);
1070710674
self->tree_sequence = NULL;
1070810675
}
10709-
Py_XDECREF(missing_bytes);
1071010676
return ret;
1071110677
}
1071210678

python/tskit/trees.py

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5272,16 +5272,45 @@ def _haplotypes_array(
52725272

52735273
start_site, stop_site = np.searchsorted(self.sites_position, interval)
52745274
num_sites = stop_site - start_site
5275+
missing_int8 = ord(missing_data_character)
5276+
5277+
want_missing = (
5278+
True if isolated_as_missing is None else bool(isolated_as_missing)
5279+
)
5280+
5281+
if want_missing and num_sites > 0:
5282+
ll_ts = self._ll_tree_sequence
5283+
anc_offsets = ll_ts.sites_ancestral_state_offset
5284+
anc_data = ll_ts.sites_ancestral_state
5285+
anc_slice = anc_offsets[start_site : stop_site + 1]
5286+
anc_lengths = np.diff(anc_slice)
5287+
if np.any(anc_lengths > 0):
5288+
anc_index = anc_slice[:-1][anc_lengths > 0]
5289+
if np.any(anc_data[anc_index] == missing_int8):
5290+
raise ValueError(
5291+
"missing_data_character must differ from existing allele states"
5292+
)
5293+
mut_sites = ll_ts.mutations_site
5294+
if mut_sites.size > 0:
5295+
mut_offsets = ll_ts.mutations_derived_state_offset
5296+
mut_lengths = np.diff(mut_offsets)
5297+
mask = (mut_sites >= start_site) & (mut_sites < stop_site)
5298+
valid = mask & (mut_lengths > 0)
5299+
if np.any(valid):
5300+
mut_start = mut_offsets[:-1][valid]
5301+
derived_chars = ll_ts.mutations_derived_state[mut_start]
5302+
if np.any(derived_chars == missing_int8):
5303+
raise ValueError(
5304+
"missing_data_character must differ from existing allele "
5305+
"states"
5306+
)
52755307

52765308
if samples is None:
52775309
sample_nodes = self.samples()
52785310
else:
52795311
sample_nodes = np.array(samples, dtype=np.int64)
52805312
num_samples = len(sample_nodes)
52815313

5282-
want_missing = (
5283-
True if isolated_as_missing is None else bool(isolated_as_missing)
5284-
)
52855314
if want_missing and samples is not None and num_samples > 0:
52865315
flags = self.nodes_flags[sample_nodes]
52875316
if np.any((flags & NODE_IS_SAMPLE) == 0):
@@ -5294,19 +5323,20 @@ def _haplotypes_array(
52945323
if num_samples == 0 or num_sites == 0:
52955324
return H, (start_site, stop_site - 1)
52965325

5297-
missing_int8 = ord(missing_data_character)
5326+
# For now deal with missing data using the variants iterator
52985327
missing_mask = None
52995328
if want_missing:
5300-
missing_mask = np.zeros((num_samples, num_sites), dtype=bool)
5301-
5302-
for var in self.variants(
5303-
samples=samples,
5304-
isolated_as_missing=isolated_as_missing,
5305-
left=interval.left,
5306-
right=interval.right,
5307-
copy=False,
5308-
):
5309-
if want_missing and missing_mask is not None:
5329+
for var in self.variants(
5330+
samples=samples,
5331+
isolated_as_missing=isolated_as_missing,
5332+
left=interval.left,
5333+
right=interval.right,
5334+
copy=False,
5335+
):
5336+
if not var.has_missing_data:
5337+
continue
5338+
if missing_mask is None:
5339+
missing_mask = np.zeros((num_samples, num_sites), dtype=bool)
53105340
genotypes = np.asarray(var.genotypes, dtype=np.int32)
53115341
missing_mask[:, var.site.id - start_site] = (
53125342
genotypes == tskit.MISSING_DATA
@@ -5317,8 +5347,6 @@ def _haplotypes_array(
53175347
self._ll_tree_sequence,
53185348
int(start_site),
53195349
int(stop_site),
5320-
isolated_as_missing=False,
5321-
missing_data_character=missing_data_character,
53225350
)
53235351
except exceptions.LibraryError as err:
53245352
if "TSK_ERR_UNSUPPORTED_OPERATION" in str(err):
@@ -5331,7 +5359,7 @@ def _haplotypes_array(
53315359
data = hap.decode(int(node))
53325360
H[row, :] = np.frombuffer(data, dtype=np.int8, count=num_sites)
53335361

5334-
if want_missing and missing_mask is not None:
5362+
if missing_mask is not None:
53355363
H[missing_mask] = missing_int8
53365364

53375365
return H, (start_site, stop_site - 1)

0 commit comments

Comments
 (0)