Skip to content

Commit fcef2a6

Browse files
Merge pull request #2388 from jeromekelleher/more-internal-samples-2
Some tests for no-leaf-samples
2 parents 0c86a4f + 20d666d commit fcef2a6

File tree

5 files changed

+144
-4
lines changed

5 files changed

+144
-4
lines changed

CHANGELOG.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
# Changelog
22

33

4-
## [1.3.5] - 2025-XX-XX
4+
## [1.4.0] - 2025-XX-XX
5+
6+
**New features**
7+
8+
- The `FixedPedigree` simulation model now supports internal samples ({issue}`1855`,
9+
{pr}`2321`, {pr}`2326`, {pr}`2388`, {user}`abureau`, {user}`jeromekelleher`).
510

611
**Breaking changes**:
712

@@ -33,7 +38,7 @@ Bugfix release for issues with Dirac and Beta coalescent models.
3338

3439
- Add `record_provenance` argument to `sim_mutations` ({issue}`2272`, {pr}`2273`, {user}`peterlharp`)
3540

36-
- Add support and wheels for Python3.12, drop Python3.8
41+
- Add support and wheels for Python3.12, drop Python3.8
3742

3843
## [1.3.1] - 2024-02-09
3944

@@ -48,7 +53,7 @@ Bugfix release for issues with Dirac and Beta coalescent models.
4853
- Add a `MicrosatMutationModel` mutation model class, that
4954
represents a generalized interface for constructing mutational
5055
models appropriate to STRs. In addition 3 specific microsat models
51-
are added `SMM`, `TPM`, and `EL2`.
56+
are added `SMM`, `TPM`, and `EL2`.
5257
({issue}`2013`, {user}`andrewkern`).
5358

5459
- Raise an error if `log_arg_likelihood` is called on a recombinant tree

lib/msprime.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3007,6 +3007,25 @@ msp_pedigree_add_sample_ancestry(msp_t *self, tsk_id_t node_id)
30073007
return ret;
30083008
}
30093009

3010+
static void
3011+
msp_skip_pedigree_non_samples(msp_t *self)
3012+
{
3013+
individual_t *ind = NULL;
3014+
pedigree_t *pedigree = &self->pedigree;
3015+
const tsk_id_t num_individuals = (tsk_id_t) pedigree->num_individuals;
3016+
const tsk_flags_t *nodes_flags = self->tables->nodes.flags;
3017+
tsk_id_t j, node;
3018+
3019+
for (j = 0; j < num_individuals; j++) {
3020+
ind = pedigree->visit_order[j];
3021+
node = ind->nodes[0];
3022+
if ((nodes_flags[node] & TSK_NODE_IS_SAMPLE) != 0) {
3023+
break;
3024+
}
3025+
}
3026+
pedigree->next_individual = j;
3027+
}
3028+
30103029
static int MSP_WARN_UNUSED
30113030
msp_pedigree_initialise(msp_t *self)
30123031
{
@@ -3057,6 +3076,7 @@ msp_pedigree_initialise(msp_t *self)
30573076
counter->value = 0;
30583077

30593078
self->pedigree.next_individual = 0;
3079+
msp_skip_pedigree_non_samples(self);
30603080
out:
30613081
return ret;
30623082
}

lib/tests/test_pedigrees.c

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,54 @@ verify_pedigree(double recombination_rate, unsigned long seed,
204204
tsk_table_collection_free(&tables);
205205
}
206206

207+
static void
208+
verify_pedigree_event_by_event(double recombination_rate, unsigned long seed,
209+
tsk_size_t num_individuals, tsk_id_t *parents, double *time, tsk_flags_t *is_sample,
210+
tsk_id_t *population, uint32_t additional_nodes)
211+
{
212+
int ret, status1, status2;
213+
int ploidy = 2;
214+
msp_t msp1, msp2;
215+
tsk_table_collection_t tables1, tables2;
216+
gsl_rng *rng1 = safe_rng_alloc();
217+
gsl_rng *rng2 = safe_rng_alloc();
218+
219+
ret = build_pedigree_sim(&msp1, &tables1, rng1, 100, ploidy, num_individuals,
220+
parents, time, is_sample, population);
221+
CU_ASSERT_EQUAL_FATAL(ret, 0);
222+
ret = build_pedigree_sim(&msp2, &tables2, rng2, 100, ploidy, num_individuals,
223+
parents, time, is_sample, population);
224+
CU_ASSERT_EQUAL_FATAL(ret, 0);
225+
226+
ret = msp_initialise(&msp1);
227+
CU_ASSERT_EQUAL_FATAL(ret, 0);
228+
ret = msp_initialise(&msp2);
229+
CU_ASSERT_EQUAL_FATAL(ret, 0);
230+
231+
ret = msp_run(&msp1, DBL_MAX, UINT32_MAX);
232+
CU_ASSERT_FATAL(ret == MSP_EXIT_MODEL_COMPLETE || ret == MSP_EXIT_COALESCENCE);
233+
status1 = ret;
234+
235+
ret = MSP_EXIT_MAX_EVENTS;
236+
while (ret == MSP_EXIT_MAX_EVENTS) {
237+
ret = msp_run(&msp2, DBL_MAX, 1);
238+
CU_ASSERT_FATAL(ret >= 0);
239+
msp_verify(&msp2, 0);
240+
}
241+
CU_ASSERT_FATAL(ret == MSP_EXIT_MODEL_COMPLETE || ret == MSP_EXIT_COALESCENCE);
242+
status2 = ret;
243+
CU_ASSERT_EQUAL(status1, status2);
244+
245+
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables1, &tables2, 0));
246+
247+
tsk_table_collection_free(&tables1);
248+
msp_free(&msp1);
249+
tsk_table_collection_free(&tables2);
250+
msp_free(&msp2);
251+
gsl_rng_free(rng1);
252+
gsl_rng_free(rng2);
253+
}
254+
207255
static void
208256
test_trio(void)
209257
{
@@ -850,6 +898,29 @@ test_internal_samples(void)
850898

851899
verify_pedigree(0, 1, 3, parents, time, is_sample, NULL, 0);
852900
verify_pedigree(0.1, 1, 3, parents, time, is_sample, NULL, 0);
901+
902+
verify_pedigree_event_by_event(0, 1, 3, parents, time, is_sample, NULL, 0);
903+
verify_pedigree_event_by_event(0.1, 1, 3, parents, time, is_sample, NULL, 0);
904+
}
905+
906+
static void
907+
test_no_leaf_samples(void)
908+
{
909+
tsk_id_t *parents = deep_n10_parents;
910+
double *time = deep_n10_time;
911+
size_t num_inds = sizeof(deep_n10_time) / sizeof(*deep_n10_time);
912+
tsk_flags_t is_sample[num_inds];
913+
size_t j;
914+
915+
for (j = 0; j < num_inds; j++) {
916+
is_sample[j] = time[j] == 1;
917+
}
918+
919+
verify_pedigree(0, 1, num_inds, parents, time, is_sample, NULL, 0);
920+
verify_pedigree(0.1, 1, num_inds, parents, time, is_sample, NULL, 0);
921+
922+
verify_pedigree_event_by_event(0, 1, num_inds, parents, time, is_sample, NULL, 0);
923+
verify_pedigree_event_by_event(0.1, 1, num_inds, parents, time, is_sample, NULL, 0);
853924
}
854925

855926
static void
@@ -895,6 +966,7 @@ test_trio_same_pop(void)
895966
tsk_id_t population[] = { 2, 2, 2 };
896967

897968
verify_pedigree(0, 1, 3, parents, time, NULL, population, 0);
969+
verify_pedigree_event_by_event(0, 1, 3, parents, time, NULL, population, 0);
898970
}
899971

900972
static void
@@ -906,6 +978,7 @@ test_trio_child_different_pop(void)
906978
tsk_id_t population[] = { 2, 2, 1 };
907979

908980
verify_pedigree(0, 1, 3, parents, time, NULL, population, 0);
981+
verify_pedigree_event_by_event(0, 1, 3, parents, time, NULL, population, 0);
909982
}
910983

911984
int
@@ -918,6 +991,7 @@ main(int argc, char **argv)
918991
{ "test_sibs", test_sibs },
919992
{ "test_ancient_sample", test_ancient_sample },
920993
{ "test_internal_samples", test_internal_samples },
994+
{ "test_no_leaf_samples", test_no_leaf_samples },
921995
{ "test_large_family", test_large_family },
922996
{ "test_unrelated_n3", test_unrelated_n3 },
923997
{ "test_very_deep_n2", test_very_deep_n2 },
@@ -933,7 +1007,6 @@ main(int argc, char **argv)
9331007
{ "test_replicates_exit_coalescence", test_replicates_exit_coalescence },
9341008
{ "test_errors", test_errors },
9351009
{ "test_combined_with_other_models", test_combined_with_other_models },
936-
9371010
{ "test_trio_same_pop", test_trio_same_pop },
9381011
{ "test_trio_child_different_pop", test_trio_child_different_pop },
9391012
CU_TEST_INFO_NULL,

tests/test_algorithms.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,21 @@ def test_pedigree_internal_sample(self, r):
436436
input_tables.nodes.assert_equals(output_tables.nodes[: len(input_tables.nodes)])
437437
assert len(output_tables.edges) >= 2
438438

439+
@pytest.mark.parametrize("r", [0, 0.1, 1])
440+
def test_pedigree_only_internal_samples(self, r):
441+
input_tables = simulate_pedigree(
442+
num_founders=4, num_generations=4, sample_gen=[1]
443+
)
444+
with tempfile.TemporaryDirectory() as tmpdir:
445+
ts_path = pathlib.Path(tmpdir) / "pedigree.trees"
446+
input_tables.dump(ts_path)
447+
ts = self.run_script(f"0 --from-ts {ts_path} -r {r} --model=fixed_pedigree")
448+
output_tables = ts.dump_tables()
449+
input_tables.individuals.assert_equals(output_tables.individuals)
450+
input_tables.nodes.assert_equals(output_tables.nodes[: len(input_tables.nodes)])
451+
assert np.all(ts.nodes_time[ts.samples()] == 1)
452+
assert len(output_tables.edges) >= 2
453+
439454
@pytest.mark.parametrize("num_founders", [1, 2, 20])
440455
def test_one_gen_pedigree(self, num_founders):
441456
tables = simulate_pedigree(num_founders=num_founders, num_generations=1)

tests/test_pedigree.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,30 @@ def test_shallow_internal(self, num_founders, recombination_rate):
567567
)
568568
self.verify(tables, recombination_rate)
569569

570+
@pytest.mark.parametrize("num_founders", [2, 3, 5])
571+
@pytest.mark.parametrize("recombination_rate", [0, 0.01])
572+
def test_internal_and_leaf_samples(self, num_founders, recombination_rate):
573+
tables = simulate_pedigree(
574+
num_founders=num_founders,
575+
num_children_prob=[0, 0, 1],
576+
num_generations=5,
577+
sequence_length=100,
578+
sample_gen=[0, 2],
579+
)
580+
self.verify(tables, recombination_rate)
581+
582+
@pytest.mark.parametrize("num_founders", [2, 3, 5])
583+
@pytest.mark.parametrize("recombination_rate", [0, 0.01])
584+
def test_no_leaf_samples(self, num_founders, recombination_rate):
585+
tables = simulate_pedigree(
586+
num_founders=num_founders,
587+
num_children_prob=[0, 0, 1],
588+
num_generations=5,
589+
sequence_length=100,
590+
sample_gen=[1],
591+
)
592+
self.verify(tables, recombination_rate)
593+
570594
@pytest.mark.parametrize("num_founders", [2, 3, 10, 20])
571595
@pytest.mark.parametrize("recombination_rate", [0, 0.01])
572596
def test_deep(self, num_founders, recombination_rate):
@@ -836,8 +860,11 @@ def verify(self, input_tables, recombination_rate=0):
836860
recombination_rate=recombination_rate,
837861
random_seed=1,
838862
)
863+
# print(ts1.tables)
864+
# print(ts1.draw_text())
839865
sim.run(event_chunk=1)
840866
output_tables = tskit.TableCollection.fromdict(sim.tables.asdict())
867+
# print(output_tables)
841868
output_tables.assert_equals(ts1.tables, ignore_provenance=True)
842869
return ts1
843870

0 commit comments

Comments
 (0)