@@ -2566,6 +2566,69 @@ test_simplest_reduce_site_topology(void)
25662566 tsk_table_collection_free (& tables );
25672567}
25682568
2569+ static void
2570+ test_simplest_simplify_defragment (void )
2571+ {
2572+ const char * nodes = "0 2 -1\n"
2573+ "0 2 -1\n"
2574+ "0 2 -1\n"
2575+ "0 2 -1\n"
2576+ "0 2 -1\n"
2577+ "0 2 -1\n"
2578+ "0 1 -1\n"
2579+ "0 1 -1\n"
2580+ "0 1 -1\n"
2581+ "0 1 -1\n"
2582+ "0 1 -1\n"
2583+ "0 1 -1\n"
2584+ "1 0 -1\n"
2585+ "1 0 -1\n"
2586+ "1 0 -1\n"
2587+ "1 0 -1\n"
2588+ "1 0 -1\n"
2589+ "1 0 -1\n" ;
2590+ const char * edges = "0.00000000 0.20784841 8 12\n"
2591+ "0.00000000 0.42202433 8 15\n"
2592+ "0.00000000 0.63541014 8 16\n"
2593+ "0.42202433 1.00000000 9 15\n"
2594+ "0.00000000 1.00000000 9 17\n"
2595+ "0.00000000 1.00000000 10 14\n"
2596+ "0.20784841 1.00000000 11 12\n"
2597+ "0.00000000 1.00000000 11 13\n"
2598+ "0.63541014 1.00000000 11 16\n"
2599+ "0.00000000 1.00000000 0 10\n"
2600+ "0.62102072 1.00000000 1 9\n"
2601+ "0.00000000 1.00000000 1 11\n"
2602+ "0.00000000 0.26002984 2 6\n"
2603+ "0.26002984 1.00000000 2 6\n"
2604+ "0.00000000 0.62102072 2 9\n"
2605+ "0.55150554 1.00000000 3 8\n"
2606+ "0.00000000 1.00000000 4 7\n"
2607+ "0.00000000 0.55150554 5 8\n" ;
2608+
2609+ tsk_id_t samples [] = { 12 , 13 , 14 , 15 , 16 , 17 };
2610+ tsk_table_collection_t tables ;
2611+ int ret ;
2612+
2613+ /* This was the simplest example I could find that exercised the
2614+ * inner loops of the simplifier_extract_ancestry function */
2615+ ret = tsk_table_collection_init (& tables , 0 );
2616+ CU_ASSERT_EQUAL_FATAL (ret , 0 );
2617+ tables .sequence_length = 1 ;
2618+ parse_nodes (nodes , & tables .nodes );
2619+ CU_ASSERT_EQUAL_FATAL (tables .nodes .num_rows , 18 );
2620+ parse_edges (edges , & tables .edges );
2621+ CU_ASSERT_EQUAL_FATAL (tables .edges .num_rows , 18 );
2622+
2623+ ret = tsk_table_collection_simplify (& tables , samples , 6 , 0 , NULL );
2624+ CU_ASSERT_EQUAL_FATAL (ret , 0 );
2625+
2626+ CU_ASSERT_EQUAL (tables .nodes .num_rows , 10 );
2627+ CU_ASSERT_EQUAL (tables .edges .num_rows , 10 );
2628+
2629+ tsk_table_collection_free (& tables );
2630+ }
2631+
25692632static void
25702633test_simplest_population_filter (void )
25712634{
@@ -3640,6 +3703,7 @@ test_single_tree_iter_depths(void)
36403703 tsk_tree_free (& tree );
36413704 tsk_treeseq_free (& ts );
36423705}
3706+
36433707static void
36443708test_single_tree_simplify (void )
36453709{
@@ -3697,6 +3761,55 @@ test_single_tree_simplify(void)
36973761 tsk_table_collection_free (& tables );
36983762}
36993763
3764+ static void
3765+ test_single_tree_simplify_debug (void )
3766+ {
3767+ tsk_treeseq_t ts , simplified ;
3768+ tsk_id_t samples [] = { 0 , 1 };
3769+ int ret ;
3770+ FILE * save_stdout = stdout ;
3771+ FILE * tmp = fopen (_tmp_file_name , "w" );
3772+
3773+ CU_ASSERT_FATAL (tmp != NULL );
3774+ tsk_treeseq_from_text (& ts , 1 , single_tree_ex_nodes , single_tree_ex_edges , NULL ,
3775+ single_tree_ex_sites , single_tree_ex_mutations , NULL , NULL , 0 );
3776+
3777+ stdout = tmp ;
3778+ ret = tsk_treeseq_simplify (& ts , samples , 2 , TSK_DEBUG , & simplified , NULL );
3779+ stdout = save_stdout ;
3780+ CU_ASSERT_EQUAL_FATAL (ret , 0 );
3781+ CU_ASSERT_TRUE (ftell (tmp ) > 0 );
3782+
3783+ fclose (tmp );
3784+ tsk_treeseq_free (& ts );
3785+ tsk_treeseq_free (& simplified );
3786+ }
3787+
3788+ static void
3789+ test_single_tree_simplify_keep_input_roots (void )
3790+ {
3791+ tsk_treeseq_t ts ;
3792+ tsk_table_collection_t tables ;
3793+ tsk_id_t samples [] = { 0 , 1 };
3794+ int ret ;
3795+
3796+ tsk_treeseq_from_text (& ts , 1 , single_tree_ex_nodes , single_tree_ex_edges , NULL ,
3797+ single_tree_ex_sites , single_tree_ex_mutations , NULL , NULL , 0 );
3798+ verify_simplify (& ts );
3799+ ret = tsk_treeseq_copy_tables (& ts , & tables , 0 );
3800+ CU_ASSERT_EQUAL_FATAL (ret , 0 );
3801+
3802+ ret = tsk_table_collection_simplify (& tables , samples , 2 , TSK_KEEP_INPUT_ROOTS , NULL );
3803+ CU_ASSERT_EQUAL_FATAL (ret , 0 );
3804+ CU_ASSERT_EQUAL (tables .nodes .num_rows , 4 );
3805+ CU_ASSERT_EQUAL (tables .edges .num_rows , 3 );
3806+ CU_ASSERT_EQUAL (tables .sites .num_rows , 3 );
3807+ CU_ASSERT_EQUAL (tables .mutations .num_rows , 4 );
3808+
3809+ tsk_treeseq_free (& ts );
3810+ tsk_table_collection_free (& tables );
3811+ }
3812+
37003813static void
37013814test_single_tree_simplify_no_sample_nodes (void )
37023815{
@@ -4283,6 +4396,60 @@ test_nonbinary_multi_tree(void)
42834396 tsk_treeseq_free (& ts );
42844397}
42854398
4399+ static void
4400+ test_simplify_keep_input_roots_multi_tree (void )
4401+ {
4402+
4403+ /*
4404+ 0.25┊ 8 ┊ ┊ ┊
4405+ ┊ ┏━┻━┓ ┊ ┊ ┊
4406+ 0.20┊ ┃ ┃ ┊ ┊ 7 ┊
4407+ ┊ ┃ ┃ ┊ ┊ ┏━┻━┓ ┊
4408+ 0.17┊ 6 ┃ ┊ 6 ┊ ┃ ┃ ┊
4409+ ┊ ┏━┻┓ ┃ ┊ ┏━┻━┓ ┊ ┃ ┃ ┊
4410+ 0.09┊ ┃ 5 ┃ ┊ ┃ 5 ┊ ┃ 5 ┊
4411+ ┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓ ┊ ┃ ┏━┻┓ ┊
4412+ 0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ 4 ┊ ┃ ┃ 4 ┊
4413+ ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊
4414+ 0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊
4415+ 0.00 2.00 7.00 10.00
4416+
4417+ Simplifies to
4418+
4419+ 0.25┊ 4 ┊ ┊ ┊
4420+ ┊ ┃ ┊ ┊ ┊
4421+ 0.20┊ ┃ ┊ ┊ 3 ┊
4422+ ┊ ┃ ┊ ┊ ┏┻┓ ┊
4423+ 0.17┊ 2 ┊ 2 ┊ ┃ ┃ ┊
4424+ ┊ ┏┻┓ ┊ ┏┻┓ ┊ ┃ ┃ ┊
4425+ 0.00┊ 0 1 ┊ 0 1 ┊ 0 1 ┊
4426+ 0.00 2.00 7.00 10.00
4427+
4428+ */
4429+ int ret = 0 ;
4430+ // clang-format off
4431+ tsk_id_t parents [] = {
4432+ 2 , 2 , 4 , -1 , -1 ,
4433+ 2 , 2 , -1 , -1 , -1 ,
4434+ 3 , 3 , -1 , -1 , -1 ,
4435+ };
4436+ // clang-format on
4437+ uint32_t num_trees = 3 ;
4438+
4439+ tsk_id_t samples [] = { 0 , 3 };
4440+ tsk_treeseq_t ts , simplified ;
4441+
4442+ tsk_treeseq_from_text (& ts , 10 , paper_ex_nodes , paper_ex_edges , NULL , paper_ex_sites ,
4443+ paper_ex_mutations , paper_ex_individuals , NULL , 0 );
4444+ tsk_treeseq_dump (& ts , "tmp.trees" , 0 );
4445+ ret = tsk_treeseq_simplify (& ts , samples , 2 , TSK_KEEP_INPUT_ROOTS , & simplified , NULL );
4446+ CU_ASSERT_EQUAL_FATAL (ret , 0 );
4447+ verify_trees (& simplified , num_trees , parents );
4448+
4449+ tsk_treeseq_free (& ts );
4450+ tsk_treeseq_free (& simplified );
4451+ }
4452+
42864453static void
42874454test_left_to_right_multi_tree (void )
42884455{
@@ -5811,6 +5978,7 @@ main(int argc, char **argv)
58115978 { "test_simplest_overlapping_unary_edges_internal_samples_simplify" ,
58125979 test_simplest_overlapping_unary_edges_internal_samples_simplify },
58135980 { "test_simplest_reduce_site_topology" , test_simplest_reduce_site_topology },
5981+ { "test_simplest_simplify_defragment" , test_simplest_simplify_defragment },
58145982 { "test_simplest_population_filter" , test_simplest_population_filter },
58155983 { "test_simplest_individual_filter" , test_simplest_individual_filter },
58165984 { "test_simplest_map_mutations" , test_simplest_map_mutations },
@@ -5839,6 +6007,9 @@ main(int argc, char **argv)
58396007 { "test_single_tree_iter_times" , test_single_tree_iter_times },
58406008 { "test_single_tree_iter_depths" , test_single_tree_iter_depths },
58416009 { "test_single_tree_simplify" , test_single_tree_simplify },
6010+ { "test_single_tree_simplify_debug" , test_single_tree_simplify_debug },
6011+ { "test_single_tree_simplify_keep_input_roots" ,
6012+ test_single_tree_simplify_keep_input_roots },
58426013 { "test_single_tree_simplify_no_sample_nodes" ,
58436014 test_single_tree_simplify_no_sample_nodes },
58446015 { "test_single_tree_simplify_null_samples" ,
@@ -5859,6 +6030,8 @@ main(int argc, char **argv)
58596030 { "test_internal_sample_multi_tree" , test_internal_sample_multi_tree },
58606031 { "test_internal_sample_simplified_multi_tree" ,
58616032 test_internal_sample_simplified_multi_tree },
6033+ { "test_simplify_keep_input_roots_multi_tree" ,
6034+ test_simplify_keep_input_roots_multi_tree },
58626035 { "test_left_to_right_multi_tree" , test_left_to_right_multi_tree },
58636036 { "test_gappy_multi_tree" , test_gappy_multi_tree },
58646037 { "test_tsk_treeseq_bad_records" , test_tsk_treeseq_bad_records },
0 commit comments