@@ -4549,19 +4549,138 @@ tsk_tree_position_in_interval(const tsk_tree_t *self, double x)
4549
4549
return self -> interval .left <= x && x < self -> interval .right ;
4550
4550
}
4551
4551
4552
- int TSK_WARN_UNUSED
4553
- tsk_tree_seek (tsk_tree_t * self , double x , tsk_flags_t TSK_UNUSED (options ))
4552
+ /* NOTE:
4553
+ *
4554
+ * Notes from Kevin Thornton:
4555
+ *
4556
+ * This method inserts the edges for an arbitrary tree
4557
+ * in linear time and requires no additional memory.
4558
+ *
4559
+ * During design, the following alternatives were tested
4560
+ * (in a combination of rust + C):
4561
+ * 1. Indexing edge insertion/removal locations by tree.
4562
+ * The indexing can be done in O(n) time, giving O(1)
4563
+ * access to the first edge in a tree. We can then add
4564
+ * edges to the tree in O(e) time, where e is the number
4565
+ * of edges. This approach requires O(n) additional memory
4566
+ * and is only marginally faster than the implementation below.
4567
+ * 2. Building an interval tree mapping edge id -> span.
4568
+ * This approach adds a lot of complexity and wasn't any faster
4569
+ * than the indexing described above.
4570
+ */
4571
/* Position `self` on the tree whose interval contains x by inserting every
 * edge e with edge_left[e] <= x < edge_right[e], then filling in index,
 * interval, left_index, right_index, direction, num_nodes and (when the
 * sites table is non-empty) sites / sites_length.
 *
 * x is not range-checked here; tsk_tree_seek performs the bounds check and
 * calls this function when self->index == -1.
 * `options` is unused. `ret` is never modified, so this returns 0.
 */
+ static int
4572
+ tsk_tree_seek_from_null (tsk_tree_t * self , double x , tsk_flags_t TSK_UNUSED (options ))
4554
4573
{
4555
4574
int ret = 0 ;
4575
+ tsk_size_t edge ;
4576
+ tsk_id_t p , c , e , j , k , tree_index ;
4556
4577
const double L = tsk_treeseq_get_sequence_length (self -> tree_sequence );
4557
- const double t_l = self -> interval .left ;
4558
- const double t_r = self -> interval .right ;
4559
- double distance_left , distance_right ;
4578
+ const tsk_treeseq_t * treeseq = self -> tree_sequence ;
4579
+ const tsk_table_collection_t * tables = treeseq -> tables ;
4580
+ const tsk_id_t * restrict edge_parent = tables -> edges .parent ;
4581
+ const tsk_id_t * restrict edge_child = tables -> edges .child ;
4582
+ const tsk_size_t num_edges = tables -> edges .num_rows ;
4583
+ const tsk_size_t num_trees = self -> tree_sequence -> num_trees ;
4584
+ const double * restrict edge_left = tables -> edges .left ;
4585
+ const double * restrict edge_right = tables -> edges .right ;
4586
+ const double * restrict breakpoints = treeseq -> breakpoints ;
4587
+ const tsk_id_t * restrict insertion = tables -> indexes .edge_insertion_order ;
4588
+ const tsk_id_t * restrict removal = tables -> indexes .edge_removal_order ;
4589
+
4590
+ // NOTE: it may be better to get the
4591
+ // index first and then ask if we are
4592
+ // searching in the first or last 1/2
4593
+ // of trees.
/* j == -1 is a sentinel: it is only overwritten when a scan below breaks early. */
4594
+ j = -1 ;
4595
+ if (x <= L / 2.0 ) {
/* x in the first half of the sequence: walk the insertion order from the front. */
4596
+ for (edge = 0 ; edge < num_edges ; edge ++ ) {
4597
+ e = insertion [edge ];
/* First edge starting after x: remember its insertion-order position and stop.
 * Presumably later entries also start after x (insertion order looks sorted
 * by left coordinate) - TODO confirm. */
4598
+ if (edge_left [e ] > x ) {
4599
+ j = (tsk_id_t ) edge ;
4600
+ break ;
4601
+ }
4602
+ if (x >= edge_left [e ] && x < edge_right [e ]) {
4603
+ p = edge_parent [e ];
4604
+ c = edge_child [e ];
4605
+ tsk_tree_insert_edge (self , p , c , e );
4606
+ }
4607
+ }
4608
+ } else {
/* x in the second half: walk the removal order from the back. */
4609
+ for (edge = 0 ; edge < num_edges ; edge ++ ) {
4610
+ e = removal [num_edges - edge - 1 ];
/* First edge ending before x: stop scanning. j is seeded with this
 * removal-order position and then advanced through the insertion order
 * until an edge with left > x is found. NOTE(review): seeding an
 * insertion-order index from a removal-order position presumably relies
 * on both indexes being sorted by coordinate - confirm. */
4611
+ if (edge_right [e ] < x ) {
4612
+ j = (tsk_id_t )(num_edges - edge - 1 );
4613
+ while (j < (tsk_id_t ) num_edges && edge_left [insertion [j ]] <= x ) {
4614
+ j ++ ;
4615
+ }
4616
+ break ;
4617
+ }
4618
+ if (x >= edge_left [e ] && x < edge_right [e ]) {
4619
+ p = edge_parent [e ];
4620
+ c = edge_child [e ];
4621
+ tsk_tree_insert_edge (self , p , c , e );
4622
+ }
4623
+ }
4624
+ }
4560
4625
4561
- if (x < 0 || x >= L ) {
/* Neither scan broke early: find j by a linear walk from the start of the
 * insertion order (first position whose edge has left > x, or num_edges). */
4626
+ if (j == -1 ) {
4627
+ j = 0 ;
4628
+ while (j < (tsk_id_t ) num_edges && edge_left [insertion [j ]] <= x ) {
4629
+ j ++ ;
4630
+ }
4631
+ }
/* k: first removal-order position whose edge has right > x, or num_edges. */
4632
+ k = 0 ;
4633
+ while (k < (tsk_id_t ) num_edges && edge_right [removal [k ]] <= x ) {
4634
+ k ++ ;
4635
+ }
4636
+
4637
+ /* NOTE: tsk_search_sorted finds the first
4638
+ * insertion location >= the query point, which
4639
+ * finds a RIGHT value for queries not at the left edge.
4640
+ */
4641
+ tree_index = (tsk_id_t ) tsk_search_sorted (breakpoints , num_trees + 1 , x );
/* Step back one breakpoint when the one found lies strictly to the right of x. */
4642
+ if (breakpoints [tree_index ] > x ) {
4643
+ tree_index -= 1 ;
4644
+ }
4645
+ self -> index = tree_index ;
4646
+ self -> interval .left = breakpoints [tree_index ];
4647
+ self -> interval .right = breakpoints [tree_index + 1 ];
4648
+ self -> left_index = j ;
4649
+ self -> right_index = k ;
4650
+ self -> direction = TSK_DIR_FORWARD ;
4651
+ self -> num_nodes = tables -> nodes .num_rows ;
/* Site pointers are only refreshed when the sites table has rows. */
4652
+ if (tables -> sites .num_rows > 0 ) {
4653
+ self -> sites = treeseq -> tree_sites [self -> index ];
4654
+ self -> sites_length = treeseq -> tree_sites_length [self -> index ];
4655
+ }
4656
+
4657
+ return ret ;
4658
+ }
4659
+
4660
/* Seek `self` to the tree with the given index.
 *
 * Returns TSK_ERR_SEEK_OUT_OF_BOUNDS when `tree` is negative or not less than
 * the tree sequence's num_trees. Otherwise the tree's left breakpoint is
 * looked up and the call is forwarded to tsk_tree_seek with that position
 * and the caller's `options`; its return value is returned unchanged.
 */
+ int TSK_WARN_UNUSED
4661
+ tsk_tree_seek_index (tsk_tree_t * self , tsk_id_t tree , tsk_flags_t options )
4662
+ {
4663
+ int ret = 0 ;
4664
+ double x ;
4665
+
4666
+ if (tree < 0 || tree >= (tsk_id_t ) self -> tree_sequence -> num_trees ) {
4562
4667
ret = TSK_ERR_SEEK_OUT_OF_BOUNDS ;
4563
4668
goto out ;
4564
4669
}
/* breakpoints[tree] is used as the genomic position to seek to. */
4670
+ x = self -> tree_sequence -> breakpoints [tree ];
4671
+ ret = tsk_tree_seek (self , x , options );
4672
+ out :
4673
+ return ret ;
4674
+ }
4675
+
4676
+ static int TSK_WARN_UNUSED
4677
+ tsk_tree_seek_linear (tsk_tree_t * self , double x , tsk_flags_t TSK_UNUSED (options ))
4678
+ {
4679
+ const double L = tsk_treeseq_get_sequence_length (self -> tree_sequence );
4680
+ const double t_l = self -> interval .left ;
4681
+ const double t_r = self -> interval .right ;
4682
+ int ret = 0 ;
4683
+ double distance_left , distance_right ;
4565
4684
4566
4685
if (x < t_l ) {
4567
4686
/* |-----|-----|========|---------| */
@@ -4594,6 +4713,27 @@ tsk_tree_seek(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(options))
4594
4713
return ret ;
4595
4714
}
4596
4715
4716
/* Seek `self` to the tree covering position x.
 *
 * Returns TSK_ERR_SEEK_OUT_OF_BOUNDS when x < 0 or x >= the length reported
 * by tsk_treeseq_get_sequence_length. Otherwise dispatches on self->index:
 * -1 goes to tsk_tree_seek_from_null (presumably the "null"/cleared tree
 * state - confirm), any other value goes to tsk_tree_seek_linear. The
 * chosen helper's return value is returned unchanged.
 */
+ int TSK_WARN_UNUSED
4717
+ tsk_tree_seek (tsk_tree_t * self , double x , tsk_flags_t options )
4718
+ {
4719
+ int ret = 0 ;
4720
+ const double L = tsk_treeseq_get_sequence_length (self -> tree_sequence );
4721
+
/* Valid positions are the half-open range [0, L). */
4722
+ if (x < 0 || x >= L ) {
4723
+ ret = TSK_ERR_SEEK_OUT_OF_BOUNDS ;
4724
+ goto out ;
4725
+ }
4726
+
4727
+ if (self -> index == -1 ) {
4728
+ ret = tsk_tree_seek_from_null (self , x , options );
4729
+ } else {
4730
+ ret = tsk_tree_seek_linear (self , x , options );
4731
+ }
4732
+
4733
+ out :
4734
+ return ret ;
4735
+ }
4736
+
4597
4737
int TSK_WARN_UNUSED
4598
4738
tsk_tree_clear (tsk_tree_t * self )
4599
4739
{
0 commit comments