@@ -419,8 +419,9 @@ ancestor_builder_compute_ancestral_states(const ancestor_builder_t *self, int di
419
419
tsk_id_t last_site = focal_site ;
420
420
int64_t l ;
421
421
tsk_id_t u ;
422
- size_t j , ones , zeros , tmp_size , sample_set_size , min_sample_set_size ;
422
+ size_t j , ones , zeros , tmp_size , sample_set_size , min_sample_set_size , derived_count ;
423
423
double focal_site_time = self -> sites [focal_site ].time ;
424
+ double site_time ;
424
425
const site_t * restrict sites = self -> sites ;
425
426
const size_t num_sites = self -> num_sites ;
426
427
allele_t consensus ;
@@ -440,73 +441,78 @@ ancestor_builder_compute_ancestral_states(const ancestor_builder_t *self, int di
440
441
/* printf("\tl = %d\n", (int) l); */
441
442
ancestor [l ] = 0 ;
442
443
last_site = (tsk_id_t ) l ;
443
- if (sites [l ].time > focal_site_time ) {
444
444
445
- /* printf("\t%d\t%d:", (int) l, (int) sample_set_size); */
446
- /* for (j = 0; j < sample_set_size; j++) { */
447
- /* printf("%d, ", sample_set[j]); */
448
- /* } */
449
- /* printf("\n"); */
450
-
451
- ancestor_builder_get_site_genotypes_subset (
452
- self , (tsk_id_t ) l , sample_set , sample_set_size , genotypes );
453
- ones = 0 ;
454
- zeros = 0 ;
455
- for (j = 0 ; j < sample_set_size ; j ++ ) {
456
- switch (genotypes [j ]) {
457
- case 0 :
458
- zeros ++ ;
459
- break ;
460
- case 1 :
461
- ones ++ ;
462
- break ;
463
- }
445
+ /* printf("\t%d\t%d:", (int) l, (int) sample_set_size); */
446
+ /* for (j = 0; j < sample_set_size; j++) { */
447
+ /* printf("%d, ", sample_set[j]); */
448
+ /* } */
449
+ /* printf("\n"); */
450
+
451
+ ancestor_builder_get_site_genotypes_subset (
452
+ self , (tsk_id_t ) l , sample_set , sample_set_size , genotypes );
453
+ ones = 0 ;
454
+ zeros = 0 ;
455
+ for (j = 0 ; j < sample_set_size ; j ++ ) {
456
+ switch (genotypes [j ]) {
457
+ case 0 :
458
+ zeros ++ ;
459
+ break ;
460
+ case 1 :
461
+ ones ++ ;
462
+ break ;
464
463
}
464
+ }
465
+ if (ones >= zeros ) {
466
+ consensus = 1 ;
467
+ } else {
468
+ consensus = 0 ;
469
+ }
470
+ /* printf("\t:ones=%d, consensus=%d\n", (int) ones, consensus); */
471
+ /* fflush(stdout); */
472
+ for (j = 0 ; j < sample_set_size ; j ++ ) {
473
+ u = sample_set [j ];
474
+ if (disagree [u ] && (genotypes [j ] != consensus )
475
+ && (genotypes [j ] != TSK_MISSING_DATA )) {
476
+ /* This sample has disagreed with consensus twice in a row,
477
+ * so remove it */
478
+ /* printf("\t\tremoving %d\n", sample_set[j]); */
479
+ sample_set [j ] = -1 ;
480
+ }
481
+ }
482
+
483
+ site_time = sites [l ].time ;
484
+ if (site_time > focal_site_time ) {
465
485
if (ones + zeros == 0 ) {
466
486
ancestor [l ] = TSK_MISSING_DATA ;
467
487
} else {
468
- if (ones >= zeros ) {
469
- consensus = 1 ;
470
- } else {
471
- consensus = 0 ;
472
- }
473
- /* printf("\t:ones=%d, consensus=%d\n", (int) ones, consensus); */
474
- /* fflush(stdout); */
475
- for (j = 0 ; j < sample_set_size ; j ++ ) {
476
- u = sample_set [j ];
477
- if (disagree [u ] && (genotypes [j ] != consensus )
478
- && (genotypes [j ] != TSK_MISSING_DATA )) {
479
- /* This sample has disagreed with consensus twice in a row,
480
- * so remove it */
481
- /* printf("\t\tremoving %d\n", sample_set[j]); */
482
- sample_set [j ] = -1 ;
483
- }
484
- }
485
488
ancestor [l ] = consensus ;
486
- /* For the remaining samples, set the disagree flags based
487
- * on whether they agree with the consensus for this site. */
488
- for (j = 0 ; j < sample_set_size ; j ++ ) {
489
- u = sample_set [j ];
490
- if (u != -1 ) {
491
- disagree [u ] = ((genotypes [j ] != consensus )
492
- && (genotypes [j ] != TSK_MISSING_DATA ));
493
- }
494
- }
495
- /* Repack the sample set */
496
- tmp_size = 0 ;
497
- for (j = 0 ; j < sample_set_size ; j ++ ) {
498
- if (sample_set [j ] != -1 ) {
499
- sample_set [tmp_size ] = sample_set [j ];
500
- tmp_size ++ ;
501
- }
502
- }
503
- sample_set_size = tmp_size ;
504
- if (sample_set_size <= min_sample_set_size ) {
505
- /* printf("BREAK\n"); */
506
- break ;
489
+ }
490
+ }
491
+ /* For the remaining samples, set the disagree flags based
492
+ * on whether they agree with the consensus for this site. */
493
+ derived_count = sites [l ].derived_count ;
494
+ if ((site_time > focal_site_time ) || (derived_count > ones )) {
495
+ for (j = 0 ; j < sample_set_size ; j ++ ) {
496
+ u = sample_set [j ];
497
+ if (u != -1 ) {
498
+ disagree [u ] = ((genotypes [j ] != consensus )
499
+ && (genotypes [j ] != TSK_MISSING_DATA ));
507
500
}
508
501
}
509
502
}
503
+ /* Repack the sample set */
504
+ tmp_size = 0 ;
505
+ for (j = 0 ; j < sample_set_size ; j ++ ) {
506
+ if (sample_set [j ] != -1 ) {
507
+ sample_set [tmp_size ] = sample_set [j ];
508
+ tmp_size ++ ;
509
+ }
510
+ }
511
+ sample_set_size = tmp_size ;
512
+ if (sample_set_size <= min_sample_set_size ) {
513
+ /* printf("BREAK\n"); */
514
+ break ;
515
+ }
510
516
}
511
517
* last_site_ret = last_site ;
512
518
return ret ;
@@ -647,7 +653,7 @@ ancestor_builder_allocate_genotypes(ancestor_builder_t *self)
647
653
}
648
654
649
655
int WARN_UNUSED
650
- ancestor_builder_add_site (ancestor_builder_t * self , double time , allele_t * genotypes )
656
+ ancestor_builder_add_site (ancestor_builder_t * self , double time , allele_t * genotypes , tsk_size_t derived_count )
651
657
{
652
658
int ret = 0 ;
653
659
site_t * site ;
@@ -676,6 +682,7 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
676
682
pattern_map = & time_map -> pattern_map ;
677
683
site = & self -> sites [site_id ];
678
684
site -> time = time ;
685
+ site -> derived_count = derived_count ;
679
686
680
687
search .encoded_genotypes = encoded_genotypes ;
681
688
search .encoded_genotypes_size = self -> encoded_genotypes_size ;
0 commit comments