Skip to content

Commit 7b8f648

Browse files
committed
add ModelTamer original subsample method
1 parent e6d8bb9 commit 7b8f648

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

alignment/alignment.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6407,9 +6407,32 @@ void createSUAlignment(Params &params) {
64076407
std::mt19937 gen;
64086408
gen.seed(params.ran_seed);
64096409
int n_site = alignment->getNSite();
6410-
int n_target_site = static_cast<int>(ceil((params.model_tamer/100.0)*n_site));
6410+
int n_target_site = static_cast<int>(ceil(params.model_tamer * n_site / 100.0)); //method 1: directly subsample sites
64116411

64126412
for (int i=0; i<n_sub; i++ ) {
6413+
if (params.model_tamer_method == 0) {
6414+
// original ModelTamer method
6415+
//1. estimate how many distinct patterns are needed
6416+
int n_ptn = alignment->getNPattern();
6417+
int n_target_ptn = static_cast<int>(ceil(params.model_tamer * n_ptn / 100.0));
6418+
6419+
//2. initially subsample as many sites as the estimated number of patterns needed
6420+
vector<int> init_sub_sites(n_site);
6421+
std::iota(init_sub_sites.begin(), init_sub_sites.end(), 0);
6422+
std::shuffle(init_sub_sites.begin(), init_sub_sites.end(), gen);
6423+
init_sub_sites.resize(n_target_ptn);
6424+
Alignment *init_sub_alignment = NULL;
6425+
init_sub_alignment = new Alignment();
6426+
init_sub_alignment ->extractSites(alignment, init_sub_sites);
6427+
6428+
//3. compute how many sites are needed to subsample enough patterns
6429+
int n_init_ptn = init_sub_alignment->getNPattern();
6430+
n_target_site = (n_target_ptn * n_target_ptn + n_init_ptn - 1) / n_init_ptn; //ceil( (n_target_ptn/n_init_ptn) * n_target_ptn )
6431+
6432+
delete init_sub_alignment;
6433+
}
6434+
// else: method 1 has been done
6435+
64136436
// subsample sites
64146437
vector<int> sub_sites(n_site);
64156438
std::iota(sub_sites.begin(), sub_sites.end(), 0);

utils/tools.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,7 @@ void parseArg(int argc, char *argv[], Params &params) {
12901290
params.model_tamer = 100;
12911291
params.model_tamer_sub = 1;
12921292
params.model_tamer_up = 1;
1293+
params.model_tamer_method = 0;
12931294
params.gamma_shape = -1.0;
12941295
params.min_gamma_shape = MIN_GAMMA_SHAPE;
12951296
params.gamma_median = false;
@@ -3797,6 +3798,15 @@ void parseArg(int argc, char *argv[], Params &params) {
37973798
if (params.model_tamer_up < 1)
37983799
throw "Wrong number of ModelTamer upsampling time for -mt-up. Must be at least 1";
37993800
continue;
3801+
}
3802+
if (strcmp(argv[cnt], "-mt-method") == 0 || strcmp(argv[cnt], "--mt-method") == 0) {
3803+
cnt++;
3804+
if (cnt >= argc)
3805+
throw "Use -mt-method <0|1>";
3806+
params.model_tamer_method = convert_int(argv[cnt]);
3807+
if (params.model_tamer_method < 0 || params.model_tamer_method > 1)
3808+
throw "Wrong option for -mt-method. Only 0 or 1 is allowed.";
3809+
continue;
38003810
}
38013811
if (strcmp(argv[cnt], "-a") == 0) {
38023812
cnt++;
@@ -7746,6 +7756,7 @@ void Params::setDefault() {
77467756
model_tamer = 100;
77477757
model_tamer_sub = 1;
77487758
model_tamer_up = 1;
7759+
model_tamer_method = 0;
77497760
gamma_shape = -1.0;
77507761
min_gamma_shape = MIN_GAMMA_SHAPE;
77517762
gamma_median = false;

utils/tools.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,15 +1765,20 @@ class Params {
17651765
double model_tamer;
17661766

17671767
/**
1768-
Model tamer subsampling time.
1768+
ModelTamer subsampling time.
17691769
*/
17701770
int model_tamer_sub;
17711771

17721772
/**
1773-
Model tamer upsampling time.
1773+
ModelTamer upsampling time.
17741774
*/
17751775
int model_tamer_up;
17761776

1777+
/**
1778+
ModelTamer subsampling method.
1779+
*/
1780+
int model_tamer_method;
1781+
17771782
/**
17781783
shape parameter (alpha) of the Gamma distribution for site rates
17791784
*/

0 commit comments

Comments
 (0)