Skip to content

Commit 5b68a8e

Browse files
authored
Merge pull request #1 from leejoey0921/linclust_integration
Make adjacent sequence matching configurable
2 parents e12665b + ecea89f commit 5b68a8e

File tree

4 files changed

+332
-139
lines changed

4 files changed

+332
-139
lines changed

src/commons/Parameters.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,7 @@ Parameters::Parameters():
152152
PARAM_IGNORE_MULTI_KMER(PARAM_IGNORE_MULTI_KMER_ID, "--ignore-multi-kmer", "Skip repeating k-mers", "Skip k-mers occurring multiple times (>=2)", typeid(bool), (void *) &ignoreMultiKmer, "", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
153153
PARAM_HASH_SHIFT(PARAM_HASH_SHIFT_ID, "--hash-shift", "Shift hash", "Shift k-mer hash initialization", typeid(int), (void *) &hashShift, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
154154
PARAM_PICK_N_SIMILAR(PARAM_PICK_N_SIMILAR_ID, "--pick-n-sim-kmer", "Add N similar to search", "Add N similar k-mers to search", typeid(int), (void *) &pickNbest, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
155+
PARAM_MATCH_ADJACENT_SEQ(PARAM_MATCH_ADJACENT_SEQ_ID, "--match-adjacent-seq", "Compare adjacent sequences to k-mers", "Compare sequence information adjacent to k-mers and elect multiple representative sequences per cluster", typeid(bool), (void *) &matchAdjacentSeq, "", MMseqsParameter::COMMAND_CLUSTLINEAR),
155156
PARAM_ADJUST_KMER_LEN(PARAM_ADJUST_KMER_LEN_ID, "--adjust-kmer-len", "Adjust k-mer length", "Adjust k-mer length based on specificity (only for nucleotides)", typeid(bool), (void *) &adjustKmerLength, "", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
156157
PARAM_RESULT_DIRECTION(PARAM_RESULT_DIRECTION_ID, "--result-direction", "Result direction", "result is 0: query, 1: target centric", typeid(int), (void *) &resultDirection, "^[0-1]{1}$", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
157158
PARAM_WEIGHT_FILE(PARAM_WEIGHT_FILE_ID, "--weights", "Weight file name", "Weights used for cluster priorization", typeid(std::string), (void*) &weightFile, "", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT ),
@@ -2513,6 +2514,8 @@ void Parameters::setDefaults() {
25132514
resultDirection = Parameters::PARAM_RESULT_DIRECTION_TARGET;
25142515
weightThr = 0.9;
25152516
weightFile = "";
2517+
// TODO: change this default to true once the regression tests are fixed
2518+
matchAdjacentSeq = false;
25162519
hashSeqBuffer = 1.05;
25172520

25182521
// result2stats

src/commons/Parameters.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -552,6 +552,7 @@ class Parameters {
552552
int resultDirection;
553553
float weightThr;
554554
std::string weightFile;
555+
bool matchAdjacentSeq;
555556
float hashSeqBuffer;
556557

557558
// indexdb
@@ -866,6 +867,7 @@ class Parameters {
866867
PARAMETER(PARAM_IGNORE_MULTI_KMER)
867868
PARAMETER(PARAM_HASH_SHIFT)
868869
PARAMETER(PARAM_PICK_N_SIMILAR)
870+
PARAMETER(PARAM_MATCH_ADJACENT_SEQ)
869871
PARAMETER(PARAM_ADJUST_KMER_LEN)
870872
PARAMETER(PARAM_RESULT_DIRECTION)
871873
PARAMETER(PARAM_WEIGHT_FILE)

0 commit comments

Comments
 (0)