Skip to content

Commit b15e95a

Browse files
Merge branch 'master' of https://github.com/soedinglab/mmseqs2
2 parents b7ec0e9 + f8b3f8b commit b15e95a

File tree

17 files changed: +476 additions, -330 deletions.

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ script:
121121
if [ -e "/opt/intel/inteloneapi/setvars.sh" ]; then source /opt/intel/inteloneapi/setvars.sh; fi; \
122122
mkdir build; cd build; \
123123
cmake -DHAVE_MPI="$([[ -z "$MPI" ]]; echo $?)" -DENABLE_WERROR=1 -DHAVE_TESTS=1 ..; \
124-
make -j $(nproc --all); \
124+
make -j ${MMSEQS_NUM_THREADS:-$(nproc --all)}; \
125125
mkdir path; \
126126
printf '#!/bin/sh\n/usr/bin/tee "$@" | tail\n' > path/tee; \
127127
chmod +x path/tee; \

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ MMseqs2 (Many-against-Many sequence searching) is a software suite to search and
1111

1212
[![BioConda Install](https://img.shields.io/conda/dn/bioconda/mmseqs2.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/mmseqs2)
1313
[![Github All Releases](https://img.shields.io/github/downloads/soedinglab/mmseqs2/total.svg)](https://github.com/soedinglab/mmseqs2/releases/latest)
14+
[![Biocontainer Pulls](https://img.shields.io/endpoint?url=https%3A%2F%2Fmmseqs.com%2Fbiocontainer.php%3Fcontainer%3Dmmseqs2)](https://biocontainers.pro/#/tools/mmseqs2)
1415
[![Docker Pulls](https://img.shields.io/docker/pulls/soedinglab/mmseqs2.svg)](https://hub.docker.com/r/soedinglab/mmseqs2)
1516
[![Build Status](https://dev.azure.com/themartinsteinegger/mmseqs2/_apis/build/status/soedinglab.MMseqs2?branchName=master)](https://dev.azure.com/themartinsteinegger/mmseqs2/_build/latest?definitionId=2&branchName=master)
1617
[![Travis CI](https://travis-ci.org/soedinglab/MMseqs2.svg?branch=master)](https://travis-ci.org/soedinglab/MMseqs2)

data/workflow/update_clustering.sh

Lines changed: 120 additions & 192 deletions
Large diffs are not rendered by default.

src/CommandDeclarations.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ extern int lca(int argc, const char **argv, const Command& command);
5959
extern int taxonomyreport(int argc, const char **argv, const Command& command);
6060
extern int linclust(int argc, const char **argv, const Command& command);
6161
extern int map(int argc, const char **argv, const Command& command);
62+
extern int renamedbkeys(int argc, const char **argv, const Command& command);
6263
extern int maskbygff(int argc, const char **argv, const Command& command);
6364
extern int mergeclusters(int argc, const char **argv, const Command& command);
6465
extern int mergedbs(int argc, const char **argv, const Command& command);

src/MMseqsBase.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -788,7 +788,14 @@ std::vector<Command> baseCommands = {
788788
"<i:resultDB> <o:resultDB>",
789789
CITATION_MMSEQS2, {{"DB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::allDb },
790790
{"DB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::allDb }}},
791-
791+
{"renamedbkeys", renamedbkeys, &par.renamedbkeys, COMMAND_DB,
792+
"Create a new DB with original keys renamed",
793+
NULL,
794+
"Milot Mirdita <[email protected]>",
795+
"<i:idMapFile|stdin> <i:DB> <o:DB>",
796+
CITATION_MMSEQS2, {{"idMapFile", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::flatfileAndStdin },
797+
{"resultDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::allDb },
798+
{"resultDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::allDb }}},
792799

793800
{"extractorfs", extractorfs, &par.extractorfs, COMMAND_SEQUENCE,
794801
"Six-frame extraction of open reading frames",

src/commons/DBReader.cpp

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1090,12 +1090,15 @@ void DBReader<T>::removeDb(const std::string &databaseName){
10901090
}
10911091
}
10921092

1093-
template<typename T>
1094-
void DBReader<T>::softlinkDb(const std::string &databaseName, const std::string &outDb, DBFiles::Files dbFilesFlags) {
1093+
void copyLinkDb(const std::string &databaseName, const std::string &outDb, DBFiles::Files dbFilesFlags, bool link) {
10951094
if (dbFilesFlags & DBFiles::DATA) {
10961095
std::vector<std::string> names = FileUtil::findDatafiles(databaseName.c_str());
10971096
if (names.size() == 1) {
1098-
FileUtil::symlinkAbs(names[0], outDb);
1097+
if (link) {
1098+
FileUtil::symlinkAbs(names[0].c_str(), outDb.c_str());
1099+
} else {
1100+
FileUtil::copyFile(names[0].c_str(), outDb.c_str());
1101+
}
10991102
} else {
11001103
for (size_t i = 0; i < names.size(); i++) {
11011104
std::string::size_type idx = names[i].rfind('.');
@@ -1107,7 +1110,11 @@ void DBReader<T>::softlinkDb(const std::string &databaseName, const std::string
11071110
<< "Filename: " << names[i] << ".\n";
11081111
EXIT(EXIT_FAILURE);
11091112
}
1110-
FileUtil::symlinkAbs(names[i], outDb + ext);
1113+
if (link) {
1114+
FileUtil::symlinkAbs(names[i], outDb + ext);
1115+
} else {
1116+
FileUtil::copyFile(names[i].c_str(), (outDb + ext).c_str());
1117+
}
11111118
}
11121119
}
11131120
}
@@ -1140,11 +1147,26 @@ void DBReader<T>::softlinkDb(const std::string &databaseName, const std::string
11401147
for (size_t i = 0; i < ARRAY_SIZE(suffices); ++i) {
11411148
std::string file = databaseName + suffices[i].suffix;
11421149
if (dbFilesFlags & suffices[i].flag && FileUtil::fileExists(file.c_str())) {
1143-
FileUtil::symlinkAbs(file, outDb + suffices[i].suffix);
1150+
if (link) {
1151+
FileUtil::symlinkAbs(file, outDb + suffices[i].suffix);
1152+
} else {
1153+
FileUtil::copyFile(file.c_str(), (outDb + suffices[i].suffix).c_str());
1154+
}
11441155
}
11451156
}
11461157
}
11471158

1159+
1160+
template<typename T>
1161+
void DBReader<T>::softlinkDb(const std::string &databaseName, const std::string &outDb, DBFiles::Files dbFilesFlags) {
1162+
copyLinkDb(databaseName, outDb, dbFilesFlags, true);
1163+
}
1164+
1165+
template<typename T>
1166+
void DBReader<T>::copyDb(const std::string &databaseName, const std::string &outDb, DBFiles::Files dbFilesFlags) {
1167+
copyLinkDb(databaseName, outDb, dbFilesFlags, false);
1168+
}
1169+
11481170
template<typename T>
11491171
void DBReader<T>::decomposeDomainByAminoAcid(size_t worldRank, size_t worldSize, size_t *startEntry, size_t *numEntries){
11501172
const size_t dataSize = getDataSize();

src/commons/DBReader.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -278,6 +278,7 @@ class DBReader : public MemoryTracker {
278278
static void removeDb(const std::string &databaseName);
279279

280280
static void softlinkDb(const std::string &databaseName, const std::string &outDb, DBFiles::Files dbFilesFlags = DBFiles::ALL);
281+
static void copyDb(const std::string &databaseName, const std::string &outDb, DBFiles::Files dbFilesFlags = DBFiles::ALL);
281282

282283
char *mmapData(FILE *file, size_t *dataSize);
283284

src/commons/Parameters.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,7 @@ Parameters::Parameters():
229229
PARAM_EXTRACT_MODE(PARAM_EXTRACT_MODE_ID, "--extract-mode", "Extract mode", "Extract from 1: Query, 2: Target", typeid(int), (void *) &extractMode, "^[1-2]{1}$"),
230230
// convertkb
231231
PARAM_KB_COLUMNS(PARAM_KB_COLUMNS_ID, "--kb-columns", "UniprotKB columns", "list of indices of UniprotKB columns to be extracted", typeid(std::string), (void *) &kbColumns, ""),
232-
PARAM_RECOVER_DELETED(PARAM_RECOVER_DELETED_ID, "--recover-deleted", "Recover deleted", "Indicates if sequences are allowed to be be removed during updating", typeid(bool), (void *) &recoverDeleted, ""),
232+
PARAM_RECOVER_DELETED(PARAM_RECOVER_DELETED_ID, "--recover-deleted", "Recover deleted", "Find and recover deleted sequences during updating of clustering", typeid(bool), (void *) &recoverDeleted, ""),
233233
// filtertaxdb
234234
PARAM_TAXON_LIST(PARAM_TAXON_LIST_ID, "--taxon-list", "Selected taxa", "Taxonomy ID, possibly multiple values separated by ','", typeid(std::string), (void *) &taxonList, ""),
235235
// view
@@ -977,6 +977,11 @@ Parameters::Parameters():
977977
createsubdb.push_back(&PARAM_SUBDB_MODE);
978978
createsubdb.push_back(&PARAM_V);
979979
980+
// renamedbkeys
981+
renamedbkeys.push_back(&PARAM_SUBDB_MODE);
982+
renamedbkeys.push_back(&PARAM_THREADS);
983+
renamedbkeys.push_back(&PARAM_V);
984+
980985
// createtaxdb
981986
createtaxdb.push_back(&PARAM_NCBI_TAX_DUMP);
982987
createtaxdb.push_back(&PARAM_TAX_MAPPING_FILE);

src/commons/Parameters.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1015,6 +1015,7 @@ class Parameters {
10151015
std::vector<MMseqsParameter*> taxpercontig;
10161016
std::vector<MMseqsParameter*> easytaxonomy;
10171017
std::vector<MMseqsParameter*> createsubdb;
1018+
std::vector<MMseqsParameter*> renamedbkeys;
10181019
std::vector<MMseqsParameter*> createtaxdb;
10191020
std::vector<MMseqsParameter*> profile2pssm;
10201021
std::vector<MMseqsParameter*> profile2seq;

src/taxonomy/aggregatetax.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,6 @@ const int ROOT_RANK = INT_MAX;
1818

1919
struct taxHit {
2020
void setByEntry(const TaxID & taxonInput, const bool useAln, const char ** taxHitData, const size_t numCols, const int voteMode) {
21-
// plain format: 3+ tax columns: taxid, rank (can be more than one col), name (can be more than one col)
22-
// taxid + aln format has 11 columns: taxid, tkey, bitscore, seqid, evalue, qs, qe, ql, ts, te, tl
2321
taxon = taxonInput;
2422
evalue = 1.0;
2523
weight = 0.0;
@@ -291,7 +289,7 @@ int aggregate(const bool useAln, int argc, const char **argv, const Command& com
291289
results = Util::skipLine(results);
292290
}
293291

294-
// aggregate - the counters will be filled by the section function:
292+
// aggregate - the counters will be filled by the selection function:
295293
size_t numAssignedSeqs = 0;
296294
size_t numUnassignedSeqs = 0;
297295
size_t numSeqsAgreeWithSelectedTaxon = 0;

0 commit comments

Comments
 (0)