Skip to content

Commit a65c014

Browse files
authored
Merge pull request #149 from jaebeom-kim/master
More thread-safe database creation
2 parents 56836b6 + 522d991 commit a65c014

19 files changed

+364
-404
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
22
# keep old policy for setting implicit link libraries
33
# zlib causes issues in static builds otherwise
44
cmake_policy(SET CMP0060 OLD)
5-
project(metabuli CXX C)
5+
project(metabuli CXX)
66
set(CMAKE_CXX_STANDARD 14)
77
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
88
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/lib/mmseqs/cmake")

lib/fastq_utils/fastq.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ gzFile fastq_open(const char* filename,const char *mode);
6262
static void fastq_close(gzFile fd);
6363

6464
void fastq_print_version() {
65-
fprintf(stderr,"fastq_utils %s\n",FASTQ_UTIL_VERSION);
65+
fprintf(stdout,"fastq_utils %s\n",FASTQ_UTIL_VERSION);
6666
}
6767

6868

@@ -479,17 +479,17 @@ char* fastq_get_readname(FASTQ_FILE* fd, FASTQ_ENTRY* e,char* rn,unsigned long *
479479
if ( fd->readname_format == UNDEF ) {
480480
fd->is_casava_18=is_casava_1_8_readname(rn);
481481
if (fd->is_casava_18) {
482-
fprintf(stderr,"CASAVA=1.8\n");
482+
fprintf(stdout,"CASAVA=1.8\n");
483483
fd->readname_format=CASAVA18;
484484
} else {
485485
int is_int_name=is_int_readname(rn);
486486
if ( is_int_name ) {
487-
fprintf(stderr,"Read name provided as an integer\n");
487+
fprintf(stdout,"Read name provided as an integer\n");
488488
fd->readname_format=INTEGERNAME;
489489
} else {
490490
int no_suffix=is_nosuffix_readname(rn);
491491
if ( no_suffix ) {
492-
fprintf(stderr,"Read name provided with no suffix\n");
492+
fprintf(stdout,"Read name provided with no suffix\n");
493493
fd->readname_format=NOP;
494494
} else
495495
fd->readname_format=DEFAULT;
@@ -500,7 +500,7 @@ char* fastq_get_readname(FASTQ_FILE* fd, FASTQ_ENTRY* e,char* rn,unsigned long *
500500
if ( fd->space==UNDEFSPACE ) {
501501
fd->space=is_color_space(e->seq,fd);
502502
if ( fd->space==COLORSPACE ) {
503-
fprintf(stderr,"Color space\n");
503+
fprintf(stdout,"Color space\n");
504504
}
505505
}
506506

lib/fastq_utils/fastq.h

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,22 +65,57 @@ typedef long FASTQ_READ_OFFSET;
6565
/* Smaller of a and b.
 * Every argument use is parenthesized so that operands containing
 * lower-precedence operators (e.g. x&y, p?q:r) are compared as written.
 * Each argument may be evaluated twice: do not pass expressions with
 * side effects. */
#define min(a,b) ((a)<(b)?(a):(b))
6666
/* Larger of a and b.
 * Every argument use is parenthesized so that operands containing
 * lower-precedence operators (e.g. x|y, p?q:r) are compared as written.
 * Each argument may be evaluated twice: do not pass expressions with
 * side effects. */
#define max(a,b) ((a)>(b)?(a):(b))
6767

68-
69-
#define PRINT_INFO(s...) {fprintf(stderr,"INFO:"); fprintf(stderr,##s );fprintf(stderr,"\n");}
70-
#define PRINT_ERROR(s...) {fprintf(stderr,"\nERROR: "); fprintf(stderr,##s );fprintf(stderr,"\n");}
71-
#define FATAL_ERROR(e,s...) {fprintf(stderr,"\nERROR: "); fprintf(stderr,##s );fprintf(stderr,"\n");exit(e);}
72-
68+
// Print informational message
69+
#define PRINT_INFO(...) \
70+
do { \
71+
fprintf(stdout, "INFO: "); \
72+
fprintf(stdout, __VA_ARGS__); \
73+
fprintf(stdout, "\n"); \
74+
} while (0)
75+
76+
// Print error message
77+
#define PRINT_ERROR(...) \
78+
do { \
79+
fprintf(stdout, "\nERROR: "); \
80+
fprintf(stdout, __VA_ARGS__); \
81+
fprintf(stdout, "\n"); \
82+
} while (0)
83+
84+
// Print fatal error message and exit
85+
#define FATAL_ERROR(code, ...) \
86+
do { \
87+
fprintf(stdout, "\nERROR: "); \
88+
fprintf(stdout, __VA_ARGS__); \
89+
fprintf(stdout, "\n"); \
90+
exit(code); \
91+
} while (0)
92+
93+
// Print debug message (only if DEBUG is defined)
7394
#ifdef DEBUG
74-
#define PRINT_DEBUG(s...) { fprintf(stderr,"DEBUG: "); fprintf(stderr,##s ); }
95+
#define PRINT_DEBUG(...) \
96+
do { \
97+
fprintf(stdout, "DEBUG: "); \
98+
fprintf(stdout, __VA_ARGS__); \
99+
} while (0)
75100
#else
76-
#define PRINT_DEBUG(s...)
101+
#define PRINT_DEBUG(...) do {} while (0)
77102
#endif
78103

104+
// #define PRINT_INFO(s...) {fprintf(stderr,"INFO:"); fprintf(stderr,##s );fprintf(stderr,"\n");}
105+
// #define PRINT_ERROR(s...) {fprintf(stderr,"\nERROR: "); fprintf(stderr,##s );fprintf(stderr,"\n");}
106+
// #define FATAL_ERROR(e,s...) {fprintf(stderr,"\nERROR: "); fprintf(stderr,##s );fprintf(stderr,"\n");exit(e);}
107+
108+
// #ifdef DEBUG
109+
// #define PRINT_DEBUG(s...) { fprintf(stderr,"DEBUG: "); fprintf(stderr,##s ); }
110+
// #else
111+
// #define PRINT_DEBUG(s...)
112+
// #endif
113+
79114
#define PARAMS_ERROR_EXIT_STATUS 1
80115
#define SYS_INT_ERROR_EXIT_STATUS 2
81116
#define FASTQ_FORMAT_ERROR_EXIT_STATUS 3
82117

83-
#define PRINT_READS_PROCESSED(c,n) { if (c%n==0) { fprintf(stderr,"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%lu",c);fflush(stderr); }}
118+
/* Progress counter: whenever c is a multiple of n, emit backspaces over the
 * previously printed value, print c on stdout, and flush.
 * c is printed with %lu, so it is expected to be an unsigned long.
 * Arguments are parenthesized so expression arguments (e.g. i+1) are tested
 * as a whole. The brace-block form is kept so existing call sites with or
 * without a trailing semicolon continue to compile. */
#define PRINT_READS_PROCESSED(c,n) { if ((c)%(n)==0) { fprintf(stdout,"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%lu",(c));fflush(stdout); }}
84119

85120
extern unsigned long index_mem;
86121
extern char* encodings[];

lib/fastq_utils/fastq_info.cpp

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <stdlib.h>
3030
#include <regex.h>
3131
#include <zlib.h>
32+
#include <inttypes.h>
3233

3334

3435
#include "hash.h"
@@ -58,7 +59,7 @@ static inline unsigned int median_rl(FASTQ_FILE* fd1,FASTQ_FILE* fd2) {
5859
FASTQ_FILE* validate_interleaved(char *f) {
5960
//unsigned long cline=1;
6061

61-
fprintf(stderr,"Paired-end interleaved\n");
62+
fprintf(stdout,"Paired-end interleaved\n");
6263

6364
FASTQ_FILE* fd1=fastq_new(f,FALSE,"r");
6465
fastq_is_pe(fd1);
@@ -237,7 +238,7 @@ int fastq_info_main(int argc, char **argv ) {
237238
break;
238239
case 'f':
239240
//fix_dot = TRUE;
240-
fprintf(stderr,"Fixing (-f) enabled: Replacing . by N (creating .fix.gz files)\n");
241+
fprintf(stdout,"Fixing (-f) enabled: Replacing . by N (creating .fix.gz files)\n");
241242
PRINT_ERROR("-f option is no longer valid.");
242243
exit(PARAMS_ERROR_EXIT_STATUS);
243244
++nopt;
@@ -276,30 +277,31 @@ int fastq_info_main(int argc, char **argv ) {
276277
fd1=validate_interleaved(argv[1+nopt]);
277278
num_reads1=fd1->num_rds;
278279
} else if ( is_paired_data && is_sorted && skip_readname_check ) {
279-
fprintf(stderr,"-s option used: assuming that reads have the same ordering in both files\n");
280+
fprintf(stdout,"-s option used: assuming that reads have the same ordering in both files\n");
280281
fd1=validate_paired_sorted_fastq_file(argv[1+nopt],argv[2+nopt]);
281282
num_reads1=fd1->num_rds;
282283

283284
} else if ( !is_paired_data && skip_readname_check) {
284285
// SE & skip readname check
285-
fprintf(stderr,"Skipping check for duplicated read names\n");
286+
fprintf(stdout,"Skipping check for duplicated read names\n");
286287
fd1=validate_single_fastq_file(argv[1+nopt]);
287288
num_reads1=fd1->num_rds;
288289
} else {
289290
// single or pair of fastq file(s)
290291
fd1=fastq_new(argv[1+nopt],FALSE,"r");
291292
if ( is_paired_data) fastq_is_pe(fd1);
292-
fprintf(stderr,"DEFAULT_HASHSIZE=%lu\n",(long unsigned int)DEFAULT_HASHSIZE);
293+
fprintf(stdout,"DEFAULT_HASHSIZE=%lu\n",(long unsigned int)DEFAULT_HASHSIZE);
293294
index=new_hashtable(DEFAULT_HASHSIZE);
294295
index_mem+=sizeof(hashtable);
295-
fprintf(stderr,"Scanning and indexing all reads from %s\n",fd1->filename);
296+
fprintf(stdout,"Scanning and indexing all reads from %s\n",fd1->filename);
296297
fastq_index_readnames(fd1,index,0,FALSE);
297-
fprintf(stderr,"Scanning complete.\n");
298+
fprintf(stdout,"Scanning complete.\n");
298299
num_reads1=index->n_entries;
299-
fprintf(stderr,"\n");
300+
fprintf(stdout,"\n");
300301
// print some info
301-
fprintf(stderr,"Reads processed: %llu\n",index->n_entries);
302-
fprintf(stderr,"Memory used in indexing: ~%ld MB\n",index_mem/1024/1024);
302+
fprintf(stdout, "Reads processed: %" PRIu64 "\n", index->n_entries);
303+
// fprintf(stdout,"Reads processed: %llu\n",index->n_entries);
304+
fprintf(stdout,"Memory used in indexing: ~%ld MB\n",index_mem/1024/1024);
303305
}
304306

305307
if (num_reads1 == 0 ) {
@@ -321,8 +323,8 @@ int fastq_info_main(int argc, char **argv ) {
321323

322324
// pair-end
323325
if (argc-nopt ==3 && !is_interleaved && ! is_sorted ) {
324-
fprintf(stderr,"File %s processed\n",argv[1+nopt]);
325-
fprintf(stderr,"Next file %s\n",argv[2+nopt]);
326+
fprintf(stdout,"File %s processed\n",argv[1+nopt]);
327+
fprintf(stdout,"Next file %s\n",argv[2+nopt]);
326328
// validate the second file and check if all reads are paired
327329
fd2=fastq_new(argv[2+nopt],FALSE,"r");
328330
fastq_is_pe(fd2);
@@ -352,7 +354,8 @@ int fastq_info_main(int argc, char **argv ) {
352354
printf("\n");
353355
//fastq_destroy(fdf);//???
354356
if (index->n_entries>0 ) {
355-
PRINT_ERROR("Error in file %s: found %llu unpaired reads",argv[1+nopt],index->n_entries);
357+
PRINT_ERROR("Error in file %s: found %" PRIu64 " unpaired reads",argv[1+nopt],index->n_entries);
358+
// PRINT_ERROR("Error in file %s: found %llu unpaired reads",argv[1+nopt],index->n_entries);
356359
exit(FASTQ_FORMAT_ERROR_EXIT_STATUS);
357360
}
358361
// stats
@@ -365,7 +368,7 @@ int fastq_info_main(int argc, char **argv ) {
365368
// stats
366369
// min qual/max qual/read len
367370
FILE* out;
368-
out=stderr;
371+
out=stdout;
369372

370373
fprintf(out,"------------------------------------\n");
371374
if ( num_reads2>0 ) {

src/commons/FileMerger.cpp

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ void FileMerger::updateTaxId2SpeciesTaxId(const string & taxIdListFileName) {
3737
}
3838

3939
char taxID[100];
40-
while(feof(taxIdFile) == 0) {
41-
fscanf(taxIdFile,"%s",taxID);
40+
while(fscanf(taxIdFile,"%s",taxID) == 1) {
4241
TaxID taxId = atol(taxID);
4342
TaxonNode const * taxon = taxonomy->taxonNode(taxId);
4443
if (taxId == taxon->taxId){
@@ -72,20 +71,7 @@ void FileMerger::printTaxIdList(const string & taxIdListFileName) {
7271
for (auto & taxid: taxId2speciesId) {
7372
fprintf(taxidListFile, "%d\n", taxid.first);
7473
}
75-
76-
77-
FILE * taxIdFile;
78-
if((taxIdFile = fopen(taxIdListFileName.c_str(),"r")) == NULL){
79-
cout << "Cannot open the taxID list file: " << taxIdListFileName << endl;
80-
return;
81-
}
82-
83-
char taxID[100];
84-
while(feof(taxIdFile) == 0) {
85-
fscanf(taxIdFile,"%s",taxID);
86-
cout << taxID << endl;
87-
}
88-
fclose(taxIdFile);
74+
fclose(taxidListFile);
8975
}
9076

9177
void FileMerger::mergeTargetFiles() {

0 commit comments

Comments
 (0)