|
54 | 54 | #define PICK_SHORT 8 |
55 | 55 | #define PICK_IUPAC 16 |
56 | 56 |
|
// Case-conversion codes passed as the `mark` argument of mark_ins() and
// mark_snv(). Both must be non-zero, because a zero mark means "do not mark
// anything". They are also kept at or below 32 so that they cannot be confused
// with a literal marking character: --mark-ins/--mark-snv accept CHAR only
// when it lies in the range 33..126.
#define TO_UPPER 1
#define TO_LOWER 2
59 | 59 |
|
60 | 60 | typedef struct |
61 | 61 | { |
@@ -324,7 +324,7 @@ static void init_region(args_t *args, char *line) |
324 | 324 | { |
325 | 325 | char *ss, *se = line; |
326 | 326 | while ( *se && !isspace(*se) && *se!=':' ) se++; |
327 | | - int from = 0, to = 0; |
| 327 | + hts_pos_t from = 0, to = 0; |
328 | 328 | char tmp = 0, *tmp_ptr = NULL; |
329 | 329 | if ( *se ) |
330 | 330 | { |
@@ -356,7 +356,14 @@ static void init_region(args_t *args, char *line) |
356 | 356 | args->fa_frz_mod = -1; |
357 | 357 | args->fa_case = -1; |
358 | 358 | args->vcf_rbuf.n = 0; |
359 | | - bcf_sr_seek(args->files,line,args->fa_ori_pos); |
| 359 | + |
| 360 | + kstring_t str = {0,0,0}; |
| 361 | + if ( from==0 ) from = 1; |
| 362 | + if ( to==0 ) to = HTS_POS_MAX; |
| 363 | + ksprintf(&str,"%s:%"PRIhts_pos"-%"PRIhts_pos,line,from,to); |
| 364 | + bcf_sr_set_regions(args->files,line,0); |
| 365 | + free(str.s); |
| 366 | + |
360 | 367 | if ( tmp_ptr ) *tmp_ptr = tmp; |
361 | 368 | fprintf(args->fp_out,">%s%s\n",args->chr_prefix?args->chr_prefix:"",line); |
362 | 369 | if ( args->chain_fname ) |
@@ -466,25 +473,37 @@ static char *mark_del(char *ref, int rlen, char *alt, int mark) |
466 | 473 | static void mark_ins(char *ref, char *alt, char mark) |
467 | 474 | { |
468 | 475 | int i, nref = strlen(ref), nalt = strlen(alt); |
469 | | - if ( mark=='l' ) |
| 476 | + if ( mark==TO_LOWER ) |
470 | 477 | for (i=nref; i<nalt; i++) alt[i] = tolower(alt[i]); |
471 | | - else |
| 478 | + else if ( mark==TO_UPPER ) |
472 | 479 | for (i=nref; i<nalt; i++) alt[i] = toupper(alt[i]); |
| 480 | + else if ( mark ) |
| 481 | + for (i=nref; i<nalt; i++) alt[i] = mark; |
473 | 482 | } |
474 | 483 | static void mark_snv(char *ref, char *alt, char mark) |
475 | 484 | { |
476 | 485 | int i, nref = strlen(ref), nalt = strlen(alt); |
477 | 486 | int n = nref < nalt ? nref : nalt; |
478 | | - if ( mark=='l' ) |
| 487 | + if ( mark==TO_LOWER ) |
479 | 488 | { |
480 | 489 | for (i=0; i<n; i++) |
481 | 490 | if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = tolower(alt[i]); |
482 | 491 | } |
483 | | - else |
| 492 | + else if ( mark==TO_UPPER) |
484 | 493 | { |
485 | 494 | for (i=0; i<n; i++) |
486 | 495 | if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]); |
487 | 496 | } |
| 497 | + else if ( mark==TO_UPPER) |
| 498 | + { |
| 499 | + for (i=0; i<n; i++) |
| 500 | + if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]); |
| 501 | + } |
| 502 | + else if ( mark ) |
| 503 | + { |
| 504 | + for (i=0; i<n; i++) |
| 505 | + if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = mark; |
| 506 | + } |
488 | 507 | } |
489 | 508 | static void iupac_init(args_t *args, bcf1_t *rec) |
490 | 509 | { |
@@ -1099,19 +1118,18 @@ static void usage(args_t *args) |
1099 | 1118 | fprintf(stderr, " -f, --fasta-ref FILE Reference sequence in fasta format\n"); |
1100 | 1119 | fprintf(stderr, " -H, --haplotype WHICH Choose which allele to use from the FORMAT/GT field, note\n"); |
1101 | 1120 | fprintf(stderr, " the codes are case-insensitive:\n"); |
1102 | | - fprintf(stderr, " 1: first allele from GT, regardless of phasing\n"); |
1103 | | - fprintf(stderr, " 2: second allele from GT, regardless of phasing\n"); |
| 1121 | + fprintf(stderr, " N: N={1,2,3,..} is the index of the allele from GT, regardless of phasing (e.g. \"2\")\n"); |
1104 | 1122 | fprintf(stderr, " R: REF allele in het genotypes\n"); |
1105 | 1123 | fprintf(stderr, " A: ALT allele\n"); |
1106 | 1124 | fprintf(stderr, " I: IUPAC code for all genotypes\n"); |
1107 | 1125 | fprintf(stderr, " LR,LA: longer allele and REF/ALT if equal length\n"); |
1108 | 1126 | fprintf(stderr, " SR,SA: shorter allele and REF/ALT if equal length\n"); |
1109 | | - fprintf(stderr, " 1pIu,2pIu: first/second allele for phased and IUPAC code for unphased GTs\n"); |
| 1127 | + fprintf(stderr, " NpIu: index of the allele for phased and IUPAC code for unphased GTs (e.g. \"2pIu\")\n"); |
1110 | 1128 | fprintf(stderr, " -i, --include EXPR Select sites for which the expression is true (see man page for details)\n"); |
1111 | 1129 | fprintf(stderr, " -I, --iupac-codes Output IUPAC codes based on FORMAT/GT, use -s/-S to subset samples\n"); |
1112 | | - fprintf(stderr, " --mark-del CHAR Instead of removing sequence, insert CHAR for deletions\n"); |
1113 | | - fprintf(stderr, " --mark-ins uc|lc Highlight insertions in uppercase (uc) or lowercase (lc), leaving the rest as is\n"); |
1114 | | - fprintf(stderr, " --mark-snv uc|lc Highlight substitutions in uppercase (uc) or lowercase (lc), leaving the rest as is\n"); |
| 1130 | + fprintf(stderr, " --mark-del CHAR Instead of removing sequence, insert character CHAR for deletions\n"); |
| 1131 | + fprintf(stderr, " --mark-ins uc|lc|CHAR Highlight insertions in uppercase (uc), lowercase (lc), or use CHAR, leaving the rest as is\n"); |
| 1132 | + fprintf(stderr, " --mark-snv uc|lc|CHAR Highlight substitutions in uppercase (uc), lowercase (lc), or use CHAR, leaving the rest as is\n"); |
1115 | 1133 | fprintf(stderr, " -m, --mask FILE Replace regions according to the next --mask-with option. The default is --mask-with N\n"); |
1116 | 1134 | fprintf(stderr, " --mask-with CHAR|uc|lc Replace with CHAR (skips overlapping variants); change to uppercase (uc) or lowercase (lc)\n"); |
1117 | 1135 | fprintf(stderr, " -M, --missing CHAR Output CHAR instead of skipping a missing genotype \"./.\"\n"); |
@@ -1163,13 +1181,15 @@ int main_consensus(int argc, char *argv[]) |
1163 | 1181 | { |
1164 | 1182 | case 1 : args->mark_del = optarg[0]; break; |
1165 | 1183 | case 2 : |
1166 | | - if ( !strcasecmp(optarg,"uc") ) args->mark_ins = 'u'; |
1167 | | - else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = 'l'; |
| 1184 | + if ( !strcasecmp(optarg,"uc") ) args->mark_ins = TO_UPPER; |
| 1185 | + else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = TO_LOWER; |
| 1186 | + else if ( !optarg[1] && optarg[0]>32 && optarg[0]<127 ) args->mark_ins = optarg[0]; |
1168 | 1187 | else error("The argument is not recognised: --mark-ins %s\n",optarg); |
1169 | 1188 | break; |
1170 | 1189 | case 3 : |
1171 | | - if ( !strcasecmp(optarg,"uc") ) args->mark_snv = 'u'; |
1172 | | - else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = 'l'; |
| 1190 | + if ( !strcasecmp(optarg,"uc") ) args->mark_snv = TO_UPPER; |
| 1191 | + else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = TO_LOWER; |
| 1192 | + else if ( !optarg[1] && optarg[0]>32 && optarg[0]<127 ) args->mark_snv = optarg[0]; |
1173 | 1193 | else error("The argument is not recognised: --mark-snv %s\n",optarg); |
1174 | 1194 | break; |
1175 | 1195 | case 'p': args->chr_prefix = optarg; break; |
@@ -1211,7 +1231,8 @@ int main_consensus(int argc, char *argv[]) |
1211 | 1231 | { |
1212 | 1232 | char *tmp; |
1213 | 1233 | args->haplotype = strtol(optarg, &tmp, 10); |
1214 | | - if ( tmp==optarg || *tmp ) error("Error: Could not parse --haplotype %s, expected numeric argument\n", optarg); |
| 1234 | + if ( tmp==optarg || (*tmp && strcasecmp(tmp,"pIu")) ) error("Error: Could not parse \"--haplotype %s\", expected number of number followed with \"pIu\"\n", optarg); |
| 1235 | + if ( *tmp ) args->allele |= PICK_IUPAC; |
1215 | 1236 | if ( args->haplotype <=0 ) error("Error: Expected positive integer with --haplotype\n"); |
1216 | 1237 | } |
1217 | 1238 | break; |
|
0 commit comments