@@ -610,12 +610,16 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
610610 prefix_len = 6 ; // IonTorrent
611611 * fixed_len = 6 ;
612612 * is_fixed = 1 ;
613- } else if (l > 37 && d [f + 8 ] == '-' && d [f + 13 ] == '-' && d [f + 18 ] == '-' && d [f + 23 ] == '-' &&
614- ((d [f + 0 ] >= '0' && d [f + 0 ] <='9' ) || (d [f + 0 ] >= 'a' && d [f + 0 ] <= 'f' )) &&
615- ((d [f + 35 ] >= '0' && d [f + 35 ] <='9' ) || (d [f + 35 ] >= 'a' && d [f + 35 ] <= 'f' ))) {
613+ } else if (l >= 36
614+ && d [f + 8 ]== '-' && d [f + 13 ]== '-' && d [f + 18 ]== '-' && d [f + 23 ]== '-'
615+ && isxdigit ((uint8_t )d [f + 0 ]) && isxdigit ((uint8_t )d [f + 7 ])
616+ && isxdigit ((uint8_t )d [f + 9 ]) && isxdigit ((uint8_t )d [f + 12 ])
617+ && isxdigit ((uint8_t )d [f + 14 ]) && isxdigit ((uint8_t )d [f + 17 ])
618+ && isxdigit ((uint8_t )d [f + 19 ]) && isxdigit ((uint8_t )d [f + 22 ])
619+ && isxdigit ((uint8_t )d [f + 24 ]) && isxdigit ((uint8_t )d [f + 35 ])) {
616620 // ONT: f33d30d5-6eb8-4115-8f46-154c2620a5da_Basecall_1D_template...
617- prefix_len = 37 ;
618- * fixed_len = 37 ;
621+ prefix_len = 36 ;
622+ * fixed_len = 36 ;
619623 * is_fixed = 1 ;
620624 } else {
621625 // Check Illumina and trim back to lane:tile:x:y.
@@ -638,7 +642,6 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
638642 * is_fixed = 0 ;
639643 }
640644 }
641- //prefix_len = INT_MAX;
642645
643646 if (!ctx -> t_head ) {
644647 ctx -> t_head = calloc (1 , sizeof (* ctx -> t_head ));
@@ -647,6 +650,7 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
647650 }
648651
649652 // Find an item in the trie
653+ int from_punct = from ;
650654 for (nlines = i = 0 ; i < len ; i ++ , nlines ++ ) {
651655 t = ctx -> t_head ;
652656 while (i < len && data [i ] > '\n' ) {
@@ -661,24 +665,18 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
661665 x = x -> sibling ;
662666 t = x ;
663667
664- // t = t->next[c];
665-
666- // if (!t)
667- // return -1;
668-
669668 from = t -> n ;
669+ if ((ispunct (c ) || isspace (c )) && t -> n != n )
670+ from_punct = t -> n ;
670671 if (i == prefix_len ) p3 = t -> n ;
671- //if (t->count >= .0035*ctx->t_head->count && t->n != n) p3 = t->n; // pacbio
672- //if (i == 60) p3 = t->n; // pacbio
673- //if (i == 7) p3 = t->n; // iontorrent
674672 t -> n = n ;
675673 }
676674 }
677675
678676 //printf("Looked for %d, found %d, prefix %d\n", n, from, p3);
679677
680678 * exact = (n != from ) && len ;
681- return * exact ? from : p3 ;
679+ return * exact ? from : ( p3 != -1 ? p3 : from_punct ) ;
682680}
683681
684682
@@ -731,17 +729,8 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
731729 encode_token_diff (ctx , cnum - pnum );
732730 int ntok = 1 ;
733731
734- // Look for common form of UUID4 names and special case them
735- i = 0 ;
736- if (len == 36 ) {
737- for (i = 0 ; i < len ; i ++ ) {
738- if (!(isxdigit ((uint8_t )name [i ]) || name [i ] == '-' ))
739- break ;
740- }
741- }
742-
743- // Is uuid4 (eg ONT).
744- if (i == len ) {
732+ if (fixed_len == 36 ) {
733+ // ONT uuid4 format data
745734 if (37 >= ctx -> max_tok ) {
746735 do {
747736 memset (& ctx -> desc [ctx -> max_tok << 4 ], 0 , 16 * sizeof (ctx -> desc [0 ]));
@@ -752,17 +741,15 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
752741#ifdef ENC_DEBUG
753742 fprintf (stderr , "Tok %d (%d x uuid chr)" , ntok , len );
754743#endif
755- //encode_token_nop(ctx, ntok++);
756- for (i = 0 ; i < len ; i ++ , ntok ++ ) {
744+ for (i = 0 ; i < 36 ; i ++ , ntok ++ ) {
757745 encode_token_char (ctx , ntok , name [i ]);
758746 ctx -> lc [cnum ].last [ntok ].token_int = name [i ];
759747 ctx -> lc [cnum ].last [ntok ].token_type = N_CHAR ;
760748 }
761- goto end ;
762- }
763-
764- i = 0 ;
765- if (is_fixed ) {
749+ is_fixed = 0 ;
750+ i = 36 ;
751+ } else if (is_fixed ) {
752+ // Other fixed length data
766753 if (ntok >= ctx -> max_tok ) {
767754 memset (& ctx -> desc [ctx -> max_tok << 4 ], 0 , 16 * sizeof (ctx -> desc [0 ]));
768755 memset (& ctx -> token_dcount [ctx -> max_tok ], 0 , sizeof (int ));
@@ -782,6 +769,8 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
782769 ctx -> lc [cnum ].last [ntok ].token_str = 0 ;
783770 ctx -> lc [cnum ].last [ntok ++ ].token_type = N_ALPHA ;
784771 i = fixed_len ;
772+ } else {
773+ i = 0 ;
785774 }
786775
787776 for (; i < len ; i ++ ) {
@@ -997,7 +986,6 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
997986 //putchar(' ');
998987 }
999988
1000- end :
1001989#ifdef ENC_DEBUG
1002990 fprintf (stderr , "Tok %d (end)\n" , N_END );
1003991#endif
0 commit comments