@@ -610,12 +610,16 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
610610 prefix_len = 6 ; // IonTorrent
611611 * fixed_len = 6 ;
612612 * is_fixed = 1 ;
613- } else if (l > 37 && d [f + 8 ] == '-' && d [f + 13 ] == '-' && d [f + 18 ] == '-' && d [f + 23 ] == '-' &&
614- ((d [f + 0 ] >= '0' && d [f + 0 ] <='9' ) || (d [f + 0 ] >= 'a' && d [f + 0 ] <= 'f' )) &&
615- ((d [f + 35 ] >= '0' && d [f + 35 ] <='9' ) || (d [f + 35 ] >= 'a' && d [f + 35 ] <= 'f' ))) {
613+ } else if (l >= 36
614+ && d [f + 8 ]== '-' && d [f + 13 ]== '-' && d [f + 18 ]== '-' && d [f + 23 ]== '-'
615+ && isxdigit ((uint8_t )d [f + 0 ]) && isxdigit ((uint8_t )d [f + 7 ])
616+ && isxdigit ((uint8_t )d [f + 9 ]) && isxdigit ((uint8_t )d [f + 12 ])
617+ && isxdigit ((uint8_t )d [f + 14 ]) && isxdigit ((uint8_t )d [f + 17 ])
618+ && isxdigit ((uint8_t )d [f + 19 ]) && isxdigit ((uint8_t )d [f + 22 ])
619+ && isxdigit ((uint8_t )d [f + 24 ]) && isxdigit ((uint8_t )d [f + 35 ])) {
616620 // ONT: f33d30d5-6eb8-4115-8f46-154c2620a5da_Basecall_1D_template...
617- prefix_len = 37 ;
618- * fixed_len = 37 ;
621+ prefix_len = 36 ;
622+ * fixed_len = 36 ;
619623 * is_fixed = 1 ;
620624 } else {
621625 // Check Illumina and trim back to lane:tile:x:y.
@@ -638,7 +642,6 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
638642 * is_fixed = 0 ;
639643 }
640644 }
641- //prefix_len = INT_MAX;
642645
643646 if (!ctx -> t_head ) {
644647 ctx -> t_head = calloc (1 , sizeof (* ctx -> t_head ));
@@ -647,6 +650,7 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
647650 }
648651
649652 // Find an item in the trie
653+ int from_punct = from ;
650654 for (nlines = i = 0 ; i < len ; i ++ , nlines ++ ) {
651655 t = ctx -> t_head ;
652656 while (i < len && data [i ] > '\n' ) {
@@ -661,24 +665,18 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
661665 x = x -> sibling ;
662666 t = x ;
663667
664- // t = t->next[c];
665-
666- // if (!t)
667- // return -1;
668-
669668 from = t -> n ;
669+ if ((ispunct (c ) || isspace (c )) && t -> n != n )
670+ from_punct = t -> n ;
670671 if (i == prefix_len ) p3 = t -> n ;
671- //if (t->count >= .0035*ctx->t_head->count && t->n != n) p3 = t->n; // pacbio
672- //if (i == 60) p3 = t->n; // pacbio
673- //if (i == 7) p3 = t->n; // iontorrent
674672 t -> n = n ;
675673 }
676674 }
677675
678676 //printf("Looked for %d, found %d, prefix %d\n", n, from, p3);
679677
680678 * exact = (n != from ) && len ;
681- return * exact ? from : p3 ;
679+ return * exact ? from : ( p3 != -1 ? p3 : from_punct ) ;
682680}
683681
684682
@@ -729,10 +727,29 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
729727 if (!ctx -> lc [cnum ].last )
730728 return -1 ;
731729 encode_token_diff (ctx , cnum - pnum );
732-
733730 int ntok = 1 ;
734- i = 0 ;
735- if (is_fixed ) {
731+
732+ if (fixed_len == 36 ) {
733+ // ONT uuid4 format data
734+ if (37 >= ctx -> max_tok ) {
735+ do {
736+ memset (& ctx -> desc [ctx -> max_tok << 4 ], 0 , 16 * sizeof (ctx -> desc [0 ]));
737+ memset (& ctx -> token_dcount [ctx -> max_tok ], 0 , sizeof (int ));
738+ memset (& ctx -> token_icount [ctx -> max_tok ], 0 , sizeof (int ));
739+ } while (ctx -> max_tok ++ < 37 );
740+ }
741+ #ifdef ENC_DEBUG
742+ fprintf (stderr , "Tok %d (%d x uuid chr)" , ntok , len );
743+ #endif
744+ for (i = 0 ; i < 36 ; i ++ , ntok ++ ) {
745+ encode_token_char (ctx , ntok , name [i ]);
746+ ctx -> lc [cnum ].last [ntok ].token_int = name [i ];
747+ ctx -> lc [cnum ].last [ntok ].token_type = N_CHAR ;
748+ }
749+ is_fixed = 0 ;
750+ i = 36 ;
751+ } else if (is_fixed ) {
752+ // Other fixed length data
736753 if (ntok >= ctx -> max_tok ) {
737754 memset (& ctx -> desc [ctx -> max_tok << 4 ], 0 , 16 * sizeof (ctx -> desc [0 ]));
738755 memset (& ctx -> token_dcount [ctx -> max_tok ], 0 , sizeof (int ));
@@ -752,6 +769,8 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
752769 ctx -> lc [cnum ].last [ntok ].token_str = 0 ;
753770 ctx -> lc [cnum ].last [ntok ++ ].token_type = N_ALPHA ;
754771 i = fixed_len ;
772+ } else {
773+ i = 0 ;
755774 }
756775
757776 for (; i < len ; i ++ ) {
0 commit comments