Skip to content

Commit c666474

Browse files
committed
Add dedicated UUID4 (ONT) recognition to the name tokeniser
A candidate for replacement of #132. We still need to do more work here on optimising name compression in a general manner, especially on mixed data sets, but this resolves a simple case while having no impact on any other name formats.
1 parent c82e61c commit c666474

File tree

1 file changed

+32
-1
lines changed

1 file changed

+32
-1
lines changed

htscodecs/tokenise_name3.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -729,8 +729,38 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
729729
if (!ctx->lc[cnum].last)
730730
return -1;
731731
encode_token_diff(ctx, cnum-pnum);
732-
733732
int ntok = 1;
733+
734+
// Look for common form of UUID4 names and special case them
735+
i = 0;
736+
if (len == 36) {
737+
for (i = 0; i < len; i++) {
738+
if (!(isxdigit((uint8_t)name[i]) || name[i] == '-'))
739+
break;
740+
}
741+
}
742+
743+
// Is uuid4 (eg ONT).
744+
if (i == len) {
745+
if (37 >= ctx->max_tok) {
746+
do {
747+
memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0]));
748+
memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int));
749+
memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int));
750+
} while (ctx->max_tok++ < 37);
751+
}
752+
#ifdef ENC_DEBUG
753+
fprintf(stderr, "Tok %d (%d x uuid chr)", ntok, len);
754+
#endif
755+
//encode_token_nop(ctx, ntok++);
756+
for (i = 0; i < len; i++, ntok++) {
757+
encode_token_char(ctx, ntok, name[i]);
758+
ctx->lc[cnum].last[ntok].token_int = name[i];
759+
ctx->lc[cnum].last[ntok].token_type = N_CHAR;
760+
}
761+
goto end;
762+
}
763+
734764
i = 0;
735765
if (is_fixed) {
736766
if (ntok >= ctx->max_tok) {
@@ -967,6 +997,7 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
967997
//putchar(' ');
968998
}
969999

1000+
end:
9701001
#ifdef ENC_DEBUG
9711002
fprintf(stderr, "Tok %d (end)\n", N_END);
9721003
#endif

0 commit comments

Comments
 (0)