Skip to content

Commit 5abaea3

Browse files
Paul Eggert (eggert)
authored and committed
Tune base64 decoding
This improves performance of base64-decode-region by about 7.5% on
my platform, and gets rid of some macros.
* src/fns.c (IS_ASCII, IS_BASE64, IS_BASE64_IGNORABLE)
(READ_QUADRUPLET_BYTE): Remove.
(base64_value_to_char, base64_char_to_value): Now an array of two
arrays.  All uses changed.
(base64url_value_to_char, base64url_char_to_value): Remove.
All uses changed to the other array.
(base64_char_to_value): Entries are now of type signed char, not
short, since we can assume C99.  Use C99 initializers; this is
clearer and caters to the (theoretical) possibility of systems that
do not use ASCII or do not have 8-bit bytes.  Allow any index in the
range 0..UCHAR_MAX instead of limiting it to 0..127, so that uses
need not check for in-range indexes.  Also record padding chars.
All uses changed.
(base64_decode_1): Always store number of chars in *NCHARS_RETURN,
for simplicity.  All callers changed.  Speed up the byte-fetching.
1 parent 1043cd3 commit 5abaea3

File tree

1 file changed

+124
-97
lines changed

1 file changed

+124
-97
lines changed

src/fns.c

Lines changed: 124 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -3186,86 +3186,69 @@ The data read from the system are decoded using `locale-coding-system'. */)
31863186

31873187
#define MIME_LINE_LENGTH 76
31883188

3189-
#define IS_ASCII(Character) \
3190-
((Character) < 128)
3191-
#define IS_BASE64(Character) \
3192-
(IS_ASCII (Character) && b64_char_to_value[Character] >= 0)
3193-
#define IS_BASE64_IGNORABLE(Character) \
3194-
((Character) == ' ' || (Character) == '\t' || (Character) == '\n' \
3195-
|| (Character) == '\f' || (Character) == '\r')
3196-
3197-
/* Used by base64_decode_1 to retrieve a non-base64-ignorable
3198-
character or return retval if there are no characters left to
3199-
process. */
3200-
#define READ_QUADRUPLET_BYTE(retval) \
3201-
do \
3202-
{ \
3203-
if (i == length) \
3204-
{ \
3205-
if (nchars_return) \
3206-
*nchars_return = nchars; \
3207-
return (retval); \
3208-
} \
3209-
c = from[i++]; \
3210-
} \
3211-
while (IS_BASE64_IGNORABLE (c))
3212-
3213-
/* Table of characters coding the 64 values. */
3214-
static const char base64_value_to_char[64] =
3189+
/* Tables of characters coding the 64 values.  The first index selects
   the alphabet (0 = base64, 1 = base64url); the second index is the
   6-bit value to encode.  */
3189+
/* Tables of characters coding the 64 values. */
3190+
static char const base64_value_to_char[2][64] =
32153191
{
3192+
/* base64: values 62 and 63 are coded as '+' and '/'.  */
3193+
{
32163194
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', /* 0- 9 */
32173195
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', /* 10-19 */
32183196
'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', /* 20-29 */
32193197
'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', /* 30-39 */
32203198
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', /* 40-49 */
32213199
'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', /* 50-59 */
32223200
'8', '9', '+', '/' /* 60-63 */
3223-
};
3224-
3225-
static const char base64url_value_to_char[64] =
3226-
{
3201+
},
3202+
/* base64url: values 62 and 63 are coded as '-' and '_'.  */
3203+
{
32273204
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', /* 0- 9 */
32283205
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', /* 10-19 */
32293206
'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', /* 20-29 */
32303207
'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', /* 30-39 */
32313208
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', /* 40-49 */
32323209
'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', /* 50-59 */
32333210
'8', '9', '-', '_' /* 60-63 */
3211+
}
32343212
};
32353213

3236-
/* Table of base64 values for first 128 characters. */
3237-
static const short base64_char_to_value[128] =
3238-
{
3239-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */
3240-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */
3241-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */
3242-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */
3243-
-1, -1, -1, 62, -1, -1, -1, 63, 52, 53, /* 40- 49 */
3244-
54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */
3245-
-1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */
3246-
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */
3247-
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */
3248-
25, -1, -1, -1, -1, -1, -1, 26, 27, 28, /* 90- 99 */
3249-
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */
3250-
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */
3251-
49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */
3252-
};
3253-
3254-
static const short base64url_char_to_value[128] =
3255-
{
3256-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */
3257-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */
3258-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */
3259-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */
3260-
-1, -1, -1, -1, -1, 62, -1, -1, 52, 53, /* 40- 49 */
3261-
54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */
3262-
-1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */
3263-
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */
3264-
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */
3265-
25, -1, -1, -1, -1, 63, -1, 26, 27, 28, /* 90- 99 */
3266-
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */
3267-
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */
3268-
49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */
3214+
/* Tables of base64 values for bytes, indexed first by alphabet
   (0 = base64, 1 = base64url) and then by byte value.  -1 means
   ignorable, 0 invalid, positive means 1 + the represented value.
   Bytes without an explicit initializer below are zero, i.e. invalid.

   Each row has UCHAR_MAX + 1 entries so that every unsigned char,
   including UCHAR_MAX itself, is an in-range index; with only
   UCHAR_MAX entries a lookup of byte UCHAR_MAX would read past the
   end of the row.  */
static signed char const base64_char_to_value[2][UCHAR_MAX + 1] =
{
  /* base64 */
  {
    ['\t']= -1, ['\n']= -1, ['\f']= -1, ['\r']= -1, [' '] = -1,
    ['A'] =  1, ['B'] =  2, ['C'] =  3, ['D'] =  4, ['E'] =  5,
    ['F'] =  6, ['G'] =  7, ['H'] =  8, ['I'] =  9, ['J'] = 10,
    ['K'] = 11, ['L'] = 12, ['M'] = 13, ['N'] = 14, ['O'] = 15,
    ['P'] = 16, ['Q'] = 17, ['R'] = 18, ['S'] = 19, ['T'] = 20,
    ['U'] = 21, ['V'] = 22, ['W'] = 23, ['X'] = 24, ['Y'] = 25, ['Z'] = 26,
    ['a'] = 27, ['b'] = 28, ['c'] = 29, ['d'] = 30, ['e'] = 31,
    ['f'] = 32, ['g'] = 33, ['h'] = 34, ['i'] = 35, ['j'] = 36,
    ['k'] = 37, ['l'] = 38, ['m'] = 39, ['n'] = 40, ['o'] = 41,
    ['p'] = 42, ['q'] = 43, ['r'] = 44, ['s'] = 45, ['t'] = 46,
    ['u'] = 47, ['v'] = 48, ['w'] = 49, ['x'] = 50, ['y'] = 51, ['z'] = 52,
    ['0'] = 53, ['1'] = 54, ['2'] = 55, ['3'] = 56, ['4'] = 57,
    ['5'] = 58, ['6'] = 59, ['7'] = 60, ['8'] = 61, ['9'] = 62,
    ['+'] = 63, ['/'] = 64
  },
  /* base64url */
  {
    ['\t']= -1, ['\n']= -1, ['\f']= -1, ['\r']= -1, [' '] = -1,
    ['A'] =  1, ['B'] =  2, ['C'] =  3, ['D'] =  4, ['E'] =  5,
    ['F'] =  6, ['G'] =  7, ['H'] =  8, ['I'] =  9, ['J'] = 10,
    ['K'] = 11, ['L'] = 12, ['M'] = 13, ['N'] = 14, ['O'] = 15,
    ['P'] = 16, ['Q'] = 17, ['R'] = 18, ['S'] = 19, ['T'] = 20,
    ['U'] = 21, ['V'] = 22, ['W'] = 23, ['X'] = 24, ['Y'] = 25, ['Z'] = 26,
    ['a'] = 27, ['b'] = 28, ['c'] = 29, ['d'] = 30, ['e'] = 31,
    ['f'] = 32, ['g'] = 33, ['h'] = 34, ['i'] = 35, ['j'] = 36,
    ['k'] = 37, ['l'] = 38, ['m'] = 39, ['n'] = 40, ['o'] = 41,
    ['p'] = 42, ['q'] = 43, ['r'] = 44, ['s'] = 45, ['t'] = 46,
    ['u'] = 47, ['v'] = 48, ['w'] = 49, ['x'] = 50, ['y'] = 51, ['z'] = 52,
    ['0'] = 53, ['1'] = 54, ['2'] = 55, ['3'] = 56, ['4'] = 57,
    ['5'] = 58, ['6'] = 59, ['7'] = 60, ['8'] = 61, ['9'] = 62,
    ['-'] = 63, ['_'] = 64
  }
};
32703253

32713254
/* The following diagram shows the logical steps by which three octets
@@ -3454,7 +3437,7 @@ base64_encode_1 (const char *from, char *to, ptrdiff_t length,
34543437
int c;
34553438
unsigned int value;
34563439
int bytes;
3457-
char const *b64_value_to_char = (base64url) ? base64url_value_to_char : base64_value_to_char;
3440+
char const *b64_value_to_char = base64_value_to_char[base64url];
34583441

34593442
while (i < length)
34603443
{
@@ -3632,8 +3615,9 @@ the base 64 encoding, as defined in RFC 4648. */)
36323615
decoded = SAFE_ALLOCA (length);
36333616

36343617
/* The decoded result should be unibyte. */
3618+
ptrdiff_t decoded_chars;
36353619
decoded_length = base64_decode_1 (SSDATA (string), decoded, length,
3636-
!NILP (base64url), 0, NULL);
3620+
!NILP (base64url), 0, &decoded_chars);
36373621
if (decoded_length > length)
36383622
emacs_abort ();
36393623
else if (decoded_length >= 0)
@@ -3650,89 +3634,132 @@ the base 64 encoding, as defined in RFC 4648. */)
36503634

36513635
/* Base64-decode the data at FROM of LENGTH bytes into TO. If
36523636
MULTIBYTE, the decoded result should be in multibyte
3653-
form. If NCHARS_RETURN is not NULL, store the number of produced
3654-
characters in *NCHARS_RETURN. */
3637+
form. Store the number of produced characters in *NCHARS_RETURN. */
36553638

36563639
static ptrdiff_t
36573640
base64_decode_1 (const char *from, char *to, ptrdiff_t length,
36583641
bool base64url,
36593642
bool multibyte, ptrdiff_t *nchars_return)
36603643
{
3661-
ptrdiff_t i = 0; /* Used inside READ_QUADRUPLET_BYTE */
3644+
char const *f = from;
3645+
char const *flim = from + length;
36623646
char *e = to;
3663-
unsigned char c;
3664-
unsigned long value;
36653647
ptrdiff_t nchars = 0;
3666-
short const *b64_char_to_value = (base64url) ? base64url_char_to_value : base64_char_to_value;
3648+
signed char const *b64_char_to_value = base64_char_to_value[base64url];
3649+
unsigned char multibyte_bit = multibyte << 7;
36673650

3668-
while (1)
3651+
while (true)
36693652
{
3653+
unsigned char c;
3654+
int v1;
3655+
36703656
/* Process first byte of a quadruplet. */
36713657

3672-
READ_QUADRUPLET_BYTE (e-to);
3658+
do
3659+
{
3660+
if (f == flim)
3661+
{
3662+
*nchars_return = nchars;
3663+
return e - to;
3664+
}
3665+
c = *f++;
3666+
v1 = b64_char_to_value[c];
3667+
}
3668+
while (v1 < 0);
36733669

3674-
if (!IS_BASE64 (c))
3670+
if (v1 == 0)
36753671
return -1;
3676-
value = b64_char_to_value[c] << 18;
3672+
unsigned int value = (v1 - 1) << 18;
36773673

36783674
/* Process second byte of a quadruplet. */
36793675

3680-
READ_QUADRUPLET_BYTE (-1);
3676+
do
3677+
{
3678+
if (f == flim)
3679+
return -1;
3680+
c = *f++;
3681+
v1 = b64_char_to_value[c];
3682+
}
3683+
while (v1 < 0);
36813684

3682-
if (!IS_BASE64 (c))
3685+
if (v1 == 0)
36833686
return -1;
3684-
value |= b64_char_to_value[c] << 12;
3687+
value += (v1 - 1) << 12;
36853688

3686-
c = (unsigned char) (value >> 16);
3687-
if (multibyte && c >= 128)
3689+
c = value >> 16 & 0xff;
3690+
if (c & multibyte_bit)
36883691
e += BYTE8_STRING (c, e);
36893692
else
36903693
*e++ = c;
36913694
nchars++;
36923695

36933696
/* Process third byte of a quadruplet. */
36943697

3695-
if (!base64url)
3696-
READ_QUADRUPLET_BYTE (-1);
3697-
else
3698-
READ_QUADRUPLET_BYTE (e-to);
3698+
do
3699+
{
3700+
if (f == flim)
3701+
{
3702+
if (!base64url)
3703+
return -1;
3704+
*nchars_return = nchars;
3705+
return e - to;
3706+
}
3707+
c = *f++;
3708+
v1 = b64_char_to_value[c];
3709+
}
3710+
while (v1 < 0);
36993711

37003712
if (c == '=')
37013713
{
3702-
READ_QUADRUPLET_BYTE (-1);
3714+
do
3715+
{
3716+
if (f == flim)
3717+
return -1;
3718+
c = *f++;
3719+
}
3720+
while (b64_char_to_value[c] < 0);
37033721

37043722
if (c != '=')
37053723
return -1;
37063724
continue;
37073725
}
37083726

3709-
if (!IS_BASE64 (c))
3727+
if (v1 == 0)
37103728
return -1;
3711-
value |= b64_char_to_value[c] << 6;
3729+
value += (v1 - 1) << 6;
37123730

3713-
c = (unsigned char) (0xff & value >> 8);
3714-
if (multibyte && c >= 128)
3731+
c = value >> 8 & 0xff;
3732+
if (c & multibyte_bit)
37153733
e += BYTE8_STRING (c, e);
37163734
else
37173735
*e++ = c;
37183736
nchars++;
37193737

37203738
/* Process fourth byte of a quadruplet. */
37213739

3722-
if (!base64url)
3723-
READ_QUADRUPLET_BYTE (-1);
3724-
else
3725-
READ_QUADRUPLET_BYTE (e-to);
3740+
do
3741+
{
3742+
if (f == flim)
3743+
{
3744+
if (!base64url)
3745+
return -1;
3746+
*nchars_return = nchars;
3747+
return e - to;
3748+
}
3749+
c = *f++;
3750+
v1 = b64_char_to_value[c];
3751+
}
3752+
while (v1 < 0);
37263753

37273754
if (c == '=')
37283755
continue;
37293756

3730-
if (!IS_BASE64 (c))
3757+
if (v1 < 0)
37313758
return -1;
3732-
value |= b64_char_to_value[c];
3759+
value += v1 - 1;
37333760

3734-
c = (unsigned char) (0xff & value);
3735-
if (multibyte && c >= 128)
3761+
c = value & 0xff;
3762+
if (c & multibyte_bit)
37363763
e += BYTE8_STRING (c, e);
37373764
else
37383765
*e++ = c;

0 commit comments

Comments
 (0)