@@ -52,28 +52,19 @@ static inline __m256i translate_exact(__m256i in, uint8_t match, uint8_t decode)
5252 * on decode failure, returns false, else returns true on success.
5353 */
5454static inline bool decode_vec (__m256i * in ) {
55- __m256i tmp1 , tmp2 , tmp3 , tmp4 , tmp5 ;
55+ __m256i tmp1 , tmp2 , tmp3 ;
5656
5757 /*
5858 * Base64 decoding table, see RFC4648
5959 *
6060 * Note that we use multiple vector registers to try to allow the CPU to
61- * parallelize the merging ORs
61+ * parallelize the merging ORs
6262 */
6363 tmp1 = translate_range (* in , 'A' , 'Z' , 0 + 1 );
6464 tmp2 = translate_range (* in , 'a' , 'z' , 26 + 1 );
6565 tmp3 = translate_range (* in , '0' , '9' , 52 + 1 );
66- // Handle both '+' and '-' for value 62
67- tmp4 = translate_exact (* in , '+' , 62 + 1 );
68- tmp4 = _mm256_or_si256 (tmp4 , translate_exact (* in , '-' , 62 + 1 ));
69-
70- // Handle both '/' and '_' for value 63
71- tmp5 = translate_exact (* in , '/' , 63 + 1 );
72- tmp5 = _mm256_or_si256 (tmp5 , translate_exact (* in , '_' , 63 + 1 ));
73-
74- // Combine all results
75- tmp1 = _mm256_or_si256 (tmp1 , tmp4 );
76- tmp2 = _mm256_or_si256 (tmp2 , tmp5 );
66+ tmp1 = _mm256_or_si256 (tmp1 , translate_exact (* in , '+' , 62 + 1 ));
67+ tmp2 = _mm256_or_si256 (tmp2 , translate_exact (* in , '/' , 63 + 1 ));
7768 tmp3 = _mm256_or_si256 (tmp3 , _mm256_or_si256 (tmp1 , tmp2 ));
7869
7970 /*
@@ -273,28 +264,11 @@ static inline __m256i encode_chars(__m256i in) {
273264 return _mm256_or_si256 (tmp3 , _mm256_or_si256 (tmp1 , tmp2 ));
274265}
275266
276- static inline __m256i encode_chars_url_safe (__m256i in ) {
277- __m256i tmp1 , tmp2 , tmp3 ;
278-
279- /*
280- * Base64 URL encoding table, see RFC4648
281- *
282- * We again use fan-in for the ORs here.
283- */
284- tmp1 = translate_range (in , 0 , 25 , 'A' );
285- tmp2 = translate_range (in , 26 , 26 + 25 , 'a' );
286- tmp3 = translate_range (in , 52 , 61 , '0' );
287- tmp1 = _mm256_or_si256 (tmp1 , translate_exact (in , 62 , '-' ));
288- tmp2 = _mm256_or_si256 (tmp2 , translate_exact (in , 63 , '_' ));
289-
290- return _mm256_or_si256 (tmp3 , _mm256_or_si256 (tmp1 , tmp2 ));
291- }
292-
293267/*
294268 * Input: A 256-bit vector, interpreted as 24 bytes (LSB) plus 8 bytes of high-byte padding
295269 * Output: A 256-bit vector of base64 characters
296270 */
297- static inline __m256i encode_stride (__m256i vec , bool url_safe ) {
271+ static inline __m256i encode_stride (__m256i vec ) {
298272 /*
299273 * First, since byte-shuffle operations operate within 128-bit subvectors, swap around the dwords
300274 * to balance the amount of actual data between 128-bit subvectors.
@@ -355,14 +329,10 @@ static inline __m256i encode_stride(__m256i vec, bool url_safe) {
355329 vec = _mm256_or_si256 (_mm256_or_si256 (digit0 , digit1 ), _mm256_or_si256 (digit2 , digit3 ));
356330
357331 /* Finally translate to the base64 character set */
358- return url_safe ? encode_chars ( vec ) : encode_chars_url_safe (vec );
332+ return encode_chars (vec );
359333}
360334
361- void aws_common_private_base64_encode_sse41 (
362- const uint8_t * input ,
363- uint8_t * output ,
364- size_t inlen ,
365- bool url_safe_encoding ) {
335+ void aws_common_private_base64_encode_sse41 (const uint8_t * input , uint8_t * output , size_t inlen ) {
366336 __m256i instride , outstride ;
367337
368338 while (inlen >= 32 ) {
@@ -372,7 +342,7 @@ void aws_common_private_base64_encode_sse41(
372342 * of unreadable pages, so we use bounce buffers below.
373343 */
374344 instride = _mm256_loadu_si256 ((__m256i const * )input );
375- outstride = encode_stride (instride , url_safe_encoding );
345+ outstride = encode_stride (instride );
376346 _mm256_storeu_si256 ((__m256i * )output , outstride );
377347
378348 input += 24 ;
@@ -391,10 +361,10 @@ void aws_common_private_base64_encode_sse41(
391361 memset (& instride , 0 , sizeof (instride ));
392362 memcpy (& instride , input , stridelen );
393363
394- outstride = encode_stride (instride , url_safe_encoding );
364+ outstride = encode_stride (instride );
395365 memcpy (output , & outstride , outlen );
396366
397- if (! url_safe_encoding && inlen < 24 ) {
367+ if (inlen < 24 ) {
398368 if (inlen % 3 >= 1 ) {
399369 /* AA== or AAA= */
400370 output [outlen - 1 ] = '=' ;
0 commit comments