|
| 1 | +/* |
| 2 | + * Copyright (c) 2025 Fastly, Kazuho Oku |
| 3 | + * |
| 4 | + * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 5 | + * of this software and associated documentation files (the "Software"), to |
| 6 | + * deal in the Software without restriction, including without limitation the |
| 7 | + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| 8 | + * sell copies of the Software, and to permit persons to whom the Software is |
| 9 | + * furnished to do so, subject to the following conditions: |
| 10 | + * |
| 11 | + * The above copyright notice and this permission notice shall be included in |
| 12 | + * all copies or substantial portions of the Software. |
| 13 | + * |
| 14 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 17 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 18 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 19 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 20 | + * IN THE SOFTWARE. |
| 21 | + */ |
| 22 | +#ifndef picotls_quiclb_h |
| 23 | +#define picotls_quiclb_h |
| 24 | + |
| 25 | +#if defined(__x86_64__) || defined(_M_X64) |
| 26 | +#include <emmintrin.h> |
| 27 | +#define PICOTLS_QUICLB_HAVE_SSE2 1 |
| 28 | +#endif |
| 29 | + |
| 30 | +union picotls_quiclb_block { |
| 31 | + uint8_t bytes[PTLS_AES_BLOCK_SIZE]; |
| 32 | + uint64_t u64[PTLS_AES_BLOCK_SIZE / sizeof(uint64_t)]; |
| 33 | +#if PICOTLS_QUICLB_HAVE_SSE2 |
| 34 | + __m128i m128; |
| 35 | +#endif |
| 36 | +}; |
| 37 | + |
| 38 | +/** |
| 39 | + * encrypts one block of AES, assuming the context is `ptls_cipher_context_t` backed by ptls_foo_aes128ecb |
| 40 | + */ |
| 41 | +static inline void picotls_quiclb_cipher_aes(void *aesecb, union picotls_quiclb_block *block) |
| 42 | +{ |
| 43 | + ptls_cipher_encrypt(aesecb, block->bytes, block->bytes, PTLS_AES_BLOCK_SIZE); |
| 44 | +} |
| 45 | + |
| 46 | +/** |
| 47 | + * calculates X ^ AES(mask_and_expand(Y)) |
| 48 | + */ |
| 49 | +static inline void picotls_quiclb_one_round(void (*aesecb_func)(void *aesecb, union picotls_quiclb_block *), void *aesecb_ctx, |
| 50 | + union picotls_quiclb_block *dest, const union picotls_quiclb_block *x, |
| 51 | + const union picotls_quiclb_block *y, const union picotls_quiclb_block *mask, |
| 52 | + const union picotls_quiclb_block *len_pass) |
| 53 | +{ |
| 54 | +#if PICOTLS_QUICLB_HAVE_SSE2 |
| 55 | + dest->m128 = _mm_or_si128(_mm_and_si128(y->m128, mask->m128), len_pass->m128); |
| 56 | +#else |
| 57 | + for (size_t i = 0; i < PTLS_ELEMENTSOF(dest->u64); ++i) |
| 58 | + dest->u64[i] = (y->u64[i] & mask->u64[i]) | len_pass->u64[i]; |
| 59 | +#endif |
| 60 | + |
| 61 | + aesecb_func(aesecb_ctx, dest); |
| 62 | + |
| 63 | +#if PICOTLS_QUICLB_HAVE_SSE2 |
| 64 | + dest->m128 = _mm_xor_si128(dest->m128, x->m128); |
| 65 | +#else |
| 66 | + for (size_t i = 0; i < PTLS_ELEMENTSOF(dest->u64); ++i) |
| 67 | + dest->u64[i] ^= x->u64[i]; |
| 68 | +#endif |
| 69 | +} |
| 70 | + |
| 71 | +static inline void picotls_quiclb_split_input(union picotls_quiclb_block *l, union picotls_quiclb_block *r, const uint8_t *input, |
| 72 | + size_t len) |
| 73 | +{ |
| 74 | + size_t i; |
| 75 | + for (i = 0; i < (len + 1) / 2; ++i) |
| 76 | + l->bytes[i] = input[i]; |
| 77 | + for (; i < PTLS_ELEMENTSOF(l->bytes); ++i) |
| 78 | + l->bytes[i] = 0; |
| 79 | + for (i = 0; i < (len + 1) / 2; ++i) |
| 80 | + r->bytes[i] = input[i + len / 2]; |
| 81 | + for (; i < PTLS_ELEMENTSOF(r->bytes); ++i) |
| 82 | + r->bytes[i] = 0; |
| 83 | +} |
| 84 | + |
| 85 | +static inline void picotls_quiclb_merge_output(uint8_t *output, size_t len, const union picotls_quiclb_block *l, |
| 86 | + const union picotls_quiclb_block *r) |
| 87 | +{ |
| 88 | + uint8_t *outp = output; |
| 89 | + |
| 90 | + for (size_t i = 0; i < len / 2; ++i) |
| 91 | + *outp++ = l->bytes[i]; |
| 92 | + |
| 93 | + if (len % 2 == 0) { |
| 94 | + for (size_t i = 0; i < len / 2; ++i) |
| 95 | + *outp++ = r->bytes[i]; |
| 96 | + } else { |
| 97 | + *outp++ = (l->bytes[len / 2] & 0xf0) | (r->bytes[0] & 0x0f); |
| 98 | + for (size_t i = 0; i < len / 2; ++i) |
| 99 | + *outp++ = r->bytes[i + 1]; |
| 100 | + } |
| 101 | +} |
| 102 | + |
| 103 | +static inline void picotls_quiclb_do_init(ptls_cipher_context_t *ctx, const void *iv) |
| 104 | +{ |
| 105 | + /* no-op */ |
| 106 | +} |
| 107 | + |
| 108 | +static inline void picotls_quiclb_transform(void (*aesecb_func)(void *aesecb, union picotls_quiclb_block *), void *aesecb_ctx, |
| 109 | + void *output, const void *input, size_t len, int encrypt) |
| 110 | +{ |
| 111 | + static const struct quiclb_mask_t { |
| 112 | + union picotls_quiclb_block l, r; |
| 113 | + } masks[] = { |
| 114 | + {{{0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff}}}, /* 7 (MIN_LEN) */ |
| 115 | + {{{0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff}}}, /* 8 */ |
| 116 | + {{{0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff}}}, /* 9 */ |
| 117 | + {{{0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 10 */ |
| 118 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 11 */ |
| 119 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 12 */ |
| 120 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 13 */ |
| 121 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 14 */ |
| 122 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 15 */ |
| 123 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 16 */ |
| 124 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, |
| 125 | + {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 17 */ |
| 126 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, |
| 127 | + {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 18 */ |
| 128 | + {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, |
| 129 | + {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}} /* 19 */ |
| 130 | + }; |
| 131 | + |
| 132 | + assert(PTLS_QUICLB_MIN_BLOCK_SIZE <= len && len <= PTLS_QUICLB_MAX_BLOCK_SIZE); |
| 133 | + PTLS_BUILD_ASSERT(PTLS_QUICLB_MAX_BLOCK_SIZE == PTLS_QUICLB_MIN_BLOCK_SIZE + PTLS_ELEMENTSOF(masks) - 1); |
| 134 | + |
| 135 | + const struct quiclb_mask_t *mask = &masks[len - PTLS_QUICLB_MIN_BLOCK_SIZE]; |
| 136 | + union picotls_quiclb_block l0, r0, r1, l1, r2, l2, len_pass = {{0}}; |
| 137 | + len_pass.bytes[14] = (uint8_t)len; |
| 138 | + |
| 139 | +#define ROUND(rnd, dest, x, y, mask_side) \ |
| 140 | + do { \ |
| 141 | + len_pass.bytes[15] = (rnd); \ |
| 142 | + picotls_quiclb_one_round(aesecb_func, aesecb_ctx, &dest, &x, &y, &mask->mask_side, &len_pass); \ |
| 143 | + } while (0) |
| 144 | + |
| 145 | + if (encrypt) { |
| 146 | + picotls_quiclb_split_input(&l0, &r0, input, len); |
| 147 | + ROUND(1, r1, r0, l0, l); |
| 148 | + ROUND(2, l1, l0, r1, r); |
| 149 | + ROUND(3, r2, r1, l1, l); |
| 150 | + ROUND(4, l2, l1, r2, r); |
| 151 | + picotls_quiclb_merge_output(output, len, &l2, &r2); |
| 152 | + } else { |
| 153 | + picotls_quiclb_split_input(&l2, &r2, input, len); |
| 154 | + ROUND(4, l1, l2, r2, r); |
| 155 | + ROUND(3, r1, r2, l1, l); |
| 156 | + ROUND(2, l0, l1, r1, r); |
| 157 | + ROUND(1, r0, r1, l0, l); |
| 158 | + picotls_quiclb_merge_output(output, len, &l0, &r0); |
| 159 | + } |
| 160 | + |
| 161 | +#undef ROUND |
| 162 | +} |
| 163 | + |
| 164 | +#endif |
0 commit comments