Skip to content

Commit 08941cc

Browse files
committed
Use AESNI for AES-CTR if available
1 parent 6fb5ea5 commit 08941cc

File tree

1 file changed

+155
-2
lines changed

1 file changed

+155
-2
lines changed

Source/C++/Crypto/Ap4AesBlockCipher.cpp

Lines changed: 155 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,7 +1933,7 @@ AP4_AesCtrBlockCipher::Process(const AP4_UI08* input,
19331933
} else {
19341934
AP4_SetMemory(counter, 0, AP4_AES_BLOCK_SIZE);
19351935
}
1936-
1936+
19371937
// process all blocks
19381938
while (input_size) {
19391939
AP4_UI08 block[AP4_AES_BLOCK_SIZE];
@@ -1963,6 +1963,150 @@ AP4_AesCtrBlockCipher::Process(const AP4_UI08* input,
19631963
return AP4_SUCCESS;
19641964
}
19651965

1966+
#if AP4_AES_BLOCK_SIZE == 16 && AP4_AES_KEY_LENGTH == 16 && defined(__AES__) && defined(__SSE4_2__)
1967+
#define AP4_ENABLE_AESNI
1968+
1969+
#include <immintrin.h>
1970+
1971+
#define cpuid(func,ax,bx,cx,dx)\
1972+
__asm__ __volatile__ ("cpuid":\
1973+
"=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
1974+
1975+
static bool g_SupportAesNI = false;
1976+
__attribute__((constructor)) static void detect_aesni()
1977+
{
1978+
unsigned int a,b,c,d;
1979+
cpuid(1, a,b,c,d);
1980+
g_SupportAesNI = c & 0x2000000;
1981+
}
1982+
1983+
#endif
1984+
1985+
#ifdef AP4_ENABLE_AESNI
1986+
template <AP4_Size NB>
1987+
static void aesni_process_NB_blocks(AP4_UI08* Out, AP4_UI08 const* In, __m128i& CB, AP4_UI08 const* Keys)
1988+
{
1989+
__m128i CBs[NB];
1990+
const __m128i CB_ = CB;
1991+
#pragma unroll
1992+
for (AP4_Size i = 0; i < NB; ++i) {
1993+
CBs[i] = _mm_add_epi64(CB_, _mm_set_epi32(0,i+1,0,0));
1994+
}
1995+
CB = CBs[NB-1];
1996+
const __m128i Bswap64 = _mm_set_epi8(8,9,10,11,12,13,14,15,7,6,5,4,3,2,1,0);
1997+
#pragma unroll
1998+
for (AP4_Size i = 0; i < NB; ++i) {
1999+
CBs[i] = _mm_shuffle_epi8(CBs[i], Bswap64);
2000+
}
2001+
2002+
__m128i EncrCBs[NB];
2003+
const __m128i Key0 = _mm_loadu_si128((const __m128i*)Keys);
2004+
#pragma unroll
2005+
for (AP4_Size i = 0; i < NB; ++i) {
2006+
EncrCBs[i] = _mm_xor_si128(CBs[i], Key0);
2007+
}
2008+
#pragma unroll
2009+
for (AP4_Size R = 1; R < 10; R++) {
2010+
const __m128i Key = _mm_loadu_si128((const __m128i*)&Keys[R*AP4_AES_KEY_LENGTH]);
2011+
#pragma unroll
2012+
for (AP4_Size i = 0; i < NB; ++i) {
2013+
EncrCBs[i] = _mm_aesenc_si128(EncrCBs[i], Key);
2014+
}
2015+
}
2016+
2017+
const __m128i KeyLast = _mm_loadu_si128((const __m128i*)&Keys[10*AP4_AES_KEY_LENGTH]);
2018+
#pragma unroll
2019+
for (AP4_Size i = 0; i < NB; ++i) {
2020+
EncrCBs[i] = _mm_aesenclast_si128(EncrCBs[i],
2021+
_mm_xor_si128(KeyLast, _mm_loadu_si128((const __m128i*)&In[i*AP4_AES_BLOCK_SIZE])));
2022+
}
2023+
2024+
#pragma unroll
2025+
for (AP4_Size i = 0; i < NB; ++i) {
2026+
_mm_storeu_si128((__m128i*)&Out[i*AP4_AES_BLOCK_SIZE], EncrCBs[i]);
2027+
}
2028+
}
2029+
2030+
static void process(AP4_UI08* output, AP4_UI08 const* input, AP4_Size input_size, const AP4_UI08* iv, const AP4_UI08* Keys)
2031+
{
2032+
union {
2033+
AP4_UI08 B[AP4_AES_BLOCK_SIZE];
2034+
__m128i V;
2035+
} Counter;
2036+
2037+
if (iv) {
2038+
memcpy(&Counter.B[0], iv, AP4_AES_BLOCK_SIZE);
2039+
Counter.V = _mm_shuffle_epi8(Counter.V, _mm_set_epi8(8,9,10,11,12,13,14,15,7,6,5,4,3,2,1,0));
2040+
Counter.V = _mm_sub_epi64(Counter.V, _mm_set_epi32(0,1,0,0));
2041+
} else {
2042+
memset(&Counter.B[0], 0, AP4_AES_BLOCK_SIZE);
2043+
}
2044+
2045+
// First, process blocks eight by eight (Intel recommandation)
2046+
const AP4_Size Size8B = 8*AP4_AES_BLOCK_SIZE;
2047+
const AP4_Size End8B = (input_size/Size8B)*Size8B;
2048+
for (AP4_Size i = 0; i < End8B; i += Size8B) {
2049+
aesni_process_NB_blocks<8>(&output[i], &input[i], Counter.V, Keys);
2050+
}
2051+
// Process the remaining blocks!
2052+
const AP4_Size RemBlocks = (input_size-End8B)/AP4_AES_BLOCK_SIZE;
2053+
AP4_Size CurIdx = End8B;
2054+
switch (RemBlocks) {
2055+
#define FINAL_BLOCKS(N)\
2056+
case N:\
2057+
aesni_process_NB_blocks<N>(&output[CurIdx], &input[CurIdx], Counter.V, Keys);\
2058+
CurIdx += N*AP4_AES_BLOCK_SIZE;\
2059+
break;
2060+
2061+
FINAL_BLOCKS(7)
2062+
FINAL_BLOCKS(6)
2063+
FINAL_BLOCKS(5)
2064+
FINAL_BLOCKS(4)
2065+
FINAL_BLOCKS(3)
2066+
FINAL_BLOCKS(2)
2067+
FINAL_BLOCKS(1)
2068+
#undef FINAL_BLOCKS
2069+
}
2070+
const AP4_Size Rem = input_size-CurIdx;
2071+
assert(Rem < 16 && "too many remaining bytes!");
2072+
if (Rem > 0) {
2073+
// Last block
2074+
AP4_UI08 LastBlock[AP4_AES_BLOCK_SIZE];
2075+
memcpy(&LastBlock[0], &input[CurIdx], Rem);
2076+
aesni_process_NB_blocks<1>(&LastBlock[0], &LastBlock[0], Counter.V, Keys);
2077+
memcpy(&output[CurIdx], &LastBlock[0], Rem);
2078+
}
2079+
}
2080+
2081+
class AP4_AesNICtrBlockCipher : public AP4_AesBlockCipher
2082+
{
2083+
public:
2084+
AP4_AesNICtrBlockCipher(CipherDirection direction,
2085+
unsigned int counter_size,
2086+
aes_ctx* context) :
2087+
AP4_AesBlockCipher(direction, CTR, context)
2088+
{
2089+
assert(counter_size == 8 && "counter size must be 8 bytes!");
2090+
}
2091+
2092+
// AP4_BlockCipher methods
2093+
virtual AP4_Result Process(const AP4_UI08* input,
2094+
AP4_Size input_size,
2095+
AP4_UI08* output,
2096+
const AP4_UI08* iv);
2097+
};
2098+
2099+
AP4_Result AP4_AesNICtrBlockCipher::Process(const AP4_UI08* input,
2100+
AP4_Size input_size,
2101+
AP4_UI08* output,
2102+
const AP4_UI08* iv)
2103+
{
2104+
assert(m_Context->n_rnd == 10 && "this only works for AES128!");
2105+
process(output, input, input_size, iv, (const AP4_UI08*) &m_Context->k_sch[0]);
2106+
return AP4_SUCCESS;
2107+
}
2108+
#endif // AP4_ENABLE_AESNI
2109+
19662110
/*----------------------------------------------------------------------
19672111
| AP4_AesBlockCipher::Create
19682112
+---------------------------------------------------------------------*/
@@ -1994,7 +2138,16 @@ AP4_AesBlockCipher::Create(const AP4_UI08* key,
19942138
if (ctr_params) {
19952139
counter_size = ctr_params->counter_size;
19962140
}
1997-
cipher = new AP4_AesCtrBlockCipher(direction, counter_size, context);
2141+
#ifdef AP4_ENABLE_AESNI
2142+
if (g_SupportAesNI && (counter_size == 8)) {
2143+
cipher = new AP4_AesNICtrBlockCipher(direction, counter_size, context);
2144+
}
2145+
else {
2146+
#endif
2147+
cipher = new AP4_AesCtrBlockCipher(direction, counter_size, context);
2148+
#ifdef AP4_ENABLE_AESNI
2149+
}
2150+
#endif
19982151
break;
19992152
}
20002153

0 commit comments

Comments
 (0)