84#ifndef CRYPTOPP_IMPORTS
85#ifndef CRYPTOPP_GENERATE_X64_MASM
93#if (CRYPTOPP_MSC_VERSION >= 1910) && (CRYPTOPP_MSC_VERSION <= 1916)
94# ifndef CRYPTOPP_DEBUG
95# pragma optimize("", off)
96# pragma optimize("ts", on)
103#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE))
104# define CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS 1
108#define M128I_CAST(x) ((__m128i *)(void *)(x))
109#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
111#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
112# if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
// AES lookup tables. When CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS is in
// effect the combined encryption table lives in namespace rdtable as
// word64 Te[256+2] (the extra two entries are used as a zero block elsewhere
// in this file); otherwise separate word32 tables are used.
// NOTE(review): this view is an elided scrape -- the #if/#else structure
// around these alternatives is not fully visible here.
113namespace rdtable {CRYPTOPP_ALIGN_DATA(16)
word64 Te[256+2];}
114using namespace rdtable;
120# if defined(CRYPTOPP_X64_MASM_AVAILABLE)
// X64 MASM build also references the rdtable::Te definition.
122namespace rdtable {CRYPTOPP_ALIGN_DATA(16)
word64 Te[256+2];}
// Fallback layout: separate encryption/decryption tables, 256*4 word32
// entries each (4096 bytes), 16-byte aligned.
124CRYPTOPP_ALIGN_DATA(16) static
word32 Te[256*4];
125CRYPTOPP_ALIGN_DATA(16) static
word32 Td[256*4];
// Lazy-initialization flags; set once FillEncTable/FillDecTable (below) have
// populated the corresponding table.
128static volatile bool s_TeFilled =
false, s_TdFilled =
false;
130ANONYMOUS_NAMESPACE_BEGIN
132#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
// Returns whether [begin, end) shares a 4096-byte page-offset range with the
// Te table. Used below (Enc::AdvancedProcessBlocks) to position the Locals
// workspace so it does not overlap the table's page offsets.
// NOTE(review): two return statements are visible; the if/else selecting
// between them (presumably the wrapped vs. non-wrapped t0..t1 case) is elided
// from this view -- do not assume both are unconditionally reachable.
157static inline bool AliasedWithTable(
const byte *begin,
const byte *end)
159 ptrdiff_t s0 = uintptr_t(begin)%4096, s1 = uintptr_t(end)%4096;
160 ptrdiff_t t0 = uintptr_t(Te)%4096, t1 = (uintptr_t(Te)+
sizeof(Te))%4096;
// Table occupies offsets [t0, t1) within the page (non-wrapping case).
162 return (s0 >= t0 && s0 < t1) || (s1 > t0 && s1 <= t1);
// Table's offset range wraps past the end of the page (wrapping case).
164 return (s0 < t1 || s1 <= t1) || (s0 >= t0 || s1 > t0);
// Fields of struct Locals (the struct header is elided in this view): the
// parameter block handed to Rijndael_Enc_AdvancedProcessBlocks_SSE2. The
// field order appears to correspond to the L_* offsets used by the assembly
// below -- confirm against the full file before reordering anything.
169 word32 subkeys[4*12], workspace[8];
170 const byte *inBlocks, *inXorBlocks, *outXorBlocks;
172 size_t inIncrement, inXorIncrement, outXorIncrement, outIncrement;
173 size_t regSpill, lengthAndCounterFlag, keysBegin;
// Sizing for the alias-safe workspace allocation: one page plus one block of
// slack so the Locals object can be slid to avoid page-offset overlap with
// the Te table (see AliasedWithTable).
176const size_t s_aliasPageSize = 4096;
177const size_t s_aliasBlockSize = 256;
178const size_t s_sizeToAllocate = s_aliasPageSize + s_aliasBlockSize +
sizeof(Locals);
182ANONYMOUS_NAMESPACE_END
// One AES quarter round: consume the four bytes of word t (low byte first),
// XORing a table lookup -- performed through lookup macro L on table T --
// into a, b, c and d. NOTE(review): the final "d ^= ..." line of
// QUARTER_ROUND is elided in this scrape (the last visible line still ends
// with a continuation backslash).
// QUARTER_ROUND_LE writes final-round S-box bytes into tempBlock, reading
// byte [1] of the word64 Te entries.
186#define QUARTER_ROUND(L, T, t, a, b, c, d) \
187 a ^= L(T, 3, byte(t)); t >>= 8;\
188 b ^= L(T, 2, byte(t)); t >>= 8;\
189 c ^= L(T, 1, byte(t)); t >>= 8;\
192#define QUARTER_ROUND_LE(t, a, b, c, d) \
193 tempBlock[a] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
194 tempBlock[b] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
195 tempBlock[c] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
196 tempBlock[d] = ((byte *)(Te+t))[1];
// Final decryption round: write inverse-S-box bytes into tempBlock.
198#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
// Unaligned-access variant: extract the S-box byte from the word64 Td entry.
// The byte offset GetNativeByteOrder()*7 depends on endianness (0 or 7).
199 #define QUARTER_ROUND_LD(t, a, b, c, d) \
200 tempBlock[a] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
201 tempBlock[b] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
202 tempBlock[c] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
203 tempBlock[d] = ((byte *)(Td+t))[GetNativeByteOrder()*7];
// Portable variant (the #else for the #if above is elided in this view):
// index the plain inverse S-box table Sd directly.
205 #define QUARTER_ROUND_LD(t, a, b, c, d) \
206 tempBlock[a] = Sd[byte(t)]; t >>= 8;\
207 tempBlock[b] = Sd[byte(t)]; t >>= 8;\
208 tempBlock[c] = Sd[byte(t)]; t >>= 8;\
209 tempBlock[d] = Sd[t];
// Main-round quarter rounds for encryption (Te) and decryption (Td), built
// on the middle-round lookup macro TL_M.
212#define QUARTER_ROUND_E(t, a, b, c, d) QUARTER_ROUND(TL_M, Te, t, a, b, c, d)
213#define QUARTER_ROUND_D(t, a, b, c, d) QUARTER_ROUND(TL_M, Td, t, a, b, c, d)
// First-round ("F") variants and the TL_F/TL_M lookup macros are selected by
// endianness. On little-endian the output word order is reversed (d,c,b,a).
// NOTE(review): the #else/#endif lines pairing these branches are elided.
215#if (CRYPTOPP_LITTLE_ENDIAN)
216 #define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, d, c, b, a)
217 #define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, d, c, b, a)
218 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
// Unaligned reads of rotated word32 views into the 8-byte table entries.
219 #define TL_F(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (6-i)%4+1))
220 #define TL_M(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (i+3)%4+1))
// Aligned fallback: rotate the table word in registers instead.
222 #define TL_F(T, i, x) rotrFixed(T[x], (3-i)*8)
223 #define TL_M(T, i, x) T[i*256 + x]
// Big-endian branch: natural word order, different rotation amounts.
226 #define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, a, b, c, d)
227 #define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, a, b, c, d)
228 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
229 #define TL_F(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (4-i)%4))
232 #define TL_F(T, i, x) rotrFixed(T[x], i*8)
233 #define TL_M(T, i, x) T[i*256 + x]
// Multiplication by 2, 4 and 8 in GF(2^8) modulo the AES polynomial 0x11b
// (x^8 + x^4 + x^3 + x + 1). Each shifted-out high bit folds 0x11b back in.
238#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
239#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
240#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
// Composite multipliers built from f2/f4/f8: 3, 9, 0xB, 0xD and 0xE -- the
// MixColumns/InvMixColumns coefficients (f3 is used in FillEncTable, fb in
// FillDecTable below).
242#define f3(x) (f2(x) ^ x)
243#define f9(x) (f8(x) ^ x)
244#define fb(x) (f8(x) ^ f2(x) ^ x)
245#define fd(x) (f8(x) ^ f4(x) ^ x)
246#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
// Reports the preferred buffer alignment for this cipher, chosen per
// available ISA acceleration (AES-NI, ARMv8 AES, Cryptogams ARM, POWER8).
// NOTE(review): every branch body and the final return are elided from this
// view; only the #if selectors survive.
248unsigned int Rijndael::Base::OptimalDataAlignment()
const
250#if (CRYPTOPP_AESNI_AVAILABLE)
254#if (CRYPTOPP_ARM_AES_AVAILABLE)
258#if (CRYPTOGAMS_ARM_AES)
264#if (CRYPTOPP_POWER8_AES_AVAILABLE)
// Populates the encryption lookup table Te from the S-box (per-entry
// computation of x/y is elided in this view).
271void Rijndael::Base::FillEncTable()
273 for (
int i=0; i<256; i++)
276#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
// Combined-table layout: pack the 3*S[i] byte-rotations plus y into one
// word64 entry (f3 = multiply by 3 in GF(2^8)).
278 Te[i] =
word64(y | f3(x))<<32 | y;
// Split-table layout (the #else is elided): four rotated word32 copies.
281 for (
int j=0; j<4; j++)
284 y = rotrConstant<8>(y);
288#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
// Two zero sentinel entries past the table; used as a zero block by the
// SSE2/MASM code path (see the "zeros" pointer in AdvancedProcessBlocks).
289 Te[256] = Te[257] = 0;
// Populates the decryption lookup table Td from the inverse S-box
// (per-entry computation of x/y is elided in this view).
294void Rijndael::Base::FillDecTable()
296 for (
int i=0; i<256; i++)
299#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
// Combined-table layout: one word64 entry per byte value (fb = multiply by
// 0x0B in GF(2^8)).
301 Td[i] =
word64(y | fb(x))<<32 | y | x;
// Split-table layout (the #else is elided): four rotated word32 copies.
304 for (
int j=0; j<4; j++)
307 y = rotrConstant<8>(y);
// Forward declarations of the ISA-accelerated key-setup and bulk-processing
// routines implemented in sibling translation units (rijndael_simd.cpp and
// friends). Each group is guarded by its platform feature macro; the
// matching #endif lines are elided from this view.
314#if (CRYPTOPP_AESNI_AVAILABLE)
315extern void Rijndael_UncheckedSetKey_SSE4_AESNI(
const byte *userKey,
size_t keyLen,
word32* rk);
316extern void Rijndael_UncheckedSetKeyRev_AESNI(
word32 *key,
unsigned int rounds);
318extern size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(
const word32 *subkeys,
size_t rounds,
319 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
320extern size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(
const word32 *subkeys,
size_t rounds,
321 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
// ARMv8 AES extension implementations.
324#if (CRYPTOPP_ARM_AES_AVAILABLE)
325extern size_t Rijndael_Enc_AdvancedProcessBlocks_ARMV8(
const word32 *subkeys,
size_t rounds,
326 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
327extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(
const word32 *subkeys,
size_t rounds,
328 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
// Cryptogams ARM assembly (C linkage; single-block interfaces).
331#if (CRYPTOGAMS_ARM_AES)
332extern "C" int cryptogams_AES_set_encrypt_key(
const unsigned char *userKey,
const int bitLen,
word32 *rkey);
333extern "C" int cryptogams_AES_set_decrypt_key(
const unsigned char *userKey,
const int bitLen,
word32 *rkey);
334extern "C" void cryptogams_AES_encrypt_block(
const unsigned char *in,
unsigned char *out,
const word32 *rkey);
335extern "C" void cryptogams_AES_decrypt_block(
const unsigned char *in,
unsigned char *out,
const word32 *rkey);
// POWER8 in-core AES implementations.
338#if (CRYPTOPP_POWER8_AES_AVAILABLE)
339extern void Rijndael_UncheckedSetKey_POWER8(
const byte* userKey,
size_t keyLen,
340 word32* rk,
const byte* Se);
342extern size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(
const word32 *subkeys,
size_t rounds,
343 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
344extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(
const word32 *subkeys,
size_t rounds,
345 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags);
// Thin C++ wrappers over the Cryptogams ARM assembly entry points, adapting
// byte* types and adding the xorBlock handling used by ProcessAndXorBlock.
// NOTE(review): function braces and (likely) a null check guarding the
// xorbuf calls are elided from this view.
348#if (CRYPTOGAMS_ARM_AES)
349int CRYPTOGAMS_set_encrypt_key(
const byte *userKey,
const int bitLen,
word32 *rkey)
351 return cryptogams_AES_set_encrypt_key(userKey, bitLen, rkey);
353int CRYPTOGAMS_set_decrypt_key(
const byte *userKey,
const int bitLen,
word32 *rkey)
355 return cryptogams_AES_set_decrypt_key(userKey, bitLen, rkey);
// Encrypt one 16-byte block, then XOR the xorBlock mask into the output.
357void CRYPTOGAMS_encrypt(
const byte *inBlock,
const byte *xorBlock,
byte *outBlock,
const word32 *rkey)
359 cryptogams_AES_encrypt_block(inBlock, outBlock, rkey);
361 xorbuf (outBlock, xorBlock, 16);
// Decrypt one 16-byte block, then XOR the xorBlock mask into the output.
363void CRYPTOGAMS_decrypt(
const byte *inBlock,
const byte *xorBlock,
byte *outBlock,
const word32 *rkey)
365 cryptogams_AES_decrypt_block(inBlock, outBlock, rkey);
367 xorbuf (outBlock, xorBlock, 16);
// Names the implementation that will actually run (e.g. AESNI, SSE2, ARMv8,
// Power8, or the C++ fallback), tested in priority order. The branch bodies
// and final return are elided from this view.
371std::string Rijndael::Base::AlgorithmProvider()
const
373#if (CRYPTOPP_AESNI_AVAILABLE)
377#if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
381#if (CRYPTOPP_ARM_AES_AVAILABLE)
385#if (CRYPTOGAMS_ARM_AES)
389#if (CRYPTOPP_POWER8_AES_AVAILABLE)
// Expands the user key into the round-key schedule m_key and sets m_rounds
// (= keyLen/4 + 6, i.e. 10/12/14 for 16/24/32-byte keys). Dispatches to
// ISA-specific key setup when available, otherwise runs the portable
// expansion. NOTE(review): many interior lines (braces, #else/#endif,
// early returns after the accelerated paths) are elided from this view.
396void Rijndael::Base::UncheckedSetKey(
const byte *userKey,
unsigned int keyLen,
const NameValuePairs &)
398 AssertValidKeyLength(keyLen);
// Cryptogams ARM path: fixed-size schedule large enough for AES-256 plus
// the extra word the Cryptogams code expects.
400#if (CRYPTOGAMS_ARM_AES)
403 m_rounds = keyLen/4 + 6;
404 m_key.New(4*(14+1)+4);
406 if (IsForwardTransformation())
407 CRYPTOGAMS_set_encrypt_key(userKey, keyLen*8, m_key.begin());
409 CRYPTOGAMS_set_decrypt_key(userKey, keyLen*8, m_key.begin());
// x86/x64: pre-allocate the alias-safe workspace used by the SSE2 path.
414#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
415 m_aliasBlock.New(s_sizeToAllocate);
418 m_aliasBlock.SetMark(0);
421 m_rounds = keyLen/4 + 6;
422 m_key.New(4*(m_rounds+1));
// AES-NI key schedule (with reversed schedule for decryption).
425#if (CRYPTOPP_AESNI_AVAILABLE && CRYPTOPP_SSE41_AVAILABLE && (!defined(CRYPTOPP_MSC_VERSION) || CRYPTOPP_MSC_VERSION >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32))
427 if (HasAESNI() && HasSSE41())
431 Rijndael_UncheckedSetKey_SSE4_AESNI(userKey, keyLen, rk);
432 if (!IsForwardTransformation())
433 Rijndael_UncheckedSetKeyRev_AESNI(m_key, m_rounds);
439#if CRYPTOPP_POWER8_AES_AVAILABLE
444 Rijndael_UncheckedSetKey_POWER8(userKey, keyLen, rk, Se);
// Portable key expansion loop body: rotate/substitute the previous word,
// XOR in the round constant, then chain the remaining Nk-1 words.
455 temp = rk[keyLen/4-1];
456 word32 x = (
word32(Se[GETBYTE(temp, 2)]) << 24) ^ (
word32(Se[GETBYTE(temp, 1)]) << 16) ^
457 (
word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
458 rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
459 rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
460 rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
461 rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];
// Stop once the schedule is full.
463 if (rk + keyLen/4 + 4 == m_key.end())
// AES-192/-256 tails: extra words per iteration (AES-256 applies an extra
// S-box substitution on the middle word).
468 rk[10] = rk[ 4] ^ rk[ 9];
469 rk[11] = rk[ 5] ^ rk[10];
471 else if (keyLen == 32)
474 rk[12] = rk[ 4] ^ (
word32(Se[GETBYTE(temp, 3)]) << 24) ^ (
word32(Se[GETBYTE(temp, 2)]) << 16) ^ (
word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
475 rk[13] = rk[ 5] ^ rk[12];
476 rk[14] = rk[ 6] ^ rk[13];
477 rk[15] = rk[ 7] ^ rk[14];
// Decryption schedule: apply InvMixColumns to the interior round keys and
// reverse their order (the swap loop below walks both ends inward).
484 if (IsForwardTransformation())
497 #define InverseMixColumn(x) \
498 TL_M(Td, 0, Se[GETBYTE(x, 3)]) ^ TL_M(Td, 1, Se[GETBYTE(x, 2)]) ^ \
499 TL_M(Td, 2, Se[GETBYTE(x, 1)]) ^ TL_M(Td, 3, Se[GETBYTE(x, 0)])
502 for (i = 4, j = 4*m_rounds-4; i < j; i += 4, j -= 4)
504 temp = InverseMixColumn(rk[i ]); rk[i ] = InverseMixColumn(rk[j ]); rk[j ] = temp;
505 temp = InverseMixColumn(rk[i + 1]); rk[i + 1] = InverseMixColumn(rk[j + 1]); rk[j + 1] = temp;
506 temp = InverseMixColumn(rk[i + 2]); rk[i + 2] = InverseMixColumn(rk[j + 2]); rk[j + 2] = temp;
507 temp = InverseMixColumn(rk[i + 3]); rk[i + 3] = InverseMixColumn(rk[j + 3]); rk[j + 3] = temp;
// Middle round key (when i == j) still needs InvMixColumns.
510 rk[i+0] = InverseMixColumn(rk[i+0]);
511 rk[i+1] = InverseMixColumn(rk[i+1]);
512 rk[i+2] = InverseMixColumn(rk[i+2]);
513 rk[i+3] = InverseMixColumn(rk[i+3]);
// Tail of UncheckedSetKey (bodies elided): ISA-specific post-processing of
// the schedule, e.g. byte-order conditioning for AES-NI / ARMv8.
521#if CRYPTOPP_AESNI_AVAILABLE
525#if CRYPTOPP_ARM_AES_AVAILABLE
// Encrypts one 16-byte block: outBlock = E(inBlock) ^ xorBlock. Accelerated
// paths delegate to AdvancedProcessBlocks with a single block; the portable
// path below implements the table-driven rounds directly. NOTE(review):
// braces, #else/#endif lines, and several statements are elided throughout.
531void Rijndael::Enc::ProcessAndXorBlock(
const byte *inBlock,
const byte *xorBlock,
byte *outBlock)
const
533#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) || CRYPTOPP_AESNI_AVAILABLE
534# if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
540 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
545#if (CRYPTOPP_ARM_AES_AVAILABLE)
548 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
553#if (CRYPTOGAMS_ARM_AES)
556 CRYPTOGAMS_encrypt(inBlock, xorBlock, outBlock, m_key.begin());
561#if (CRYPTOPP_POWER8_AES_AVAILABLE)
564 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
// Portable path: load the input block into four state words.
571 word32 s0, s1, s2, s3, t0, t1, t2, t3;
572 Block::Get(inBlock)(s0)(s1)(s2)(s3);
// Timing-attack countermeasure: touch every cache line of the lookup table
// before use so table loads during the rounds cannot leak which entries the
// key/data selected. (2048 bytes for the word64 table, 1024 per word32
// table; u accumulates the reads so they are not optimized away.)
587 const int cacheLineSize = GetCacheLineSize();
591#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
592 for (i=0; i<2048; i+=cacheLineSize)
594 for (i=0; i<1024; i+=cacheLineSize)
596 u &= *(
const word32 *)(
const void *)(((
const byte *)Te)+i);
598 s0 |= u; s1 |= u; s2 |= u; s3 |= u;
// First round (key addition elided above): full table lookups.
600 QUARTER_ROUND_FE(s3, t0, t1, t2, t3)
601 QUARTER_ROUND_FE(s2, t3, t0, t1, t2)
602 QUARTER_ROUND_FE(s1, t2, t3, t0, t1)
603 QUARTER_ROUND_FE(s0, t1, t2, t3, t0)
// Main loop: two rounds per iteration, ping-ponging s<->t.
606 unsigned int r = m_rounds/2 - 1;
609 s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
611 QUARTER_ROUND_E(t3, s0, s1, s2, s3)
612 QUARTER_ROUND_E(t2, s3, s0, s1, s2)
613 QUARTER_ROUND_E(t1, s2, s3, s0, s1)
614 QUARTER_ROUND_E(t0, s1, s2, s3, s0)
616 t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
618 QUARTER_ROUND_E(s3, t0, t1, t2, t3)
619 QUARTER_ROUND_E(s2, t3, t0, t1, t2)
620 QUARTER_ROUND_E(s1, t2, t3, t0, t1)
621 QUARTER_ROUND_E(s0, t1, t2, t3, t0)
// Final round: S-box-only lookups written bytewise into tempBlock.
627 byte *
const tempBlock = (
byte *)tbw;
629 QUARTER_ROUND_LE(t2, 15, 2, 5, 8)
630 QUARTER_ROUND_LE(t1, 11, 14, 1, 4)
631 QUARTER_ROUND_LE(t0, 7, 10, 13, 0)
632 QUARTER_ROUND_LE(t3, 3, 6, 9, 12)
// Final key addition and XOR with xorBlock, written to outBlock.
634 Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
// Decrypts one 16-byte block: outBlock = D(inBlock) ^ xorBlock. Mirrors the
// encryption routine above, using the Td/Sd tables. NOTE(review): braces,
// #else/#endif lines, and several statements are elided throughout.
637void Rijndael::Dec::ProcessAndXorBlock(const
byte *inBlock, const
byte *xorBlock,
byte *outBlock)
const
639#if CRYPTOPP_AESNI_AVAILABLE
642 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
647#if (CRYPTOPP_ARM_AES_AVAILABLE)
650 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
655#if (CRYPTOGAMS_ARM_AES)
658 CRYPTOGAMS_decrypt(inBlock, xorBlock, outBlock, m_key.begin());
663#if (CRYPTOPP_POWER8_AES_AVAILABLE)
666 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
// Portable path: load input into state words.
673 word32 s0, s1, s2, s3, t0, t1, t2, t3;
674 Block::Get(inBlock)(s0)(s1)(s2)(s3);
// Timing-attack countermeasure: pre-touch every cache line of Td.
689 const int cacheLineSize = GetCacheLineSize();
693#if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
694 for (i=0; i<2048; i+=cacheLineSize)
696 for (i=0; i<1024; i+=cacheLineSize)
698 u &= *(
const word32 *)(
const void *)(((
const byte *)Td)+i);
700 s0 |= u; s1 |= u; s2 |= u; s3 |= u;
// First round of decryption (note the reversed output ordering).
702 QUARTER_ROUND_FD(s3, t2, t1, t0, t3)
703 QUARTER_ROUND_FD(s2, t1, t0, t3, t2)
704 QUARTER_ROUND_FD(s1, t0, t3, t2, t1)
705 QUARTER_ROUND_FD(s0, t3, t2, t1, t0)
// Main loop: two rounds per iteration, ping-ponging s<->t.
708 unsigned int r = m_rounds/2 - 1;
711 s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
713 QUARTER_ROUND_D(t3, s2, s1, s0, s3)
714 QUARTER_ROUND_D(t2, s1, s0, s3, s2)
715 QUARTER_ROUND_D(t1, s0, s3, s2, s1)
716 QUARTER_ROUND_D(t0, s3, s2, s1, s0)
718 t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
720 QUARTER_ROUND_D(s3, t2, t1, t0, t3)
721 QUARTER_ROUND_D(s2, t1, t0, t3, t2)
722 QUARTER_ROUND_D(s1, t0, t3, t2, t1)
723 QUARTER_ROUND_D(s0, t3, t2, t1, t0)
// Without the combined table, the final round reads the separate inverse
// S-box Sd -- pre-touch its cache lines too (256 bytes, plus the last word).
728#if !(defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS))
733 for (i=0; i<256; i+=cacheLineSize)
734 u &= *(
const word32 *)(
const void *)(Sd+i);
735 u &= *(
const word32 *)(
const void *)(Sd+252);
736 t0 |= u; t1 |= u; t2 |= u; t3 |= u;
// Final round: inverse-S-box-only lookups written bytewise into tempBlock.
740 byte *
const tempBlock = (
byte *)tbw;
742 QUARTER_ROUND_LD(t2, 7, 2, 13, 8)
743 QUARTER_ROUND_LD(t1, 3, 14, 9, 4)
744 QUARTER_ROUND_LD(t0, 15, 10, 5, 0)
745 QUARTER_ROUND_LD(t3, 11, 6, 1, 12)
// Final key addition and XOR with xorBlock, written to outBlock.
747 Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
// SSE2/MMX assembly implementation of multi-block AES encryption. The L_*
// macros define byte offsets of Locals fields relative to the locals base
// register L_REG; two layouts exist (32-bit vs. 64-bit -- the selecting
// #if/#else lines are elided in this view). Warning 4731 (MSVC): ebp is
// modified by inline asm; intentional here.
// NOTE(review): this region is heavily elided; several macro definitions
// end with continuation backslashes whose following lines are missing. Do
// not insert lines inside the macro bodies.
752#if CRYPTOPP_MSC_VERSION
753# pragma warning(disable: 4731)
758#if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
// Naked fastcall: no compiler prologue; locals/k arrive per the fastcall
// convention and are consumed by the hand-written code below.
760CRYPTOPP_NAKED
void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks_SSE2(
void *locals,
const word32 *k)
762 CRYPTOPP_UNUSED(locals); CRYPTOPP_UNUSED(k);
// 32-bit layout: locals sit 768 bytes above L_REG; pointers are 4 bytes.
767#define L_INDEX(i) (L_REG+768+i)
768#define L_INXORBLOCKS L_INBLOCKS+4
769#define L_OUTXORBLOCKS L_INBLOCKS+8
770#define L_OUTBLOCKS L_INBLOCKS+12
771#define L_INCREMENTS L_INDEX(16*15)
772#define L_SP L_INDEX(16*16)
773#define L_LENGTH L_INDEX(16*16+4)
774#define L_KEYS_BEGIN L_INDEX(16*16+8)
781 AS2( movd mm7, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
782 AS2( pxor MM(a), mm7)\
786 AS2( movd MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
791#define L_INDEX(i) (L_REG+i)
792#define L_INXORBLOCKS L_INBLOCKS+8
793#define L_OUTXORBLOCKS L_INBLOCKS+16
794#define L_OUTBLOCKS L_INBLOCKS+24
795#define L_INCREMENTS L_INDEX(16*16)
796#define L_LENGTH L_INDEX(16*18+8)
797#define L_KEYS_BEGIN L_INDEX(16*19)
811 AS2( xor MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
815 AS2( mov MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
819#define L_SUBKEYS L_INDEX(0)
820#define L_SAVED_X L_SUBKEYS
821#define L_KEY12 L_INDEX(16*12)
822#define L_LASTROUND L_INDEX(16*13)
823#define L_INBLOCKS L_INDEX(16*14)
// MAP0TO4 remaps byte lane 0..3 to offsets 1..4 within the 8-byte Te entry.
824#define MAP0TO4(i) (ASM_MOD(i+3,4)+1)
828 AS2( xor a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
832 AS2( mov a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
834#ifdef CRYPTOPP_GENERATE_X64_MASM
// Prologue: MASM and GNU-asm variants load AS_REG_7 with the Te table
// address and edi with g_cacheLineSize (the mangled ?Te@rdtable@... name is
// MSVC's decoration of rdtable::Te). NOTE(review): interior lines elided.
836 Rijndael_Enc_AdvancedProcessBlocks PROC FRAME
843 mov AS_REG_7, ?Te@rdtable@
CryptoPP@@3PA_KA
844 mov edi, DWORD PTR [?g_cacheLineSize@
CryptoPP@@3IA]
845#elif defined(__GNUC__)
854 AS2( mov AS_REG_7, WORD_REG(si))
860 AS2( lea AS_REG_7, [Te])
861 AS2( mov edi, [g_cacheLineSize])
// Spill esp into the Locals regSpill area and repoint esp at the workspace.
865 AS2( mov [ecx+16*12+16*4], esp)
866 AS2( lea esp, [ecx-768])
// Copy the needed round keys into L_SUBKEYS/L_KEY12 (loop body elided).
870 AS2( mov WORD_REG(si), [L_KEYS_BEGIN])
871 AS2( mov WORD_REG(ax), 16)
872 AS2( and WORD_REG(ax), WORD_REG(si))
873 AS2( movdqa xmm3, XMMWORD_PTR [WORD_REG(dx)+16+WORD_REG(ax)])
874 AS2( movdqa [L_KEY12], xmm3)
875 AS2( lea WORD_REG(ax), [WORD_REG(dx)+WORD_REG(ax)+2*16])
876 AS2( sub WORD_REG(ax), WORD_REG(si))
878 AS2( movdqa xmm0, [WORD_REG(ax)+WORD_REG(si)])
879 AS2( movdqa XMMWORD_PTR [L_SUBKEYS+WORD_REG(si)], xmm0)
880 AS2( add WORD_REG(si), 16)
881 AS2( cmp WORD_REG(si), 16*12)
// Load the first round key words into registers.
887 AS2( movdqa xmm4, [WORD_REG(ax)+WORD_REG(si)])
888 AS2( movdqa xmm1, [WORD_REG(dx)])
889 AS2( MOVD MM(1), [WORD_REG(dx)+4*4])
890 AS2( mov ebx, [WORD_REG(dx)+5*4])
891 AS2( mov ecx, [WORD_REG(dx)+6*4])
892 AS2( mov edx, [WORD_REG(dx)+7*4])
// Pre-touch the 2048-byte table one cache line at a time (stride = edi)
// before any key-dependent loads -- timing-attack countermeasure, same as
// the C++ path.
895 AS2( xor WORD_REG(ax), WORD_REG(ax))
897 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
898 AS2( add WORD_REG(ax), WORD_REG(di))
899 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
900 AS2( add WORD_REG(ax), WORD_REG(di))
901 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
902 AS2( add WORD_REG(ax), WORD_REG(di))
903 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
904 AS2( add WORD_REG(ax), WORD_REG(di))
905 AS2( cmp WORD_REG(ax), 2048)
// Branch on the counter-mode flag packed into the low bit of L_LENGTH.
911 AS2( test DWORD PTR [L_LENGTH], 1)
// Counter-mode setup: load the input counter block, XOR with round key 0,
// and keep the last counter byte in MM(2) for cheap increment (paddb below).
// NOTE(review): large interior spans (labels, jumps, round bodies) elided.
917 AS2( mov WORD_REG(si), [L_INBLOCKS])
918 AS2( movdqu xmm2, [WORD_REG(si)])
919 AS2( pxor xmm2, xmm1)
920 AS2( psrldq xmm1, 14)
922 AS2( mov al, BYTE PTR [WORD_REG(si)+15])
923 AS2( MOVD MM(2), eax)
// Precompute the round-12 key XORs shared by every counter block and stash
// them in L_SAVED_X.
962 AS2( mov eax, [L_KEY12+0*4])
963 AS2( mov edi, [L_KEY12+2*4])
964 AS2( MOVD MM(0), [L_KEY12+3*4])
971 AS2( xor ebx, [L_KEY12+1*4])
983 AS2( MOVD edx, MM(1))
984 AS2( MOVD [L_SAVED_X+3*4], MM(0))
985 AS2( mov [L_SAVED_X+0*4], eax)
986 AS2( mov [L_SAVED_X+1*4], ebx)
987 AS2( mov [L_SAVED_X+2*4], edi)
// Non-counter path: load the input block, XOR with round key 0 and the
// input XOR mask, then unpack the state words into registers.
993 AS2( MOVD MM(1), [L_KEY12+0*4])
994 AS2( mov ebx, [L_KEY12+1*4])
995 AS2( mov ecx, [L_KEY12+2*4])
996 AS2( mov edx, [L_KEY12+3*4])
998 AS2( mov WORD_REG(ax), [L_INBLOCKS])
999 AS2( movdqu xmm2, [WORD_REG(ax)])
1000 AS2( mov WORD_REG(si), [L_INXORBLOCKS])
1001 AS2( movdqu xmm5, [WORD_REG(si)])
1002 AS2( pxor xmm2, xmm1)
1003 AS2( pxor xmm2, xmm5)
1006 AS2( movd eax, xmm2)
1007 AS2( psrldq xmm2, 4)
1008 AS2( movd edi, xmm2)
1009 AS2( psrldq xmm2, 4)
1016 AS2( movd edi, xmm2)
1017 AS2( psrldq xmm2, 4)
1024 AS2( movd edi, xmm2)
1036 AS2( MOVD eax, MM(1))
// Advance L_REG past the copied keys for this round-count.
1038 AS2( add L_REG, [L_KEYS_BEGIN])
1039 AS2( add L_REG, 4*16)
// Counter path round start: combine the incremented counter byte with the
// saved XORs, increment the counter (paddb), and enter the round loop.
1045 AS2( MOVD ecx, MM(2))
1046 AS2( MOVD edx, MM(1))
1047 AS2( mov eax, [L_SAVED_X+0*4])
1048 AS2( mov ebx, [L_SAVED_X+1*4])
1050 AS2( and WORD_REG(cx), 255)
1053 AS2( paddb MM(2), mm3)
1058 AS2( xor edx, DWORD PTR [AS_REG_7+WORD_REG(cx)*8+3])
1062 AS2( xor ecx, [L_SAVED_X+2*4])
1065 AS2( xor edx, [L_SAVED_X+3*4])
1067 AS2( add L_REG, [L_KEYS_BEGIN])
1068 AS2( add L_REG, 3*16)
// Inner round loop (bodies elided): per-round subkey XORs from L_SUBKEYS,
// with L_REG advanced 16*16 bytes per pass and wrapped below.
1099 AS2( MOVD MM(0), [L_SUBKEYS-4*16+3*4])
1100 AS2( mov edi, [L_SUBKEYS-4*16+2*4])
1103 AS2( xor eax, [L_SUBKEYS-4*16+0*4])
1104 AS2( xor ebx, [L_SUBKEYS-4*16+1*4])
1105 AS2( MOVD edx, MM(0))
1108 AS2( MOVD MM(0), [L_SUBKEYS-4*16+7*4])
1109 AS2( mov edi, [L_SUBKEYS-4*16+6*4])
1112 AS2( xor eax, [L_SUBKEYS-4*16+4*4])
1113 AS2( xor ebx, [L_SUBKEYS-4*16+5*4])
1114 AS2( MOVD edx, MM(0))
1117 AS2( test L_REG, 255)
1121 AS2( sub L_REG, 16*16)
// LAST: final-round helper -- combine two S-box byte lookups from the Te
// entries into a 16-bit value stored at L_LASTROUND+c. NOTE(review): the
// uses of LAST and several labels/jumps are elided in this view.
1123#define LAST(a, b, c) \
1124 AS2( movzx esi, a )\
1125 AS2( movzx edi, BYTE PTR [AS_REG_7+WORD_REG(si)*8+1] )\
1126 AS2( movzx esi, b )\
1127 AS2( xor edi, DWORD PTR [AS_REG_7+WORD_REG(si)*8+0] )\
1128 AS2( mov WORD PTR [L_LASTROUND+c], di )\
1144 AS2( mov WORD_REG(ax), [L_OUTXORBLOCKS])
1145 AS2( mov WORD_REG(bx), [L_OUTBLOCKS])
1147 AS2( mov WORD_REG(cx), [L_LENGTH])
1148 AS2( sub WORD_REG(cx), 16)
1150 AS2( movdqu xmm2, [WORD_REG(ax)])
1151 AS2( pxor xmm2, xmm4)
// Advance the in/out block pointers by the increments stored in Locals
// (paddd for the 32-bit pointer layout, paddq for the 64-bit one).
1154 AS2( movdqa xmm0, [L_INCREMENTS])
1155 AS2( paddd xmm0, [L_INBLOCKS])
1156 AS2( movdqa [L_INBLOCKS], xmm0)
1158 AS2( movdqa xmm0, [L_INCREMENTS+16])
1159 AS2( paddq xmm0, [L_INBLOCKS+16])
1160 AS2( movdqa [L_INBLOCKS+16], xmm0)
1163 AS2( pxor xmm2, [L_LASTROUND])
1164 AS2( movdqu [WORD_REG(bx)], xmm2)
1169 AS2( mov [L_LENGTH], WORD_REG(cx))
1170 AS2( test WORD_REG(cx), 1)
1175 AS2( movdqa xmm0, [L_INCREMENTS])
1176 AS2( paddq xmm0, [L_INBLOCKS])
1177 AS2( movdqa [L_INBLOCKS], xmm0)
// Epilogue: wipe the copied round keys from the stack workspace before
// returning, then restore the saved esp.
1185 AS2( xorps xmm0, xmm0)
1186 AS2( lea WORD_REG(ax), [L_SUBKEYS+7*16])
1187 AS2( movaps [WORD_REG(ax)-7*16], xmm0)
1188 AS2( movaps [WORD_REG(ax)-6*16], xmm0)
1189 AS2( movaps [WORD_REG(ax)-5*16], xmm0)
1190 AS2( movaps [WORD_REG(ax)-4*16], xmm0)
1191 AS2( movaps [WORD_REG(ax)-3*16], xmm0)
1192 AS2( movaps [WORD_REG(ax)-2*16], xmm0)
1193 AS2( movaps [WORD_REG(ax)-1*16], xmm0)
1194 AS2( movaps [WORD_REG(ax)+0*16], xmm0)
1195 AS2( movaps [WORD_REG(ax)+1*16], xmm0)
1196 AS2( movaps [WORD_REG(ax)+2*16], xmm0)
1197 AS2( movaps [WORD_REG(ax)+3*16], xmm0)
1198 AS2( movaps [WORD_REG(ax)+4*16], xmm0)
1199 AS2( movaps [WORD_REG(ax)+5*16], xmm0)
1200 AS2( movaps [WORD_REG(ax)+6*16], xmm0)
1202 AS2( mov esp, [L_SP])
1212#ifdef CRYPTOPP_GENERATE_X64_MASM
1218 Rijndael_Enc_AdvancedProcessBlocks ENDP
// GNU extended-asm operand lists: inputs (locals in ecx, k in edx, Te in
// esi, g_cacheLineSize in edi) and clobbers; the x86-64-only clobber list
// follows the comma. Do not reorder -- constraint order is ABI-visible.
1223 :
"c" (locals),
"d" (k),
"S" (Te),
"D" (g_cacheLineSize)
1224 :
"memory",
"cc",
"%eax"
1226 ,
"%rbx",
"%r8",
"%r9",
"%r10",
"%r11",
"%r12"
// Declaration of the MASM-built routine (same signature as the inline-asm
// version above) when building with the external x64 assembler.
1234#ifndef CRYPTOPP_GENERATE_X64_MASM
1236#ifdef CRYPTOPP_X64_MASM_AVAILABLE
1238void Rijndael_Enc_AdvancedProcessBlocks_SSE2(
void *locals,
const word32 *k);
1242#if CRYPTOPP_RIJNDAEL_ADVANCED_PROCESS_BLOCKS
// Bulk encryption of `length` bytes of 16-byte blocks, honoring the BT_*
// flags (XOR input/output, counter mode, reverse direction). Returns the
// number of bytes NOT processed. Dispatches to the best available ISA;
// the SSE2/MASM path marshals everything into a Locals block placed so it
// cannot alias the Te table's page offsets. NOTE(review): braces, #endif
// lines and some statements are elided in this view.
1243size_t Rijndael::Enc::AdvancedProcessBlocks(
const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
const
1245#if CRYPTOPP_AESNI_AVAILABLE
1247 return Rijndael_Enc_AdvancedProcessBlocks_AESNI(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1249#if CRYPTOPP_ARM_AES_AVAILABLE
1251 return Rijndael_Enc_AdvancedProcessBlocks_ARMV8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1253#if CRYPTOPP_POWER8_AES_AVAILABLE
1255 return Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1258#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
1261 if (length < BLOCKSIZE)
// Zero block for the "no xor" cases: the two zero sentinels after Te[255]
// written by FillEncTable.
1264 static const byte *zeros = (
const byte*)(Te+256);
1265 m_aliasBlock.SetMark(m_aliasBlock.size());
1266 byte *space = NULLPTR, *originalSpace =
const_cast<byte*
>(m_aliasBlock.data());
// Round up to the next 256-byte boundary, then keep sliding until the
// Locals object does not alias the table's page offsets.
1269 space = originalSpace + (s_aliasBlockSize - (uintptr_t)originalSpace % s_aliasBlockSize) % s_aliasBlockSize;
1270 while (AliasedWithTable(space, space +
sizeof(Locals)))
// BT_ReverseDirection: start at the last block and step backwards.
1276 size_t increment = BLOCKSIZE;
1277 if (flags & BT_ReverseDirection)
1280 inBlocks += length - BLOCKSIZE;
1281 xorBlocks += length - BLOCKSIZE;
1282 outBlocks += length - BLOCKSIZE;
1283 increment = 0-increment;
// Marshal pointers and per-iteration increments into the Locals block the
// assembly routine consumes.
1286 Locals &locals = *(Locals *)(
void *)space;
1288 locals.inBlocks = inBlocks;
1289 locals.inXorBlocks = (flags & BT_XorInput) && xorBlocks ? xorBlocks : zeros;
1290 locals.outXorBlocks = (flags & BT_XorInput) || !xorBlocks ? zeros : xorBlocks;
1291 locals.outBlocks = outBlocks;
1293 locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
1294 locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? increment : 0;
1295 locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : increment;
1296 locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
// Whole blocks only; the counter-mode flag rides in the low bit.
1298 locals.lengthAndCounterFlag = length - (length%16) -
bool(flags & BT_InBlockIsCounter);
1299 int keysToCopy = m_rounds - (flags & BT_InBlockIsCounter ? 3 : 2);
1300 locals.keysBegin = (12-keysToCopy)*16;
1302 Rijndael_Enc_AdvancedProcessBlocks_SSE2(&locals, m_key);
// Leftover partial-block bytes are reported back to the caller.
1304 return length % BLOCKSIZE;
// Bulk decryption counterpart of Enc::AdvancedProcessBlocks: dispatches to
// the best available ISA implementation. NOTE(review): the fallback tail
// (base-class delegation and #endif lines) is elided in this view.
1311size_t Rijndael::Dec::AdvancedProcessBlocks(
const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
const
1313#if CRYPTOPP_AESNI_AVAILABLE
1315 return Rijndael_Dec_AdvancedProcessBlocks_AESNI(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1317#if CRYPTOPP_ARM_AES_AVAILABLE
1319 return Rijndael_Dec_AdvancedProcessBlocks_ARMV8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1321#if CRYPTOPP_POWER8_AES_AVAILABLE
1323 return Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
Interface for retrieving values given their names.
Library configuration file.
#define CRYPTOPP_BOOL_X86
32-bit x86 platform
#define CRYPTOPP_BOOL_X64
64-bit x86 platform
unsigned int word32
32-bit unsigned datatype
unsigned long long word64
64-bit unsigned datatype
Functions for CPU features and intrinsics.
@ BIG_ENDIAN_ORDER
byte order is big-endian
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
void GetUserKey(ByteOrder order, T *out, size_t outlen, const byte *in, size_t inlen)
Copy bytes in a buffer to an array of elements in big-endian order.
CRYPTOPP_DLL void xorbuf(byte *buf, const byte *mask, size_t count)
Performs an XOR of a buffer with a mask.
Crypto++ library namespace.
Classes for Rijndael encryption algorithm.
Access a block of memory.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.