20#if (CRYPTOPP_SHANI_AVAILABLE)
21# include <nmmintrin.h>
22# include <immintrin.h>
26extern const char SHACAL2_SIMD_FNAME[] = __FILE__;
30#if CRYPTOPP_SHANI_AVAILABLE
31void SHACAL2_Enc_ProcessAndXorBlock_SHANI(
const word32* subKeys,
const byte *inBlock,
const byte *xorBlock,
byte *outBlock)
37 const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
38 const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
40 __m128i B0 = _mm_shuffle_epi8(_mm_loadu_si128(
CONST_M128_CAST(inBlock + 0)), MASK1);
41 __m128i B1 = _mm_shuffle_epi8(_mm_loadu_si128(
CONST_M128_CAST(inBlock + 16)), MASK2);
43 __m128i TMP = _mm_alignr_epi8(B0, B1, 8);
44 B1 = _mm_blend_epi16(B1, B0, 0xF0);
49 const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
50 const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
55 __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2);
56 B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2);
60 const byte* keys =
reinterpret_cast<const byte*
>(subKeys);
61 for (
size_t i = 0; i != 8; ++i)
65 const __m128i RK1 = _mm_srli_si128(RK0, 8);
66 const __m128i RK3 = _mm_srli_si128(RK2, 8);
68 B1 = _mm_sha256rnds2_epu32(B1, B0, RK0);
69 B0 = _mm_sha256rnds2_epu32(B0, B1, RK1);
70 B1 = _mm_sha256rnds2_epu32(B1, B0, RK2);
71 B0 = _mm_sha256rnds2_epu32(B0, B1, RK3);
74 TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1);
75 B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1);
83 _mm_storeu_si128(
M128_CAST(outBlock + 16),
88 _mm_storeu_si128(
M128_CAST(outBlock + 0), B0);
89 _mm_storeu_si128(
M128_CAST(outBlock + 16), B1);
#define M128_CAST(x)
Clang workaround.
#define CONST_M128_CAST(x)
Clang workaround.
Library configuration file.
unsigned int word32
32-bit unsigned datatype
Utility functions for the Crypto++ library.
Crypto++ library namespace.
Classes for SHA-1 and SHA-2 family of message digests.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.