19extern const char LSH256_AVX_FNAME[] = __FILE__;
21#if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
23#if defined(CRYPTOPP_AVX2_AVAILABLE)
24# include <emmintrin.h>
25# include <immintrin.h>
28#if defined(CRYPTOPP_GCC_COMPATIBLE)
29# include <x86intrin.h>
32ANONYMOUS_NAMESPACE_BEGIN
// Size in bytes of one message block; this is the declared extent of the
// block arguments taken by load_msg_blk() and compress().
36const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
// Value written into cv_l[0] by lsh256_init_avx2().
39const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;
// Number of 32-bit words in each IV table (LSH256_IV224 / LSH256_IV256).
42const unsigned int CV_WORD_LEN = 16;
// CONST_WORD_LEN * NUM_STEPS is the element count of LSH256_StepConstants.
43const unsigned int CONST_WORD_LEN = 8;
// The step loops in compress() and lsh256_init_avx2() run up to NUM_STEPS / 2
// iterations, each handling one even and one odd step.
46const unsigned int NUM_STEPS = 26;
// Rotation amounts passed as the Alpha/Beta template arguments of mix():
// the EVEN pair for even steps, the ODD pair for odd steps.
48const unsigned int ROT_EVEN_ALPHA = 29;
49const unsigned int ROT_EVEN_BETA = 1;
50const unsigned int ROT_ODD_ALPHA = 5;
51const unsigned int ROT_ODD_BETA = 17;
// Algorithm type words; used as case labels in lsh256_init_avx2().
53const unsigned int LSH_TYPE_256_256 = 0x0000020;
54const unsigned int LSH_TYPE_256_224 = 0x000001C;
// Status codes returned by the lsh256_*_avx2() functions.
61const unsigned int LSH_SUCCESS = 0x0;
64const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
65const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
// Indices into the caller's word32 state array: state[AlgorithmType] holds
// the algorithm type word and state[RemainingBits] the buffered bit count.
69const unsigned int AlgorithmType = 80;
70const unsigned int RemainingBits = 81;
// Tables declared here and defined elsewhere. The two IV tables hold
// CV_WORD_LEN words each; the step-constant table holds CONST_WORD_LEN words
// for each of the NUM_STEPS steps.
78extern const word32 LSH256_IV224[CV_WORD_LEN];
79extern const word32 LSH256_IV256[CV_WORD_LEN];
80extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];
85ANONYMOUS_NAMESPACE_BEGIN
88using CryptoPP::word32;
89using CryptoPP::rotlFixed;
90using CryptoPP::rotlConstant;
92using CryptoPP::GetBlock;
93using CryptoPP::LittleEndian;
94using CryptoPP::ConditionalByteReverse;
95using CryptoPP::LITTLE_ENDIAN_ORDER;
103using CryptoPP::LSH::LSH256_IV224;
104using CryptoPP::LSH::LSH256_IV256;
105using CryptoPP::LSH::LSH256_StepConstants;
// View over the caller-owned word32 state array; nothing is copied.
// NOTE(review): the constructor signature and most member declarations are
// not visible in this view; only what the lines below show is described.
107struct LSH256_AVX2_Context
// Word offsets into state: cv_l at 0, cv_r at 8, sub_msgs at 16.
110 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
// The partial-block buffer starts at word 48 and is addressed as bytes.
111 last_block(reinterpret_cast<
byte*>(state+48)),
112 remain_databitlen(remainingBitLength),
113 alg_type(static_cast<lsh_type>(algType)) {}
// Held by reference, so assignments write through to the caller's variable
// (the wrappers pass state[RemainingBits]).
119 lsh_u32& remain_databitlen;
// Names the four 8-word sub-message areas inside the state array.
// compress() constructs this from ctx->cv_l, so the offsets below are
// relative to the start of the chaining variable.
123struct LSH256_AVX2_Internal
125 LSH256_AVX2_Internal(
word32* state) :
// Even sub-message: left half at word 16, right half at word 24.
126 submsg_e_l(state+16), submsg_e_r(state+24),
// Odd sub-message: left half at word 32, right half at word 40.
127 submsg_o_l(state+32), submsg_o_r(state+40) { }
151inline bool LSH_IS_LSH512(lsh_uint val) {
152 return (val & 0xf0000) == 0;
155inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
159inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
163inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
164 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
167inline lsh_u32 loadLE32(lsh_u32 v) {
171lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
176inline void load_msg_blk(LSH256_AVX2_Internal* i_state,
const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
180 lsh_u32* submsg_e_l = i_state->submsg_e_l;
181 lsh_u32* submsg_e_r = i_state->submsg_e_r;
182 lsh_u32* submsg_o_l = i_state->submsg_o_l;
183 lsh_u32* submsg_o_r = i_state->submsg_o_r;
185 _mm256_storeu_si256(M256_CAST(submsg_e_l+0),
186 _mm256_loadu_si256(CONST_M256_CAST(msgblk+0)));
187 _mm256_storeu_si256(M256_CAST(submsg_e_r+0),
188 _mm256_loadu_si256(CONST_M256_CAST(msgblk+32)));
189 _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
190 _mm256_loadu_si256(CONST_M256_CAST(msgblk+64)));
191 _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
192 _mm256_loadu_si256(CONST_M256_CAST(msgblk+96)));
// Message expansion for an even step: each half of the even sub-message is
// replaced by the 32-bit lane-wise sum of the matching odd half and a second
// operand derived from the current even half and `mask`.
// NOTE(review): the call that applies `mask` to the even half is not visible
// in this view (the parentheses below do not balance without it); confirm
// against the full source before relying on this description.
195inline void msg_exp_even(LSH256_AVX2_Internal* i_state)
199 lsh_u32* submsg_e_l = i_state->submsg_e_l;
200 lsh_u32* submsg_e_r = i_state->submsg_e_r;
201 lsh_u32* submsg_o_l = i_state->submsg_o_l;
202 lsh_u32* submsg_o_r = i_state->submsg_o_r;
// Each 32-bit element packs four consecutive byte values (e.g. 0x0f0e0d0c).
204 const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
205 0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);
// Left half: result stored back over submsg_e_l.
207 _mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi32(
208 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
210 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)), mask)));
// Right half: result stored back over submsg_e_r.
211 _mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi32(
212 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
214 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)), mask)));
// Message expansion for an odd step: each half of the odd sub-message is
// replaced by the 32-bit lane-wise sum of the matching even half and a second
// operand derived from the current odd half and `mask`.
// NOTE(review): the call that applies `mask` to the odd half is not visible
// in this view (the parentheses below do not balance without it); confirm
// against the full source before relying on this description.
217inline void msg_exp_odd(LSH256_AVX2_Internal* i_state)
221 lsh_u32* submsg_e_l = i_state->submsg_e_l;
222 lsh_u32* submsg_e_r = i_state->submsg_e_r;
223 lsh_u32* submsg_o_l = i_state->submsg_o_l;
224 lsh_u32* submsg_o_r = i_state->submsg_o_r;
// Same constant as the one used in msg_exp_even().
226 const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
227 0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);
// Left half: result stored back over submsg_o_l.
229 _mm256_storeu_si256(M256_CAST(submsg_o_l+0), _mm256_add_epi32(
230 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
232 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)), mask)));
// Right half: result stored back over submsg_o_r.
233 _mm256_storeu_si256(M256_CAST(submsg_o_r+0), _mm256_add_epi32(
234 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
236 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)), mask)));
239inline void load_sc(
const lsh_u32** p_const_v,
size_t i)
243 *p_const_v = &LSH256_StepConstants[i];
246inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
250 lsh_u32* submsg_e_l = i_state->submsg_e_l;
251 lsh_u32* submsg_e_r = i_state->submsg_e_r;
253 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_xor_si256(
254 _mm256_loadu_si256(CONST_M256_CAST(cv_l+0)),
255 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0))));
256 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_xor_si256(
257 _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
258 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0))));
261inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
265 lsh_u32* submsg_o_l = i_state->submsg_o_l;
266 lsh_u32* submsg_o_r = i_state->submsg_o_r;
268 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
269 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
270 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
271 _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
272 _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
273 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));
276inline void add_blk(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
278 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi32(
279 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
280 _mm256_loadu_si256(CONST_M256_CAST(cv_r))));
283template <
unsigned int R>
284inline void rotate_blk(lsh_u32 cv[8])
286 _mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
287 _mm256_slli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
288 _mm256_srli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), 32-R)));
291inline void xor_with_const(lsh_u32 cv_l[8],
const lsh_u32 const_v[8])
293 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
294 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
295 _mm256_loadu_si256(CONST_M256_CAST(const_v))));
// Rearranges the bytes of cv_r in place with _mm256_shuffle_epi8, using the
// 32 byte indices listed below as the control vector.
// NOTE(review): the call that builds the control vector from these 32 values
// is not visible in this view; confirm against the full source.
298inline void rotate_msg_gamma(lsh_u32 cv_r[8])
301 _mm256_storeu_si256(M256_CAST(cv_r+0),
302 _mm256_shuffle_epi8(_mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
// Within every group of four indices the values are a cyclic rotation of one
// 32-bit word's byte positions, i.e. a per-word byte rotation.
304 15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1,
305 12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
308inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
310 __m256i temp = _mm256_shuffle_epi32(
311 _mm256_loadu_si256(CONST_M256_CAST(cv_l)), _MM_SHUFFLE(3,1,0,2));
312 _mm256_storeu_si256(M256_CAST(cv_r),
313 _mm256_shuffle_epi32(
314 _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(1,2,3,0)));
315 _mm256_storeu_si256(M256_CAST(cv_l),
316 _mm256_permute2x128_si256(temp,
317 _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,3,0,1)));
318 _mm256_storeu_si256(M256_CAST(cv_r),
319 _mm256_permute2x128_si256(temp,
320 _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,2,0,0)));
// One mixing step over the chaining variable halves, parameterised by the
// two rotation amounts Alpha and Beta and by eight step constants.
// NOTE(review): this view appears to omit statements between the calls shown
// (the embedded listing numbers skip 330, 333 and 335); only the visible
// calls are described here. Confirm against the full source.
327template <
unsigned int Alpha,
unsigned int Beta>
328inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8],
const lsh_u32 const_v[8])
// Rotate every word of cv_l left by Alpha, then XOR in the step constants.
331 rotate_blk<Alpha>(cv_l);
332 xor_with_const(cv_l, const_v);
// Rotate every word of cv_r left by Beta.
334 rotate_blk<Beta>(cv_r);
// Byte-level rearrangement of cv_r.
336 rotate_msg_gamma(cv_r);
// Runs the compression function over one message block, updating the
// chaining variable held in ctx (cv_l / cv_r) in place.
// ctx       context whose chaining variable and sub-message areas are used
// pdMsgBlk  the LSH256_MSG_BLK_BYTE_LEN byte block to absorb
343inline void compress(LSH256_AVX2_Context* ctx,
const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
// The sub-message pointers are computed relative to ctx->cv_l.
347 LSH256_AVX2_Internal s_state(ctx->cv_l);
348 LSH256_AVX2_Internal* i_state = &s_state;
350 const lsh_u32* const_v = NULL;
351 lsh_u32* cv_l = ctx->cv_l;
352 lsh_u32* cv_r = ctx->cv_r;
// Split the block into the even/odd sub-messages.
354 load_msg_blk(i_state, pdMsgBlk);
// Step 0 (even): uses the sub-message as loaded, constants at index 0.
356 msg_add_even(cv_l, cv_r, i_state);
357 load_sc(&const_v, 0);
358 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
359 word_perm(cv_l, cv_r);
// Step 1 (odd): uses the sub-message as loaded, constants at index 8.
361 msg_add_odd(cv_l, cv_r, i_state);
362 load_sc(&const_v, 8);
363 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
364 word_perm(cv_l, cv_r);
// Remaining step pairs: each iteration expands the sub-message before adding
// it, and consumes 16 constant words (8 for the even step, 8 for the odd).
366 for (
size_t i = 1; i < NUM_STEPS / 2; i++)
368 msg_exp_even(i_state);
369 msg_add_even(cv_l, cv_r, i_state);
370 load_sc(&const_v, 16 * i);
371 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
372 word_perm(cv_l, cv_r);
374 msg_exp_odd(i_state);
375 msg_add_odd(cv_l, cv_r, i_state);
376 load_sc(&const_v, 16 * i + 8);
377 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
378 word_perm(cv_l, cv_r);
// One more expanded even sub-message is added with no mix or permutation
// after it. NOTE(review): braces are not visible in this view; these two
// calls are presumed to sit after the loop. Confirm.
381 msg_exp_even(i_state);
382 msg_add_even(cv_l, cv_r, i_state);
390 _mm256_storeu_si256(M256_CAST(cv_l+0),
391 _mm256_load_si256(CONST_M256_CAST(iv+0)));
392 _mm256_storeu_si256(M256_CAST(cv_r+0),
393 _mm256_load_si256(CONST_M256_CAST(iv+8)));
396inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
398 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
399 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
402inline void zero_submsgs(LSH256_AVX2_Context* ctx)
404 lsh_u32* sub_msgs = ctx->sub_msgs;
406 _mm256_storeu_si256(M256_CAST(sub_msgs+ 0), _mm256_setzero_si256());
407 _mm256_storeu_si256(M256_CAST(sub_msgs+ 8), _mm256_setzero_si256());
408 _mm256_storeu_si256(M256_CAST(sub_msgs+16), _mm256_setzero_si256());
409 _mm256_storeu_si256(M256_CAST(sub_msgs+24), _mm256_setzero_si256());
// Sets up ctx for the 224-bit variant by loading the LSH256_IV224 table into
// the chaining variable.
// NOTE(review): statements preceding load_iv() may be missing from this view.
412inline void init224(LSH256_AVX2_Context* ctx)
417 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
// Sets up ctx for the 256-bit variant by loading the LSH256_IV256 table into
// the chaining variable.
// NOTE(review): statements preceding load_iv() may be missing from this view.
420inline void init256(LSH256_AVX2_Context* ctx)
425 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
430inline void fin(LSH256_AVX2_Context* ctx)
434 _mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
435 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
436 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));
441inline void get_hash(LSH256_AVX2_Context* ctx, lsh_u8* pbHashVal)
447 lsh_uint alg_type = ctx->alg_type;
448 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
449 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
452 std::memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
453 if (hash_val_bit_len){
454 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
// Initialises ctx for hashing according to ctx->alg_type and resets the
// buffered bit count to zero.
// NOTE(review): the switch statement, its case bodies and the return
// statements are not visible in this view; only the visible lines are
// described. Confirm control flow against the full source.
460lsh_err lsh256_init_avx2(LSH256_AVX2_Context* ctx)
465 lsh_u32 alg_type = ctx->alg_type;
466 const lsh_u32* const_v = NULL;
// Written through the reference member, so the caller's counter is reset.
467 ctx->remain_databitlen = 0;
// The two type words defined in this file are handled explicitly.
474 case LSH_TYPE_256_256:
477 case LSH_TYPE_256_224:
// Generic path: derive the initial chaining variable by seeding cv_l[0] and
// cv_l[1] and running the step function with no message input.
484 lsh_u32* cv_l = ctx->cv_l;
485 lsh_u32* cv_r = ctx->cv_r;
488 cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
489 cv_l[1] = LSH_GET_HASHBIT(alg_type);
// NUM_STEPS / 2 iterations, each an even step followed by an odd step and
// each consuming 16 words of LSH256_StepConstants.
491 for (
size_t i = 0; i < NUM_STEPS / 2; i++)
494 load_sc(&const_v, i * 16);
495 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
496 word_perm(cv_l, cv_r);
498 load_sc(&const_v, i * 16 + 8);
499 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
500 word_perm(cv_l, cv_r);
// Absorbs message data into ctx, compressing full blocks and buffering any
// tail in ctx->last_block.
// ctx         hashing context
// data        input bytes
// databitlen  input length in bits (callers in this file pass 8 * size)
// NOTE(review): several statements (returns, some guards, the compress call
// inside the while loop) are not visible in this view; confirm against the
// full source.
506lsh_err lsh256_update_avx2(LSH256_AVX2_Context* ctx,
const lsh_u8* data,
size_t databitlen)
// Empty input is special-cased.
516 if (databitlen == 0){
// Work in whole bytes; the sub-byte positions are fixed at zero.
521 size_t databytelen = databitlen >> 3;
523 const size_t pos2 = 0;
// Bytes already buffered in last_block from earlier calls.
525 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
527 const size_t remain_msg_bit = 0;
// A full (or over-full) buffer means the context is corrupt.
529 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
530 return LSH_ERR_INVALID_STATE;
// Cannot trigger while remain_msg_bit is the constant 0 above.
532 if (remain_msg_bit > 0){
533 return LSH_ERR_INVALID_DATABITLEN;
// Case 1: buffered bytes plus new input still do not fill a block, so just
// append to the buffer and update the counters.
536 if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
538 std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
539 ctx->remain_databitlen += (lsh_uint)databitlen;
540 remain_msg_byte += (lsh_uint)databytelen;
// With pos2 == 0 the mask below evaluates to 0.
// NOTE(review): presumably guarded by a pos2 test that is not visible here.
542 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
// Case 2: top up the partially filled buffer to a full block and compress it.
547 if (remain_msg_byte > 0){
548 size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
549 std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
550 compress(ctx, ctx->last_block);
552 databytelen -= more_byte;
554 ctx->remain_databitlen = 0;
// Consume whole blocks from the input.
557 while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
563 data += LSH256_MSG_BLK_BYTE_LEN;
564 databytelen -= LSH256_MSG_BLK_BYTE_LEN;
// Keep any remaining tail bytes for the next call.
567 if (databytelen > 0){
568 std::memcpy(ctx->last_block, data, databytelen);
569 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
// Sub-byte tail handling; with pos2 == 0 the mask is 0 and the count is
// unchanged. NOTE(review): presumably guarded by a pos2 test not visible here.
573 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
574 ctx->remain_databitlen += pos2;
// Pads the buffered tail, compresses the last block and writes the digest.
// ctx      hashing context
// hashval  output buffer for the digest
// NOTE(review): the guards selecting between the two padding statements, the
// call between compress() and get_hash(), and the final return are not
// visible in this view; confirm against the full source.
580lsh_err lsh256_final_avx2(LSH256_AVX2_Context* ctx, lsh_u8* hashval)
// Number of message bytes currently buffered in last_block.
589 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
591 const size_t remain_msg_bit = 0;
// A full (or over-full) buffer means the context is corrupt.
593 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
594 return LSH_ERR_INVALID_STATE;
// Padding: a single 1 bit follows the message. With remain_msg_bit == 0 both
// forms below place 0x80 in the first unused byte.
598 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
601 ctx->last_block[remain_msg_byte] = 0x80;
// Zero-fill the rest of the block after the padding byte.
603 std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
605 compress(ctx, ctx->last_block);
// Copy the digest bytes out of the chaining variable.
608 get_hash(ctx, hashval);
613ANONYMOUS_NAMESPACE_END
// Resets a hash state for a new message.
// state  word32 state array; state[AlgorithmType] must already hold the
//        algorithm type word
// NOTE(review): the statement executed when initialisation fails is not
// visible in this view.
618void LSH256_Base_Restart_AVX2(
word32* state)
// Discard any buffered input before building the context view.
620 state[RemainingBits] = 0;
621 LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
622 lsh_err err = lsh256_init_avx2(&ctx);
624 if (err != LSH_SUCCESS)
// Feeds `size` bytes from `input` into the hash state.
// state  word32 state array
// input  message bytes
// size   number of bytes in input
// NOTE(review): the statement executed when the update fails is not visible
// in this view.
629void LSH256_Base_Update_AVX2(
word32* state,
const byte *input,
size_t size)
631 LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
// The core routine takes a bit length, hence 8*size.
632 lsh_err err = lsh256_update_avx2(&ctx, input, 8*size);
634 if (err != LSH_SUCCESS)
// Finishes the hash and writes the digest to `hash`.
// state  word32 state array
// hash   output buffer for the digest
// The third (size_t) parameter is unnamed and unused in the visible code.
// NOTE(review): the statement executed when finalisation fails is not
// visible in this view.
639void LSH256_Base_TruncatedFinal_AVX2(
word32* state,
byte *hash,
size_t)
641 LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
642 lsh_err err = lsh256_final_avx2(&ctx, hash);
644 if (err != LSH_SUCCESS)
Base class for all exceptions thrown by the library.
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Library configuration file.
unsigned char byte
8-bit unsigned datatype
unsigned int word32
32-bit unsigned datatype
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Crypto++ library namespace.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.