Crypto++ 8.9
Free C++ class library of cryptographic schemes
lsh512_avx.cpp
1// lsh512_avx.cpp - written and placed in the public domain by Jeffrey Walton
2// Based on the specification and source code provided by
3// Korea Internet & Security Agency (KISA) website. Also
4// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10// makes using zeroupper a little tricky.
11
12#include "pch.h"
13#include "config.h"
14
15#include "lsh.h"
16#include "misc.h"
17
18// Squash MS LNK4221 and libtool warnings
19extern const char LSH512_AVX_FNAME[] = __FILE__;
20
21#if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
22
23#if defined(CRYPTOPP_AVX2_AVAILABLE)
24# include <emmintrin.h>
25# include <immintrin.h>
26#endif
27
28#if defined(CRYPTOPP_GCC_COMPATIBLE)
29# include <x86intrin.h>
30#endif
31
32ANONYMOUS_NAMESPACE_BEGIN
33
/* LSH-512 parameters */

// Bytes in one message block (256 bytes = 2048 bits).
const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
// const unsigned int LSH512_CV_BYTE_LEN = 128;
// Value written to cv_l[0] when lsh512_init_avx2() derives an IV at runtime.
const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;

// const unsigned int MSG_BLK_WORD_LEN = 32;
// Number of 64-bit words in each LSH512_IV* table.
const unsigned int CV_WORD_LEN = 16;
// Together with NUM_STEPS this sizes the step-constant table
// (CONST_WORD_LEN * NUM_STEPS words).
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
const unsigned int NUM_STEPS = 28;

// Rotation amounts passed as template arguments to mix<Alpha, Beta>().
const unsigned int ROT_EVEN_ALPHA = 23;
const unsigned int ROT_EVEN_BETA = 59;
const unsigned int ROT_ODD_ALPHA = 7;
const unsigned int ROT_ODD_BETA = 3;

// Algorithm type words. The low 16 bits are the digest length in bytes
// (0x40 = 64, 0x30 = 48, 0x20 = 32, 0x1C = 28); see LSH_GET_HASHBYTE().
const unsigned int LSH_TYPE_512_512 = 0x0010040;
const unsigned int LSH_TYPE_512_384 = 0x0010030;
const unsigned int LSH_TYPE_512_256 = 0x0010020;
const unsigned int LSH_TYPE_512_224 = 0x001001C;

// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;

/* Error codes returned as lsh_err */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Indices into the word64 state array */

// state[AlgorithmType] holds the algorithm type word.
const unsigned int AlgorithmType = 80;
// state[RemainingBits] holds the number of buffered message bits.
const unsigned int RemainingBits = 81;
72
73NAMESPACE_END
74
75NAMESPACE_BEGIN(CryptoPP)
76NAMESPACE_BEGIN(LSH)
77
78// lsh512.cpp
79extern const word64 LSH512_IV224[CV_WORD_LEN];
80extern const word64 LSH512_IV256[CV_WORD_LEN];
81extern const word64 LSH512_IV384[CV_WORD_LEN];
82extern const word64 LSH512_IV512[CV_WORD_LEN];
83extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];
84
85NAMESPACE_END // LSH
86NAMESPACE_END // Crypto++
87
88ANONYMOUS_NAMESPACE_BEGIN
89
90using CryptoPP::byte;
91using CryptoPP::word32;
92using CryptoPP::word64;
93using CryptoPP::rotlFixed;
94using CryptoPP::rotlConstant;
95
96using CryptoPP::GetBlock;
97using CryptoPP::LittleEndian;
98using CryptoPP::ConditionalByteReverse;
99using CryptoPP::LITTLE_ENDIAN_ORDER;
100
101using CryptoPP::LSH::LSH512_IV224;
102using CryptoPP::LSH::LSH512_IV256;
103using CryptoPP::LSH::LSH512_IV384;
104using CryptoPP::LSH::LSH512_IV512;
105using CryptoPP::LSH::LSH512_StepConstants;
106
107typedef byte lsh_u8;
108typedef word32 lsh_u32;
109typedef word64 lsh_u64;
110typedef word32 lsh_uint;
111typedef word32 lsh_err;
112typedef word32 lsh_type;
113
114struct LSH512_AVX2_Context
115{
116 LSH512_AVX2_Context(word64* state, word64 algType, word64& remainingBitLength) :
117 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
118 last_block(reinterpret_cast<byte*>(state+48)),
119 remain_databitlen(remainingBitLength),
120 alg_type(static_cast<lsh_type>(algType)) {}
121
122 lsh_u64* cv_l; // start of our state block
123 lsh_u64* cv_r;
124 lsh_u64* sub_msgs;
125 lsh_u8* last_block;
126 lsh_u64& remain_databitlen;
127 lsh_type alg_type;
128};
129
130struct LSH512_AVX2_Internal
131{
132 LSH512_AVX2_Internal(word64* state) :
133 submsg_e_l(state+16), submsg_e_r(state+24),
134 submsg_o_l(state+32), submsg_o_r(state+40) { }
135
136 lsh_u64* submsg_e_l; /* even left sub-message */
137 lsh_u64* submsg_e_r; /* even right sub-message */
138 lsh_u64* submsg_o_l; /* odd left sub-message */
139 lsh_u64* submsg_o_r; /* odd right sub-message */
140};
141
// Helper whose destructor issues _mm256_zeroupper(), zeroing the upper
// 128 bits of all YMM registers when an instance leaves scope. This avoids
// AVX state transition penalties when saving state. Note that GCC issue
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735 makes using zeroupper
// a little tricky.

struct AVX_Cleanup
{
    ~AVX_Cleanup()
    {
        _mm256_zeroupper();
    }
};
153
154// const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
155
156/* LSH AlgType Macro */
157
158inline bool LSH_IS_LSH512(lsh_uint val) {
159 return (val & 0xf0000) == 0x10000;
160}
161
162inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
163 return val >> 24;
164}
165
166inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
167 return val & 0xffff;
168}
169
170inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
171 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
172}
173
174inline lsh_u64 loadLE64(lsh_u64 v) {
176}
177
178lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
179 return rotlFixed(x, r);
180}
181
182// Original code relied upon unaligned lsh_u64 buffer
183inline void load_msg_blk(LSH512_AVX2_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
184{
185 lsh_u64* submsg_e_l = i_state->submsg_e_l;
186 lsh_u64* submsg_e_r = i_state->submsg_e_r;
187 lsh_u64* submsg_o_l = i_state->submsg_o_l;
188 lsh_u64* submsg_o_r = i_state->submsg_o_r;
189
190 _mm256_storeu_si256(M256_CAST(submsg_e_l+0),
191 _mm256_loadu_si256(CONST_M256_CAST(msgblk+0)));
192 _mm256_storeu_si256(M256_CAST(submsg_e_l+4),
193 _mm256_loadu_si256(CONST_M256_CAST(msgblk+32)));
194
195 _mm256_storeu_si256(M256_CAST(submsg_e_r+0),
196 _mm256_loadu_si256(CONST_M256_CAST(msgblk+64)));
197 _mm256_storeu_si256(M256_CAST(submsg_e_r+4),
198 _mm256_loadu_si256(CONST_M256_CAST(msgblk+96)));
199
200 _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
201 _mm256_loadu_si256(CONST_M256_CAST(msgblk+128)));
202 _mm256_storeu_si256(M256_CAST(submsg_o_l+4),
203 _mm256_loadu_si256(CONST_M256_CAST(msgblk+160)));
204
205 _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
206 _mm256_loadu_si256(CONST_M256_CAST(msgblk+192)));
207 _mm256_storeu_si256(M256_CAST(submsg_o_r+4),
208 _mm256_loadu_si256(CONST_M256_CAST(msgblk+224)));
209}
210
211inline void msg_exp_even(LSH512_AVX2_Internal* i_state)
212{
213 CRYPTOPP_ASSERT(i_state != NULLPTR);
214
215 lsh_u64* submsg_e_l = i_state->submsg_e_l;
216 lsh_u64* submsg_e_r = i_state->submsg_e_r;
217 lsh_u64* submsg_o_l = i_state->submsg_o_l;
218 lsh_u64* submsg_o_r = i_state->submsg_o_r;
219
220 _mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi64(
221 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
222 _mm256_permute4x64_epi64(
223 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
224 _MM_SHUFFLE(1,0,2,3))));
225 _mm256_storeu_si256(M256_CAST(submsg_e_l+4), _mm256_add_epi64(
226 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4)),
227 _mm256_permute4x64_epi64(
228 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4)),
229 _MM_SHUFFLE(2,1,0,3))));
230
231 _mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi64(
232 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
233 _mm256_permute4x64_epi64(
234 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
235 _MM_SHUFFLE(1,0,2,3))));
236 _mm256_storeu_si256(M256_CAST(submsg_e_r+4), _mm256_add_epi64(
237 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4)),
238 _mm256_permute4x64_epi64(
239 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4)),
240 _MM_SHUFFLE(2,1,0,3))));
241}
242
243inline void msg_exp_odd(LSH512_AVX2_Internal* i_state)
244{
245 CRYPTOPP_ASSERT(i_state != NULLPTR);
246
247 lsh_u64* submsg_e_l = i_state->submsg_e_l;
248 lsh_u64* submsg_e_r = i_state->submsg_e_r;
249 lsh_u64* submsg_o_l = i_state->submsg_o_l;
250 lsh_u64* submsg_o_r = i_state->submsg_o_r;
251
252 _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
253 _mm256_add_epi64(
254 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
255 _mm256_permute4x64_epi64(
256 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
257 _MM_SHUFFLE(1,0,2,3))));
258 _mm256_storeu_si256(M256_CAST(submsg_o_l+4),
259 _mm256_add_epi64(
260 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4)),
261 _mm256_permute4x64_epi64(
262 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4)),
263 _MM_SHUFFLE(2,1,0,3))));
264
265 _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
266 _mm256_add_epi64(
267 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
268 _mm256_permute4x64_epi64(
269 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
270 _MM_SHUFFLE(1,0,2,3))));
271 _mm256_storeu_si256(M256_CAST(submsg_o_r+4),
272 _mm256_add_epi64(
273 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4)),
274 _mm256_permute4x64_epi64(
275 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4)),
276 _MM_SHUFFLE(2,1,0,3))));
277}
278
279inline void load_sc(const lsh_u64** p_const_v, size_t i)
280{
281 *p_const_v = &LSH512_StepConstants[i];
282}
283
284inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_AVX2_Internal* i_state)
285{
286 CRYPTOPP_ASSERT(i_state != NULLPTR);
287
288 lsh_u64* submsg_e_l = i_state->submsg_e_l;
289 lsh_u64* submsg_e_r = i_state->submsg_e_r;
290
291 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
292 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
293 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l))));
294 _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
295 _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
296 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r))));
297
298 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
299 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
300 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4))));
301 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_xor_si256(
302 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
303 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4))));
304}
305
306inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_AVX2_Internal* i_state)
307{
308 CRYPTOPP_ASSERT(i_state != NULLPTR);
309
310 lsh_u64* submsg_o_l = i_state->submsg_o_l;
311 lsh_u64* submsg_o_r = i_state->submsg_o_r;
312
313 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
314 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
315 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
316 _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
317 _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
318 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));
319
320 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
321 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
322 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4))));
323 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_xor_si256(
324 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
325 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4))));
326}
327
328inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
329{
330 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi64(
331 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
332 _mm256_loadu_si256(CONST_M256_CAST(cv_r))));
333 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_add_epi64(
334 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
335 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4))));
336}
337
338template <unsigned int R>
339inline void rotate_blk(lsh_u64 cv[8])
340{
341 _mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
342 _mm256_slli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
343 _mm256_srli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv)), 64-R)));
344 _mm256_storeu_si256(M256_CAST(cv+4), _mm256_or_si256(
345 _mm256_slli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv+4)), R),
346 _mm256_srli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv+4)), 64-R)));
347}
348
349inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
350{
351 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
352 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
353 _mm256_loadu_si256(CONST_M256_CAST(const_v))));
354 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
355 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
356 _mm256_loadu_si256(CONST_M256_CAST(const_v+4))));
357}
358
359inline void rotate_msg_gamma(lsh_u64 cv_r[8])
360{
361 // g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
362 _mm256_storeu_si256(M256_CAST(cv_r+0),
363 _mm256_shuffle_epi8(
364 _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
365 _mm256_set_epi8(
366 /* hi lane */ 9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4,
367 /* lo lane */ 13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
368 _mm256_storeu_si256(M256_CAST(cv_r+4),
369 _mm256_shuffle_epi8(
370 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
371 _mm256_set_epi8(
372 /* hi lane */ 8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3,
373 /* lo lane */ 12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
374}
375
376inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
377{
378 __m256i temp[2];
379 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_permute4x64_epi64(
380 _mm256_loadu_si256(CONST_M256_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
381 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_permute4x64_epi64(
382 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
383 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_permute4x64_epi64(
384 _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
385 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_permute4x64_epi64(
386 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));
387
388 temp[0] = _mm256_loadu_si256(CONST_M256_CAST(cv_l+0));
389 temp[1] = _mm256_loadu_si256(CONST_M256_CAST(cv_r+0));
390
391 _mm256_storeu_si256(M256_CAST(cv_l+0),
392 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)));
393 _mm256_storeu_si256(M256_CAST(cv_l+4),
394 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)));
395
396 _mm256_storeu_si256(M256_CAST(cv_r+0), temp[0]);
397 _mm256_storeu_si256(M256_CAST(cv_r+4), temp[1]);
398}
399
400/* -------------------------------------------------------- *
401* step function
402* -------------------------------------------------------- */
403
404template <unsigned int Alpha, unsigned int Beta>
405inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
406{
407 add_blk(cv_l, cv_r);
408 rotate_blk<Alpha>(cv_l);
409 xor_with_const(cv_l, const_v);
410 add_blk(cv_r, cv_l);
411 rotate_blk<Beta>(cv_r);
412 add_blk(cv_l, cv_r);
413 rotate_msg_gamma(cv_r);
414}
415
416/* -------------------------------------------------------- *
417* compression function
418* -------------------------------------------------------- */
419
420inline void compress(LSH512_AVX2_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
421{
422 CRYPTOPP_ASSERT(ctx != NULLPTR);
423
424 LSH512_AVX2_Internal s_state(ctx->cv_l);
425 LSH512_AVX2_Internal* i_state = &s_state;
426
427 const lsh_u64* const_v = NULL;
428 lsh_u64 *cv_l = ctx->cv_l;
429 lsh_u64 *cv_r = ctx->cv_r;
430
431 load_msg_blk(i_state, pdMsgBlk);
432
433 msg_add_even(cv_l, cv_r, i_state);
434 load_sc(&const_v, 0);
435 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
436 word_perm(cv_l, cv_r);
437
438 msg_add_odd(cv_l, cv_r, i_state);
439 load_sc(&const_v, 8);
440 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
441 word_perm(cv_l, cv_r);
442
443 for (size_t i = 1; i < NUM_STEPS / 2; i++)
444 {
445 msg_exp_even(i_state);
446 msg_add_even(cv_l, cv_r, i_state);
447 load_sc(&const_v, 16 * i);
448 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
449 word_perm(cv_l, cv_r);
450
451 msg_exp_odd(i_state);
452 msg_add_odd(cv_l, cv_r, i_state);
453 load_sc(&const_v, 16 * i + 8);
454 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
455 word_perm(cv_l, cv_r);
456 }
457
458 msg_exp_even(i_state);
459 msg_add_even(cv_l, cv_r, i_state);
460}
461
462/* -------------------------------------------------------- */
463
464inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
465{
466 // The IV's are 32-byte aligned so we can use aligned loads.
467 _mm256_storeu_si256(M256_CAST(cv_l+0),
468 _mm256_load_si256(CONST_M256_CAST(iv+0)));
469 _mm256_storeu_si256(M256_CAST(cv_l+4),
470 _mm256_load_si256(CONST_M256_CAST(iv+4)));
471
472 _mm256_storeu_si256(M256_CAST(cv_r+0),
473 _mm256_load_si256(CONST_M256_CAST(iv+8)));
474 _mm256_storeu_si256(M256_CAST(cv_r+4),
475 _mm256_load_si256(CONST_M256_CAST(iv+12)));
476}
477
478inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
479{
480 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
481 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_setzero_si256());
482 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
483 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_setzero_si256());
484}
485
486inline void zero_submsgs(LSH512_AVX2_Context* ctx)
487{
488 lsh_u64* sub_msgs = ctx->sub_msgs;
489
490 _mm256_storeu_si256(M256_CAST(sub_msgs+ 0),
491 _mm256_setzero_si256());
492 _mm256_storeu_si256(M256_CAST(sub_msgs+ 4),
493 _mm256_setzero_si256());
494
495 _mm256_storeu_si256(M256_CAST(sub_msgs+ 8),
496 _mm256_setzero_si256());
497 _mm256_storeu_si256(M256_CAST(sub_msgs+12),
498 _mm256_setzero_si256());
499}
500
501inline void init224(LSH512_AVX2_Context* ctx)
502{
503 CRYPTOPP_ASSERT(ctx != NULLPTR);
504
505 zero_submsgs(ctx);
506 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
507}
508
509inline void init256(LSH512_AVX2_Context* ctx)
510{
511 CRYPTOPP_ASSERT(ctx != NULLPTR);
512
513 zero_submsgs(ctx);
514 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
515}
516
517inline void init384(LSH512_AVX2_Context* ctx)
518{
519 CRYPTOPP_ASSERT(ctx != NULLPTR);
520
521 zero_submsgs(ctx);
522 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
523}
524
525inline void init512(LSH512_AVX2_Context* ctx)
526{
527 CRYPTOPP_ASSERT(ctx != NULLPTR);
528
529 zero_submsgs(ctx);
530 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
531}
532
533/* -------------------------------------------------------- */
534
535inline void fin(LSH512_AVX2_Context* ctx)
536{
537 CRYPTOPP_ASSERT(ctx != NULLPTR);
538
539 _mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
540 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
541 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));
542
543 _mm256_storeu_si256(M256_CAST(ctx->cv_l+4), _mm256_xor_si256(
544 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+4)),
545 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+4))));
546}
547
548/* -------------------------------------------------------- */
549
550inline void get_hash(LSH512_AVX2_Context* ctx, lsh_u8* pbHashVal)
551{
552 CRYPTOPP_ASSERT(ctx != NULLPTR);
553 CRYPTOPP_ASSERT(ctx->alg_type != 0);
554 CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
555
556 lsh_uint alg_type = ctx->alg_type;
557 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
558 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
559
560 // Multiplying by sizeof(lsh_u8) looks odd...
561 std::memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
562 if (hash_val_bit_len){
563 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
564 }
565}
566
567/* -------------------------------------------------------- */
568
569lsh_err lsh512_init_avx2(LSH512_AVX2_Context* ctx)
570{
571 CRYPTOPP_ASSERT(ctx != NULLPTR);
572 CRYPTOPP_ASSERT(ctx->alg_type != 0);
573
574 lsh_u32 alg_type = ctx->alg_type;
575 const lsh_u64* const_v = NULL;
576 ctx->remain_databitlen = 0;
577
578 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
579 AVX_Cleanup cleanup;
580
581 switch (alg_type){
582 case LSH_TYPE_512_512:
583 init512(ctx);
584 return LSH_SUCCESS;
585 case LSH_TYPE_512_384:
586 init384(ctx);
587 return LSH_SUCCESS;
588 case LSH_TYPE_512_256:
589 init256(ctx);
590 return LSH_SUCCESS;
591 case LSH_TYPE_512_224:
592 init224(ctx);
593 return LSH_SUCCESS;
594 default:
595 break;
596 }
597
598 lsh_u64* cv_l = ctx->cv_l;
599 lsh_u64* cv_r = ctx->cv_r;
600
601 zero_iv(cv_l, cv_r);
602 cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
603 cv_l[1] = LSH_GET_HASHBIT(alg_type);
604
605 for (size_t i = 0; i < NUM_STEPS / 2; i++)
606 {
607 //Mix
608 load_sc(&const_v, i * 16);
609 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
610 word_perm(cv_l, cv_r);
611
612 load_sc(&const_v, i * 16 + 8);
613 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
614 word_perm(cv_l, cv_r);
615 }
616
617 return LSH_SUCCESS;
618}
619
620lsh_err lsh512_update_avx2(LSH512_AVX2_Context* ctx, const lsh_u8* data, size_t databitlen)
621{
622 CRYPTOPP_ASSERT(ctx != NULLPTR);
623 CRYPTOPP_ASSERT(data != NULLPTR);
624 CRYPTOPP_ASSERT(databitlen % 8 == 0);
625 CRYPTOPP_ASSERT(ctx->alg_type != 0);
626
627 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
628 AVX_Cleanup cleanup;
629
630 if (databitlen == 0){
631 return LSH_SUCCESS;
632 }
633
634 // We are byte oriented. tail bits will always be 0.
635 size_t databytelen = databitlen >> 3;
636 // lsh_uint pos2 = databitlen & 0x7;
637 const size_t pos2 = 0;
638
639 size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
640 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
641 const size_t remain_msg_bit = 0;
642
643 if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
644 return LSH_ERR_INVALID_STATE;
645 }
646 if (remain_msg_bit > 0){
647 return LSH_ERR_INVALID_DATABITLEN;
648 }
649
650 if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
651 std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
652 ctx->remain_databitlen += (lsh_uint)databitlen;
653 remain_msg_byte += (lsh_uint)databytelen;
654 if (pos2){
655 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
656 }
657 return LSH_SUCCESS;
658 }
659
660 if (remain_msg_byte > 0){
661 size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
662 std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
663 compress(ctx, ctx->last_block);
664 data += more_byte;
665 databytelen -= more_byte;
666 remain_msg_byte = 0;
667 ctx->remain_databitlen = 0;
668 }
669
670 while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
671 {
672 // This call to compress caused some trouble.
673 // The data pointer can become unaligned in the
674 // previous block.
675 compress(ctx, data);
676 data += LSH512_MSG_BLK_BYTE_LEN;
677 databytelen -= LSH512_MSG_BLK_BYTE_LEN;
678 }
679
680 if (databytelen > 0){
681 std::memcpy(ctx->last_block, data, databytelen);
682 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
683 }
684
685 if (pos2){
686 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
687 ctx->remain_databitlen += pos2;
688 }
689 return LSH_SUCCESS;
690}
691
692lsh_err lsh512_final_avx2(LSH512_AVX2_Context* ctx, lsh_u8* hashval)
693{
694 CRYPTOPP_ASSERT(ctx != NULLPTR);
695 CRYPTOPP_ASSERT(hashval != NULLPTR);
696
697 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
698 AVX_Cleanup cleanup;
699
700 // We are byte oriented. tail bits will always be 0.
701 size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
702 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
703 const size_t remain_msg_bit = 0;
704
705 if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
706 return LSH_ERR_INVALID_STATE;
707 }
708
709 if (remain_msg_bit){
710 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
711 }
712 else{
713 ctx->last_block[remain_msg_byte] = 0x80;
714 }
715 std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
716
717 compress(ctx, ctx->last_block);
718
719 fin(ctx);
720 get_hash(ctx, hashval);
721
722 return LSH_SUCCESS;
723}
724
725ANONYMOUS_NAMESPACE_END
726
727NAMESPACE_BEGIN(CryptoPP)
728
729extern
730void LSH512_Base_Restart_AVX2(word64* state)
731{
732 state[RemainingBits] = 0;
733 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
734 lsh_err err = lsh512_init_avx2(&ctx);
735
736 if (err != LSH_SUCCESS)
737 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_avx2 failed");
738}
739
740extern
741void LSH512_Base_Update_AVX2(word64* state, const byte *input, size_t size)
742{
743 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
744 lsh_err err = lsh512_update_avx2(&ctx, input, 8*size);
745
746 if (err != LSH_SUCCESS)
747 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_avx2 failed");
748}
749
750extern
751void LSH512_Base_TruncatedFinal_AVX2(word64* state, byte *hash, size_t)
752{
753 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
754 lsh_err err = lsh512_final_avx2(&ctx, hash);
755
756 if (err != LSH_SUCCESS)
757 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_avx2 failed");
758}
759
760NAMESPACE_END
761
762#endif // CRYPTOPP_AVX2_AVAILABLE
Base class for all exceptions thrown by the library.
Definition cryptlib.h:164
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition cryptlib.h:182
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition config_int.h:66
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:72
unsigned long long word64
64-bit unsigned datatype
Definition config_int.h:101
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition cryptlib.h:150
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition misc.h:2417
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition misc.h:1808
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition trap.h:68