Crypto++ 8.9
Free C++ class library of cryptographic schemes
lsh256_sse.cpp
// lsh256_sse.cpp - written and placed in the public domain by Jeffrey Walton
2// Based on the specification and source code provided by
3// Korea Internet & Security Agency (KISA) website. Also
4// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10// makes using zeroupper a little tricky.
11
12#include "pch.h"
13#include "config.h"
14
15#include "lsh.h"
16#include "cpu.h"
17#include "misc.h"
18
19// Squash MS LNK4221 and libtool warnings
20extern const char LSH256_SSE_FNAME[] = __FILE__;
21
22#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
23
24#if defined(CRYPTOPP_SSSE3_AVAILABLE)
25# include <emmintrin.h>
26# include <tmmintrin.h>
27#endif
28
29#if defined(CRYPTOPP_XOP_AVAILABLE)
30# include <ammintrin.h>
31#endif
32
33#if defined(CRYPTOPP_GCC_COMPATIBLE)
34# include <x86intrin.h>
35#endif
36
37ANONYMOUS_NAMESPACE_BEGIN
38
/* LSH-256 constants */

// Sizes in bytes. The commented-out entries are unused in this file.
const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;      // one message block
// const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
// const unsigned int LSH256_CV_BYTE_LEN = 64;
const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;  // longest digest

// Sizes in 32-bit words.
// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;     // words in each IV table
const unsigned int CONST_WORD_LEN = 8;   // step-constant words consumed per step
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
// const unsigned int WORD_BIT_LEN = 32;
const unsigned int NUM_STEPS = 26;       // steps per compression call

// Rotation amounts passed to mix<Alpha, Beta> for even and odd steps.
const unsigned int ROT_EVEN_ALPHA = 29;
const unsigned int ROT_EVEN_BETA = 1;
const unsigned int ROT_ODD_ALPHA = 5;
const unsigned int ROT_ODD_BETA = 17;

// Algorithm type identifiers; the low 16 bits are the digest length in bytes.
const unsigned int LSH_TYPE_256_256 = 0x20;
const unsigned int LSH_TYPE_256_224 = 0x1C;

// const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
// const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;

/* Error codes */

const unsigned int LSH_SUCCESS = 0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Word indexes into the caller's state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;
76
77NAMESPACE_END
78
79NAMESPACE_BEGIN(CryptoPP)
80NAMESPACE_BEGIN(LSH)
81
82// lsh256.cpp
83extern const word32 LSH256_IV224[CV_WORD_LEN];
84extern const word32 LSH256_IV256[CV_WORD_LEN];
85extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];
86
87NAMESPACE_END // LSH
88NAMESPACE_END // Crypto++
89
90ANONYMOUS_NAMESPACE_BEGIN
91
92using CryptoPP::byte;
93using CryptoPP::word32;
94using CryptoPP::rotlFixed;
95using CryptoPP::rotlConstant;
96
97using CryptoPP::GetBlock;
98using CryptoPP::LittleEndian;
99using CryptoPP::ConditionalByteReverse;
100using CryptoPP::LITTLE_ENDIAN_ORDER;
101
102typedef byte lsh_u8;
103typedef word32 lsh_u32;
104typedef word32 lsh_uint;
105typedef word32 lsh_err;
106typedef word32 lsh_type;
107
108using CryptoPP::LSH::LSH256_IV224;
109using CryptoPP::LSH::LSH256_IV256;
110using CryptoPP::LSH::LSH256_StepConstants;
111
112struct LSH256_SSSE3_Context
113{
114 LSH256_SSSE3_Context(word32* state, word32 algType, word32& remainingBitLength) :
115 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
116 last_block(reinterpret_cast<byte*>(state+48)),
117 remain_databitlen(remainingBitLength),
118 alg_type(static_cast<lsh_type>(algType)) {}
119
120 lsh_u32* cv_l; // start of our state block
121 lsh_u32* cv_r;
122 lsh_u32* sub_msgs;
123 lsh_u8* last_block;
124 lsh_u32& remain_databitlen;
125 lsh_type alg_type;
126};
127
128struct LSH256_SSSE3_Internal
129{
130 LSH256_SSSE3_Internal(word32* state) :
131 submsg_e_l(state+16), submsg_e_r(state+24),
132 submsg_o_l(state+32), submsg_o_r(state+40) { }
133
134 lsh_u32* submsg_e_l; /* even left sub-message */
135 lsh_u32* submsg_e_r; /* even right sub-message */
136 lsh_u32* submsg_o_l; /* odd left sub-message */
137 lsh_u32* submsg_o_r; /* odd right sub-message */
138};
139
140// const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
141
142/* LSH AlgType Macro */
143
144inline bool LSH_IS_LSH512(lsh_uint val) {
145 return (val & 0xf0000) == 0;
146}
147
148inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
149 return val >> 24;
150}
151
152inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
153 return val & 0xffff;
154}
155
156inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
157 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
158}
159
160inline lsh_u32 loadLE32(lsh_u32 v) {
162}
163
164lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
165 return rotlFixed(x, r);
166}
167
168// Original code relied upon unaligned lsh_u32 buffer
169inline void load_msg_blk(LSH256_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
170{
171 CRYPTOPP_ASSERT(i_state != NULLPTR);
172 lsh_u32* submsg_e_l = i_state->submsg_e_l;
173 lsh_u32* submsg_e_r = i_state->submsg_e_r;
174 lsh_u32* submsg_o_l = i_state->submsg_o_l;
175 lsh_u32* submsg_o_r = i_state->submsg_o_r;
176
177 _mm_storeu_si128(M128_CAST(submsg_e_l+0),
178 _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
179 _mm_storeu_si128(M128_CAST(submsg_e_l+4),
180 _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
181 _mm_storeu_si128(M128_CAST(submsg_e_r+0),
182 _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
183 _mm_storeu_si128(M128_CAST(submsg_e_r+4),
184 _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
185 _mm_storeu_si128(M128_CAST(submsg_o_l+0),
186 _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
187 _mm_storeu_si128(M128_CAST(submsg_o_l+4),
188 _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
189 _mm_storeu_si128(M128_CAST(submsg_o_r+0),
190 _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
191 _mm_storeu_si128(M128_CAST(submsg_o_r+4),
192 _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
193}
194
195inline void msg_exp_even(LSH256_SSSE3_Internal* i_state)
196{
197 CRYPTOPP_ASSERT(i_state != NULLPTR);
198
199 lsh_u32* submsg_e_l = i_state->submsg_e_l;
200 lsh_u32* submsg_e_r = i_state->submsg_e_r;
201 lsh_u32* submsg_o_l = i_state->submsg_o_l;
202 lsh_u32* submsg_o_r = i_state->submsg_o_r;
203
204 _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi32(
205 _mm_shuffle_epi32(
206 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(3,2,1,0)),
207 _mm_shuffle_epi32(
208 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(1,0,2,3))));
209
210 _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi32(
211 _mm_shuffle_epi32(
212 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(3,2,1,0)),
213 _mm_shuffle_epi32(
214 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(2,1,0,3))));
215
216 _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi32(
217 _mm_shuffle_epi32(
218 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(3,2,1,0)),
219 _mm_shuffle_epi32(
220 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(1,0,2,3))));
221
222 _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi32(
223 _mm_shuffle_epi32(
224 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(3,2,1,0)),
225 _mm_shuffle_epi32(
226 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(2,1,0,3))));
227}
228
229inline void msg_exp_odd(LSH256_SSSE3_Internal* i_state)
230{
231 CRYPTOPP_ASSERT(i_state != NULLPTR);
232
233 lsh_u32* submsg_e_l = i_state->submsg_e_l;
234 lsh_u32* submsg_e_r = i_state->submsg_e_r;
235 lsh_u32* submsg_o_l = i_state->submsg_o_l;
236 lsh_u32* submsg_o_r = i_state->submsg_o_r;
237
238 _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi32(
239 _mm_shuffle_epi32(
240 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(3,2,1,0)),
241 _mm_shuffle_epi32(
242 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(1,0,2,3))));
243
244 _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi32(
245 _mm_shuffle_epi32(
246 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(3,2,1,0)),
247 _mm_shuffle_epi32(
248 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(2,1,0,3))));
249
250 _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi32(
251 _mm_shuffle_epi32(
252 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(3,2,1,0)),
253 _mm_shuffle_epi32(
254 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(1,0,2,3))));
255
256 _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi32(
257 _mm_shuffle_epi32(
258 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(3,2,1,0)),
259 _mm_shuffle_epi32(
260 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(2,1,0,3))));
261}
262
263inline void load_sc(const lsh_u32** p_const_v, size_t i)
264{
265 CRYPTOPP_ASSERT(p_const_v != NULLPTR);
266
267 *p_const_v = &LSH256_StepConstants[i];
268}
269
270inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
271{
272 CRYPTOPP_ASSERT(i_state != NULLPTR);
273
274 lsh_u32* submsg_e_l = i_state->submsg_e_l;
275 lsh_u32* submsg_e_r = i_state->submsg_e_r;
276
277 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_xor_si128(
278 _mm_loadu_si128(CONST_M128_CAST(cv_l+0)),
279 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
280 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
281 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
282 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
283 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_xor_si128(
284 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
285 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
286 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
287 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
288 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
289}
290
291inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
292{
293 CRYPTOPP_ASSERT(i_state != NULLPTR);
294
295 lsh_u32* submsg_o_l = i_state->submsg_o_l;
296 lsh_u32* submsg_o_r = i_state->submsg_o_r;
297
298 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
299 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
300 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
301 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
302 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
303 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
304 _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
305 _mm_loadu_si128(CONST_M128_CAST(cv_r)),
306 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
307 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
308 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
309 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
310}
311
312inline void add_blk(lsh_u32 cv_l[8], const lsh_u32 cv_r[8])
313{
314 _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi32(
315 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
316 _mm_loadu_si128(CONST_M128_CAST(cv_r))));
317 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi32(
318 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
319 _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
320}
321
322template <unsigned int R>
323inline void rotate_blk(lsh_u32 cv[8])
324{
325#if defined(CRYPTOPP_XOP_AVAILABLE)
326 _mm_storeu_si128(M128_CAST(cv),
327 _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
328 _mm_storeu_si128(M128_CAST(cv+4),
329 _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
330#else
331 _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
332 _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
333 _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), 32-R)));
334 _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
335 _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
336 _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 32-R)));
337#endif
338}
339
340inline void xor_with_const(lsh_u32* cv_l, const lsh_u32* const_v)
341{
342 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
343 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
344 _mm_loadu_si128(CONST_M128_CAST(const_v))));
345 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
346 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
347 _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
348}
349
350inline void rotate_msg_gamma(lsh_u32 cv_r[8])
351{
352 // g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
353 _mm_storeu_si128(M128_CAST(cv_r+0),
354 _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
355 _mm_set_epi8(12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
356 _mm_storeu_si128(M128_CAST(cv_r+4),
357 _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
358 _mm_set_epi8(15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1)));
359}
360
361inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
362{
363 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_shuffle_epi32(
364 _mm_loadu_si128(CONST_M128_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
365 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_shuffle_epi32(
366 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
367 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_shuffle_epi32(
368 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
369 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_shuffle_epi32(
370 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));
371
372 __m128i temp = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
373 _mm_storeu_si128(M128_CAST(cv_l+0),
374 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
375 _mm_storeu_si128(M128_CAST(cv_l+4),
376 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
377 _mm_storeu_si128(M128_CAST(cv_r+4),
378 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
379 _mm_storeu_si128(M128_CAST(cv_r+0), temp);
380}
381
382/* -------------------------------------------------------- *
383* step function
384* -------------------------------------------------------- */
385
386template <unsigned int Alpha, unsigned int Beta>
387inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
388{
389 add_blk(cv_l, cv_r);
390 rotate_blk<Alpha>(cv_l);
391 xor_with_const(cv_l, const_v);
392 add_blk(cv_r, cv_l);
393 rotate_blk<Beta>(cv_r);
394 add_blk(cv_l, cv_r);
395 rotate_msg_gamma(cv_r);
396}
397
398/* -------------------------------------------------------- *
399* compression function
400* -------------------------------------------------------- */
401
402inline void compress(LSH256_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
403{
404 CRYPTOPP_ASSERT(ctx != NULLPTR);
405
406 LSH256_SSSE3_Internal s_state(ctx->cv_l);
407 LSH256_SSSE3_Internal* i_state = &s_state;
408
409 const lsh_u32* const_v = NULL;
410 lsh_u32* cv_l = ctx->cv_l;
411 lsh_u32* cv_r = ctx->cv_r;
412
413 load_msg_blk(i_state, pdMsgBlk);
414
415 msg_add_even(cv_l, cv_r, i_state);
416 load_sc(&const_v, 0);
417 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
418 word_perm(cv_l, cv_r);
419
420 msg_add_odd(cv_l, cv_r, i_state);
421 load_sc(&const_v, 8);
422 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
423 word_perm(cv_l, cv_r);
424
425 for (size_t i = 1; i < NUM_STEPS / 2; i++)
426 {
427 msg_exp_even(i_state);
428 msg_add_even(cv_l, cv_r, i_state);
429 load_sc(&const_v, 16 * i);
430 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
431 word_perm(cv_l, cv_r);
432
433 msg_exp_odd(i_state);
434 msg_add_odd(cv_l, cv_r, i_state);
435 load_sc(&const_v, 16 * i + 8);
436 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
437 word_perm(cv_l, cv_r);
438 }
439
440 msg_exp_even(i_state);
441 msg_add_even(cv_l, cv_r, i_state);
442}
443
444/* -------------------------------------------------------- */
445
446inline void load_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 iv[16])
447{
448 _mm_storeu_si128(M128_CAST(cv_l+ 0),
449 _mm_load_si128(CONST_M128_CAST(iv+ 0)));
450 _mm_storeu_si128(M128_CAST(cv_l+ 4),
451 _mm_load_si128(CONST_M128_CAST(iv+ 4)));
452 _mm_storeu_si128(M128_CAST(cv_r+ 0),
453 _mm_load_si128(CONST_M128_CAST(iv+ 8)));
454 _mm_storeu_si128(M128_CAST(cv_r+ 4),
455 _mm_load_si128(CONST_M128_CAST(iv+12)));
456}
457
458inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
459{
460 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
461 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
462 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
463 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
464}
465
466inline void zero_submsgs(LSH256_SSSE3_Context* ctx)
467{
468 lsh_u32* sub_msgs = ctx->sub_msgs;
469
470 _mm_storeu_si128(M128_CAST(sub_msgs+ 0), _mm_setzero_si128());
471 _mm_storeu_si128(M128_CAST(sub_msgs+ 4), _mm_setzero_si128());
472 _mm_storeu_si128(M128_CAST(sub_msgs+ 8), _mm_setzero_si128());
473 _mm_storeu_si128(M128_CAST(sub_msgs+12), _mm_setzero_si128());
474 _mm_storeu_si128(M128_CAST(sub_msgs+16), _mm_setzero_si128());
475 _mm_storeu_si128(M128_CAST(sub_msgs+20), _mm_setzero_si128());
476 _mm_storeu_si128(M128_CAST(sub_msgs+24), _mm_setzero_si128());
477 _mm_storeu_si128(M128_CAST(sub_msgs+28), _mm_setzero_si128());
478}
479
480inline void init224(LSH256_SSSE3_Context* ctx)
481{
482 CRYPTOPP_ASSERT(ctx != NULLPTR);
483
484 zero_submsgs(ctx);
485 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
486}
487
488inline void init256(LSH256_SSSE3_Context* ctx)
489{
490 CRYPTOPP_ASSERT(ctx != NULLPTR);
491
492 zero_submsgs(ctx);
493 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
494}
495
496/* -------------------------------------------------------- */
497
498inline void fin(LSH256_SSSE3_Context* ctx)
499{
500 CRYPTOPP_ASSERT(ctx != NULLPTR);
501
502 _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
503 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
504 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
505 _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
506 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
507 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
508}
509
510/* -------------------------------------------------------- */
511
512inline void get_hash(LSH256_SSSE3_Context* ctx, lsh_u8* pbHashVal)
513{
514 CRYPTOPP_ASSERT(ctx != NULLPTR);
515 CRYPTOPP_ASSERT(ctx->alg_type != 0);
516 CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
517
518 lsh_uint alg_type = ctx->alg_type;
519 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
520 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
521
522 // Multiplying by sizeof(lsh_u8) looks odd...
523 std::memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
524 if (hash_val_bit_len){
525 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
526 }
527}
528
529/* -------------------------------------------------------- */
530
531lsh_err lsh256_ssse3_init(LSH256_SSSE3_Context* ctx)
532{
533 CRYPTOPP_ASSERT(ctx != NULLPTR);
534 CRYPTOPP_ASSERT(ctx->alg_type != 0);
535
536 lsh_u32 alg_type = ctx->alg_type;
537 const lsh_u32* const_v = NULL;
538 ctx->remain_databitlen = 0;
539
540 switch (alg_type)
541 {
542 case LSH_TYPE_256_256:
543 init256(ctx);
544 return LSH_SUCCESS;
545 case LSH_TYPE_256_224:
546 init224(ctx);
547 return LSH_SUCCESS;
548 default:
549 break;
550 }
551
552 lsh_u32* cv_l = ctx->cv_l;
553 lsh_u32* cv_r = ctx->cv_r;
554
555 zero_iv(cv_l, cv_r);
556 cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
557 cv_l[1] = LSH_GET_HASHBIT(alg_type);
558
559 for (size_t i = 0; i < NUM_STEPS / 2; i++)
560 {
561 //Mix
562 load_sc(&const_v, i * 16);
563 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
564 word_perm(cv_l, cv_r);
565
566 load_sc(&const_v, i * 16 + 8);
567 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
568 word_perm(cv_l, cv_r);
569 }
570
571 return LSH_SUCCESS;
572}
573
574lsh_err lsh256_ssse3_update(LSH256_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
575{
576 CRYPTOPP_ASSERT(ctx != NULLPTR);
577 CRYPTOPP_ASSERT(data != NULLPTR);
578 CRYPTOPP_ASSERT(databitlen % 8 == 0);
579 CRYPTOPP_ASSERT(ctx->alg_type != 0);
580
581 if (databitlen == 0){
582 return LSH_SUCCESS;
583 }
584
585 // We are byte oriented. tail bits will always be 0.
586 size_t databytelen = databitlen >> 3;
587 // lsh_uint pos2 = databitlen & 0x7;
588 const size_t pos2 = 0;
589
590 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
591 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
592 const size_t remain_msg_bit = 0;
593
594 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
595 return LSH_ERR_INVALID_STATE;
596 }
597 if (remain_msg_bit > 0){
598 return LSH_ERR_INVALID_DATABITLEN;
599 }
600
601 if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
602 {
603 std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
604 ctx->remain_databitlen += (lsh_uint)databitlen;
605 remain_msg_byte += (lsh_uint)databytelen;
606 if (pos2){
607 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
608 }
609 return LSH_SUCCESS;
610 }
611
612 if (remain_msg_byte > 0){
613 size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
614 std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
615 compress(ctx, ctx->last_block);
616 data += more_byte;
617 databytelen -= more_byte;
618 remain_msg_byte = 0;
619 ctx->remain_databitlen = 0;
620 }
621
622 while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
623 {
624 // This call to compress caused some trouble.
625 // The data pointer can become unaligned in the
626 // previous block.
627 compress(ctx, data);
628 data += LSH256_MSG_BLK_BYTE_LEN;
629 databytelen -= LSH256_MSG_BLK_BYTE_LEN;
630 }
631
632 if (databytelen > 0){
633 std::memcpy(ctx->last_block, data, databytelen);
634 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
635 }
636
637 if (pos2){
638 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
639 ctx->remain_databitlen += pos2;
640 }
641
642 return LSH_SUCCESS;
643}
644
645lsh_err lsh256_ssse3_final(LSH256_SSSE3_Context* ctx, lsh_u8* hashval)
646{
647 CRYPTOPP_ASSERT(ctx != NULLPTR);
648 CRYPTOPP_ASSERT(hashval != NULLPTR);
649
650 // We are byte oriented. tail bits will always be 0.
651 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
652 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
653 const size_t remain_msg_bit = 0;
654
655 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
656 return LSH_ERR_INVALID_STATE;
657 }
658
659 if (remain_msg_bit){
660 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
661 }
662 else{
663 ctx->last_block[remain_msg_byte] = 0x80;
664 }
665 std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
666
667 compress(ctx, ctx->last_block);
668
669 fin(ctx);
670 get_hash(ctx, hashval);
671
672 return LSH_SUCCESS;
673}
674
675ANONYMOUS_NAMESPACE_END // Anonymous
676
677NAMESPACE_BEGIN(CryptoPP)
678
679extern
680void LSH256_Base_Restart_SSSE3(word32* state)
681{
682 state[RemainingBits] = 0;
683 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
684 lsh_err err = lsh256_ssse3_init(&ctx);
685
686 if (err != LSH_SUCCESS)
687 throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_init failed");
688}
689
690extern
691void LSH256_Base_Update_SSSE3(word32* state, const byte *input, size_t size)
692{
693 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
694 lsh_err err = lsh256_ssse3_update(&ctx, input, 8*size);
695
696 if (err != LSH_SUCCESS)
697 throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_update failed");
698}
699
700extern
701void LSH256_Base_TruncatedFinal_SSSE3(word32* state, byte *hash, size_t)
702{
703 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
704 lsh_err err = lsh256_ssse3_final(&ctx, hash);
705
706 if (err != LSH_SUCCESS)
707 throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_final failed");
708}
709
710NAMESPACE_END
711
712#endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition cryptlib.h:164
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition cryptlib.h:182
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition config_int.h:66
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:72
Functions for CPU features and intrinsics.
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition cryptlib.h:150
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition misc.h:2417
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition misc.h:1808
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition trap.h:68