Crypto++ 8.9
Free C++ class library of cryptographic schemes
lsh512_sse.cpp
1// lsh.cpp - written and placed in the public domain by Jeffrey Walton
2// Based on the specification and source code provided by
3// Korea Internet & Security Agency (KISA) website. Also
4// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10// makes using zeroupper a little tricky.
11
12#include "pch.h"
13#include "config.h"
14
15#include "lsh.h"
16#include "misc.h"
17
18// Squash MS LNK4221 and libtool warnings
19extern const char LSH512_SSE_FNAME[] = __FILE__;
20
21#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
22
23#if defined(CRYPTOPP_SSSE3_AVAILABLE)
24# include <emmintrin.h>
25# include <tmmintrin.h>
26#endif
27
28#if defined(CRYPTOPP_XOP_AVAILABLE)
29# include <ammintrin.h>
30#endif
31
32#if defined(CRYPTOPP_GCC_COMPATIBLE)
33# include <x86intrin.h>
34#endif
35
36ANONYMOUS_NAMESPACE_BEGIN
37
38/* LSH Constants */
39
// LSH-512 processes 2048-bit (256-byte) message blocks and emits at
// most a 512-bit (64-byte) digest.
const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
// const unsigned int LSH512_CV_BYTE_LEN = 128;
const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;

// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;      // chaining variable length, in 64-bit words
const unsigned int CONST_WORD_LEN = 8;    // step constants consumed per mix step
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
const unsigned int NUM_STEPS = 28;        // mix steps per compression call

// Word-rotation amounts for the even and odd mix steps (LSH-512 parameters).
const unsigned int ROT_EVEN_ALPHA = 23;
const unsigned int ROT_EVEN_BETA = 59;
const unsigned int ROT_ODD_ALPHA = 7;
const unsigned int ROT_ODD_BETA = 3;

// Algorithm type codes. Bits 16..19 select the LSH-512 family; the low
// 16 bits carry the digest length in bytes (0x40=64, 0x30=48, ...).
const unsigned int LSH_TYPE_512_512 = 0x0010040;
const unsigned int LSH_TYPE_512_384 = 0x0010030;
const unsigned int LSH_TYPE_512_256 = 0x0010020;
const unsigned int LSH_TYPE_512_224 = 0x001001C;

// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array (64-bit words) for the bookkeeping slots */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;
76
77NAMESPACE_END
78
79NAMESPACE_BEGIN(CryptoPP)
80NAMESPACE_BEGIN(LSH)
81
82// lsh512.cpp
83extern const word64 LSH512_IV224[CV_WORD_LEN];
84extern const word64 LSH512_IV256[CV_WORD_LEN];
85extern const word64 LSH512_IV384[CV_WORD_LEN];
86extern const word64 LSH512_IV512[CV_WORD_LEN];
87extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];
88
89NAMESPACE_END // LSH
90NAMESPACE_END // Crypto++
91
92ANONYMOUS_NAMESPACE_BEGIN
93
94using CryptoPP::byte;
95using CryptoPP::word32;
96using CryptoPP::word64;
97using CryptoPP::rotlFixed;
98using CryptoPP::rotlConstant;
99
100using CryptoPP::GetBlock;
101using CryptoPP::LittleEndian;
102using CryptoPP::ConditionalByteReverse;
103using CryptoPP::LITTLE_ENDIAN_ORDER;
104
105using CryptoPP::LSH::LSH512_IV224;
106using CryptoPP::LSH::LSH512_IV256;
107using CryptoPP::LSH::LSH512_IV384;
108using CryptoPP::LSH::LSH512_IV512;
109using CryptoPP::LSH::LSH512_StepConstants;
110
111typedef byte lsh_u8;
112typedef word32 lsh_u32;
113typedef word64 lsh_u64;
114typedef word32 lsh_uint;
115typedef word32 lsh_err;
116typedef word32 lsh_type;
117
// Non-owning facade over the caller's flat word64 state array. The
// constructor carves the array into the logical LSH-512 pieces by
// offset: cv_l at +0, cv_r at +8, sub-messages at +16, and the partial
// message-block buffer at +48. remain_databitlen is held by reference
// so updates land in the caller's state array.
struct LSH512_SSSE3_Context
{
	LSH512_SSSE3_Context(word64* state, word64 algType, word64& remainingBitLength) :
		cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
		last_block(reinterpret_cast<byte*>(state+48)),
		remain_databitlen(remainingBitLength),
		alg_type(static_cast<lsh_type>(algType)) {}

	lsh_u64* cv_l;  // start of our state block (left chaining variable)
	lsh_u64* cv_r;  // right chaining variable
	lsh_u64* sub_msgs;  // expanded message words (see LSH512_SSSE3_Internal)
	lsh_u8* last_block;  // buffer for a partial 256-byte message block
	lsh_u64& remain_databitlen;  // bits buffered in last_block
	lsh_type alg_type;  // one of the LSH_TYPE_512_* codes
};
133
// Non-owning view of the sub-message region of the state array. Offsets
// are relative to the start of the state array (cv_l), so this struct
// aliases the same memory as LSH512_SSSE3_Context::sub_msgs.
struct LSH512_SSSE3_Internal
{
	LSH512_SSSE3_Internal(word64* state) :
		submsg_e_l(state+16), submsg_e_r(state+24),
		submsg_o_l(state+32), submsg_o_r(state+40) { }

	lsh_u64* submsg_e_l; /* even left sub-message */
	lsh_u64* submsg_e_r; /* even right sub-message */
	lsh_u64* submsg_o_l; /* odd left sub-message */
	lsh_u64* submsg_o_r; /* odd right sub-message */
};
145
146// const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
147
148/* LSH AlgType Macro */
149
150inline bool LSH_IS_LSH512(lsh_uint val) {
151 return (val & 0xf0000) == 0x10000;
152}
153
154inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
155 return val >> 24;
156}
157
158inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
159 return val & 0xffff;
160}
161
162inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
163 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
164}
165
166inline lsh_u64 loadLE64(lsh_u64 v) {
168}
169
170lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
171 return rotlFixed(x, r);
172}
173
174// Original code relied upon unaligned lsh_u64 buffer
175inline void load_msg_blk(LSH512_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
176{
177 lsh_u64* submsg_e_l = i_state->submsg_e_l;
178 lsh_u64* submsg_e_r = i_state->submsg_e_r;
179 lsh_u64* submsg_o_l = i_state->submsg_o_l;
180 lsh_u64* submsg_o_r = i_state->submsg_o_r;
181
182 _mm_storeu_si128(M128_CAST(submsg_e_l+0),
183 _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
184 _mm_storeu_si128(M128_CAST(submsg_e_l+2),
185 _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
186 _mm_storeu_si128(M128_CAST(submsg_e_l+4),
187 _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
188 _mm_storeu_si128(M128_CAST(submsg_e_l+6),
189 _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
190
191 _mm_storeu_si128(M128_CAST(submsg_e_r+0),
192 _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
193 _mm_storeu_si128(M128_CAST(submsg_e_r+2),
194 _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
195 _mm_storeu_si128(M128_CAST(submsg_e_r+4),
196 _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
197 _mm_storeu_si128(M128_CAST(submsg_e_r+6),
198 _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
199
200 _mm_storeu_si128(M128_CAST(submsg_o_l+0),
201 _mm_loadu_si128(CONST_M128_CAST(msgblk+128)));
202 _mm_storeu_si128(M128_CAST(submsg_o_l+2),
203 _mm_loadu_si128(CONST_M128_CAST(msgblk+144)));
204 _mm_storeu_si128(M128_CAST(submsg_o_l+4),
205 _mm_loadu_si128(CONST_M128_CAST(msgblk+160)));
206 _mm_storeu_si128(M128_CAST(submsg_o_l+6),
207 _mm_loadu_si128(CONST_M128_CAST(msgblk+176)));
208
209 _mm_storeu_si128(M128_CAST(submsg_o_r+0),
210 _mm_loadu_si128(CONST_M128_CAST(msgblk+192)));
211 _mm_storeu_si128(M128_CAST(submsg_o_r+2),
212 _mm_loadu_si128(CONST_M128_CAST(msgblk+208)));
213 _mm_storeu_si128(M128_CAST(submsg_o_r+4),
214 _mm_loadu_si128(CONST_M128_CAST(msgblk+224)));
215 _mm_storeu_si128(M128_CAST(submsg_o_r+6),
216 _mm_loadu_si128(CONST_M128_CAST(msgblk+240)));
217}
218
// Message expansion for an even step: permute the words of the even
// sub-message in place (the spec's word permutation, realized with lane
// swaps and 64-bit unpacks), then add the odd sub-message word-wise.
inline void msg_exp_even(LSH512_SSSE3_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;
	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	__m128i temp;
	// _MM_SHUFFLE(1,0,3,2) swaps the two 64-bit lanes of a vector.
	_mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)), _MM_SHUFFLE(1,0,3,2)));

	// Swap the vector pairs e_l[0..1] <-> e_l[2..3].
	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0));
	_mm_storeu_si128(M128_CAST(submsg_e_l+0),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)));
	_mm_storeu_si128(M128_CAST(submsg_e_l+2), temp);
	_mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)), _MM_SHUFFLE(1,0,3,2)));

	// Cross-unpack e_l[4..5] and e_l[6..7].
	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4));
	_mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
	_mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_unpackhi_epi64(
		temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)), _MM_SHUFFLE(1,0,3,2)));

	// Same permutation applied to the right half e_r.
	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0));
	_mm_storeu_si128(M128_CAST(submsg_e_r+0),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)));
	_mm_storeu_si128(M128_CAST(submsg_e_r+2), temp);
	_mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4));
	_mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_unpackhi_epi64(
		temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));

	// Finally: submsg_e += submsg_o, word-wise modulo 2^64.
	_mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
	_mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
	_mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
	_mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));

	_mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
	_mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
}
288
// Message expansion for an odd step: mirror of msg_exp_even with the
// roles swapped -- permute the odd sub-message in place, then add the
// even sub-message word-wise.
inline void msg_exp_odd(LSH512_SSSE3_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;
	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	__m128i temp;
	// _MM_SHUFFLE(1,0,3,2) swaps the two 64-bit lanes of a vector.
	_mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)), _MM_SHUFFLE(1,0,3,2)));

	// Swap the vector pairs o_l[0..1] <-> o_l[2..3].
	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0));
	_mm_storeu_si128(M128_CAST(submsg_o_l+0),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)));
	_mm_storeu_si128(M128_CAST(submsg_o_l+2), temp);
	_mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)), _MM_SHUFFLE(1,0,3,2)));

	// Cross-unpack o_l[4..5] and o_l[6..7].
	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4));
	_mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
	_mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_unpackhi_epi64(
		temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)), _MM_SHUFFLE(1,0,3,2)));

	// Same permutation applied to the right half o_r.
	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0));
	_mm_storeu_si128(M128_CAST(submsg_o_r+0),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)));
	_mm_storeu_si128(M128_CAST(submsg_o_r+2), temp);
	_mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)), _MM_SHUFFLE(1,0,3,2)));

	temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4));
	_mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_unpackhi_epi64(
		temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));

	// Finally: submsg_o += submsg_e, word-wise modulo 2^64.
	_mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0))));
	_mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
	_mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
	_mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));

	_mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
	_mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_add_epi64(
		_mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
		_mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
}
358
359inline void load_sc(const lsh_u64** p_const_v, size_t i)
360{
361 *p_const_v = &LSH512_StepConstants[i];
362}
363
364inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
365{
366 CRYPTOPP_ASSERT(i_state != NULLPTR);
367
368 lsh_u64* submsg_e_l = i_state->submsg_e_l;
369 lsh_u64* submsg_e_r = i_state->submsg_e_r;
370
371 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
372 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
373 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l))));
374 _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
375 _mm_loadu_si128(CONST_M128_CAST(cv_r)),
376 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r))));
377 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
378 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
379 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
380 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
381 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
382 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
383 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
384 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
385 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
386 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
387 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
388 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
389 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
390 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
391 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
392 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
393 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
394 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
395}
396
397inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
398{
399 CRYPTOPP_ASSERT(i_state != NULLPTR);
400
401 lsh_u64* submsg_o_l = i_state->submsg_o_l;
402 lsh_u64* submsg_o_r = i_state->submsg_o_r;
403
404 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
405 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
406 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
407 _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
408 _mm_loadu_si128(CONST_M128_CAST(cv_r)),
409 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
410 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
411 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
412 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
413 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
414 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
415 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
416 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
417 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
418 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
419 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
420 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
421 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
422 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
423 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
424 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
425 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
426 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
427 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
428}
429
430inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
431{
432 _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi64(
433 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
434 _mm_loadu_si128(CONST_M128_CAST(cv_r))));
435 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_add_epi64(
436 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
437 _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
438 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi64(
439 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
440 _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
441 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_add_epi64(
442 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
443 _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
444}
445
446template <unsigned int R>
447inline void rotate_blk(lsh_u64 cv[8])
448{
449#if defined(CRYPTOPP_XOP_AVAILABLE)
450 _mm_storeu_si128(M128_CAST(cv),
451 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
452 _mm_storeu_si128(M128_CAST(cv+2),
453 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R));
454 _mm_storeu_si128(M128_CAST(cv+4),
455 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
456 _mm_storeu_si128(M128_CAST(cv+6),
457 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R));
458
459#else
460 _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
461 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
462 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), 64-R)));
463 _mm_storeu_si128(M128_CAST(cv+2), _mm_or_si128(
464 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R),
465 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), 64-R)));
466 _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
467 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
468 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 64-R)));
469 _mm_storeu_si128(M128_CAST(cv+6), _mm_or_si128(
470 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R),
471 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), 64-R)));
472#endif
473}
474
475inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
476{
477 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
478 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
479 _mm_loadu_si128(CONST_M128_CAST(const_v))));
480 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
481 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
482 _mm_loadu_si128(CONST_M128_CAST(const_v+2))));
483 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
484 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
485 _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
486 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
487 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
488 _mm_loadu_si128(CONST_M128_CAST(const_v+6))));
489}
490
// Rotate each 64-bit word of cv_r left by its gamma constant. All gamma
// values are multiples of 8 bits, so each rotation is a byte permutation
// realized with one pshufb per vector (two words at a time).
inline void rotate_msg_gamma(lsh_u64 cv_r[8])
{
	// g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
	// words 0,1: rotate by 0 and 16 bits
	_mm_storeu_si128(M128_CAST(cv_r+0),
		_mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
			_mm_set_epi8(13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
	// words 2,3: rotate by 32 and 48 bits
	_mm_storeu_si128(M128_CAST(cv_r+2),
		_mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
			_mm_set_epi8(9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4)));

	// words 4,5: rotate by 8 and 24 bits
	_mm_storeu_si128(M128_CAST(cv_r+4),
		_mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
			_mm_set_epi8(12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
	// words 6,7: rotate by 40 and 56 bits
	_mm_storeu_si128(M128_CAST(cv_r+6),
		_mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
			_mm_set_epi8(8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3)));
}
508
// The spec's word permutation (sigma) applied across both chaining-value
// halves. Phase 1 permutes words inside cv_l, phase 2 inside cv_r, and
// phase 3 exchanges quarters between the two halves. The statement order
// is load-before-overwrite sensitive; do not reorder.
inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
	__m128i temp[2];
	// Phase 1: cross-unpack cv_l[0..1] with cv_l[2..3], then [4..5] with [6..7].
	temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
	_mm_storeu_si128(M128_CAST(cv_l+0), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
		_mm_loadu_si128(CONST_M128_CAST(cv_l+0))));
	_mm_storeu_si128(M128_CAST(cv_l+2), _mm_unpackhi_epi64(
		temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+2))));

	temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+4));
	_mm_storeu_si128(M128_CAST(cv_l+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
		_mm_loadu_si128(CONST_M128_CAST(cv_l+4))));
	_mm_storeu_si128(M128_CAST(cv_l+6), _mm_unpackhi_epi64(
		temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+6))));
	// Phase 2: swap 64-bit lanes of cv_r[2..3], then cross-unpack cv_r.
	_mm_storeu_si128(M128_CAST(cv_r+2), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+2)), _MM_SHUFFLE(1,0,3,2)));

	temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+0));
	_mm_storeu_si128(M128_CAST(cv_r+0), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
	_mm_storeu_si128(M128_CAST(cv_r+2), _mm_unpackhi_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+2)), temp[0]));
	_mm_storeu_si128(M128_CAST(cv_r+6), _mm_shuffle_epi32(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+6)), _MM_SHUFFLE(1,0,3,2)));

	temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+4));
	_mm_storeu_si128(M128_CAST(cv_r+4), _mm_unpacklo_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
	_mm_storeu_si128(M128_CAST(cv_r+6), _mm_unpackhi_epi64(
		_mm_loadu_si128(CONST_M128_CAST(cv_r+6)), temp[0]));

	// Phase 3: rotate quarters between halves:
	// cv_l[0..3] <- cv_l[4..7], cv_l[4..7] <- cv_r[4..7],
	// cv_r[4..7] <- cv_r[0..3], cv_r[0..3] <- old cv_l[0..3].
	temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
	temp[1] = _mm_loadu_si128(CONST_M128_CAST(cv_l+2));

	_mm_storeu_si128(M128_CAST(cv_l+0),
		_mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
	_mm_storeu_si128(M128_CAST(cv_l+2),
		_mm_loadu_si128(CONST_M128_CAST(cv_l+6)));
	_mm_storeu_si128(M128_CAST(cv_l+4),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
	_mm_storeu_si128(M128_CAST(cv_l+6),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+6)));
	_mm_storeu_si128(M128_CAST(cv_r+4),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
	_mm_storeu_si128(M128_CAST(cv_r+6),
		_mm_loadu_si128(CONST_M128_CAST(cv_r+2)));

	_mm_storeu_si128(M128_CAST(cv_r+0), temp[0]);
	_mm_storeu_si128(M128_CAST(cv_r+2), temp[1]);
}
563
564/* -------------------------------------------------------- *
565* step function
566* -------------------------------------------------------- */
567
// One LSH mix step over the chaining values: add, rotate by Alpha, XOR
// the step constants, add back, rotate by Beta, add again, then apply
// the gamma byte-rotations to cv_r. Alpha/Beta are the even- or
// odd-step rotation parameters.
template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
{
	add_blk(cv_l, cv_r);            // cv_l += cv_r
	rotate_blk<Alpha>(cv_l);        // cv_l <<<= Alpha (per word)
	xor_with_const(cv_l, const_v);  // cv_l ^= step constants
	add_blk(cv_r, cv_l);            // cv_r += cv_l
	rotate_blk<Beta>(cv_r);         // cv_r <<<= Beta (per word)
	add_blk(cv_l, cv_r);            // cv_l += cv_r
	rotate_msg_gamma(cv_r);         // per-word byte rotations on cv_r
}
579
580/* -------------------------------------------------------- *
581* compression function
582* -------------------------------------------------------- */
583
// The LSH-512 compression function: absorbs one 256-byte message block
// into the chaining values. Steps alternate even/odd: each consumes 8
// step constants, mixes, and permutes words; the message is re-expanded
// between steps. A final expansion/addition follows the last step.
inline void compress(LSH512_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	// s_state aliases the sub-message region of the caller's state array.
	LSH512_SSSE3_Internal s_state(ctx->cv_l);
	LSH512_SSSE3_Internal* i_state = &s_state;

	const lsh_u64* const_v = NULL;
	lsh_u64 *cv_l = ctx->cv_l;
	lsh_u64 *cv_r = ctx->cv_r;

	load_msg_blk(i_state, pdMsgBlk);

	// Steps 0 (even) and 1 (odd) use the freshly loaded message.
	msg_add_even(cv_l, cv_r, i_state);
	load_sc(&const_v, 0);
	mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
	word_perm(cv_l, cv_r);

	msg_add_odd(cv_l, cv_r, i_state);
	load_sc(&const_v, 8);
	mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
	word_perm(cv_l, cv_r);

	// Steps 2..27, two per iteration, expanding the message each time.
	for (size_t i = 1; i < NUM_STEPS / 2; i++)
	{
		msg_exp_even(i_state);
		msg_add_even(cv_l, cv_r, i_state);
		load_sc(&const_v, 16 * i);
		mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);

		msg_exp_odd(i_state);
		msg_add_odd(cv_l, cv_r, i_state);
		load_sc(&const_v, 16 * i + 8);
		mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);
	}

	// Final message addition (the "MsgAdd" after the last step).
	msg_exp_even(i_state);
	msg_add_even(cv_l, cv_r, i_state);
}
625
626/* -------------------------------------------------------- */
627
628inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
629{
630 // The IV's are 32-byte aligned so we can use aligned loads.
631 _mm_storeu_si128(M128_CAST(cv_l+0),
632 _mm_load_si128(CONST_M128_CAST(iv+0)));
633 _mm_storeu_si128(M128_CAST(cv_l+2),
634 _mm_load_si128(CONST_M128_CAST(iv+2)));
635 _mm_storeu_si128(M128_CAST(cv_l+4),
636 _mm_load_si128(CONST_M128_CAST(iv+4)));
637 _mm_storeu_si128(M128_CAST(cv_l+6),
638 _mm_load_si128(CONST_M128_CAST(iv+6)));
639 _mm_storeu_si128(M128_CAST(cv_r+0),
640 _mm_load_si128(CONST_M128_CAST(iv+8)));
641 _mm_storeu_si128(M128_CAST(cv_r+2),
642 _mm_load_si128(CONST_M128_CAST(iv+10)));
643 _mm_storeu_si128(M128_CAST(cv_r+4),
644 _mm_load_si128(CONST_M128_CAST(iv+12)));
645 _mm_storeu_si128(M128_CAST(cv_r+6),
646 _mm_load_si128(CONST_M128_CAST(iv+14)));
647}
648
649inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
650{
651 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
652 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_setzero_si128());
653 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
654 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_setzero_si128());
655 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
656 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_setzero_si128());
657 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
658 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_setzero_si128());
659}
660
661inline void zero_submsgs(LSH512_SSSE3_Context* ctx)
662{
663 lsh_u64* sub_msgs = ctx->sub_msgs;
664
665 _mm_storeu_si128(M128_CAST(sub_msgs+ 0),
666 _mm_setzero_si128());
667 _mm_storeu_si128(M128_CAST(sub_msgs+ 2),
668 _mm_setzero_si128());
669 _mm_storeu_si128(M128_CAST(sub_msgs+ 4),
670 _mm_setzero_si128());
671 _mm_storeu_si128(M128_CAST(sub_msgs+ 6),
672 _mm_setzero_si128());
673 _mm_storeu_si128(M128_CAST(sub_msgs+ 8),
674 _mm_setzero_si128());
675 _mm_storeu_si128(M128_CAST(sub_msgs+10),
676 _mm_setzero_si128());
677 _mm_storeu_si128(M128_CAST(sub_msgs+12),
678 _mm_setzero_si128());
679 _mm_storeu_si128(M128_CAST(sub_msgs+14),
680 _mm_setzero_si128());
681}
682
// Initialize the context for a 224-bit digest: clear the sub-message
// area and load the LSH-512-224 initialization vector.
inline void init224(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
}
690
// Initialize the context for a 256-bit digest: clear the sub-message
// area and load the LSH-512-256 initialization vector.
inline void init256(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
}
698
// Initialize the context for a 384-bit digest: clear the sub-message
// area and load the LSH-512-384 initialization vector.
inline void init384(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
}
706
// Initialize the context for a 512-bit digest: clear the sub-message
// area and load the LSH-512-512 initialization vector.
inline void init512(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
}
714
715/* -------------------------------------------------------- */
716
717inline void fin(LSH512_SSSE3_Context* ctx)
718{
719 CRYPTOPP_ASSERT(ctx != NULLPTR);
720
721 _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
722 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
723 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
724 _mm_storeu_si128(M128_CAST(ctx->cv_l+2), _mm_xor_si128(
725 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+2)),
726 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+2))));
727 _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
728 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
729 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
730 _mm_storeu_si128(M128_CAST(ctx->cv_l+6), _mm_xor_si128(
731 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+6)),
732 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+6))));
733}
734
735/* -------------------------------------------------------- */
736
737inline void get_hash(LSH512_SSSE3_Context* ctx, lsh_u8* pbHashVal)
738{
739 CRYPTOPP_ASSERT(ctx != NULLPTR);
740 CRYPTOPP_ASSERT(ctx->alg_type != 0);
741 CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
742
743 lsh_uint alg_type = ctx->alg_type;
744 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
745 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
746
747 // Multiplying by sizeof(lsh_u8) looks odd...
748 std::memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
749 if (hash_val_bit_len){
750 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
751 }
752}
753
754/* -------------------------------------------------------- */
755
// Initialize the hash state for the configured algorithm type. Standard
// digest sizes load a precomputed IV; any other LSH-512 type derives
// its IV by running the step function over a zeroed state seeded with
// the output length parameters.
lsh_err lsh512_init_ssse3(LSH512_SSSE3_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);

	lsh_u32 alg_type = ctx->alg_type;
	const lsh_u64* const_v = NULL;
	ctx->remain_databitlen = 0;

	// Fast path: the four standard digest sizes have precomputed IVs.
	switch (alg_type){
	case LSH_TYPE_512_512:
		init512(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_384:
		init384(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_256:
		init256(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_224:
		init224(ctx);
		return LSH_SUCCESS;
	default:
		break;
	}

	// Non-standard output size: derive the IV from the length parameters.
	lsh_u64* cv_l = ctx->cv_l;
	lsh_u64* cv_r = ctx->cv_r;

	zero_iv(cv_l, cv_r);
	cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
	cv_l[1] = LSH_GET_HASHBIT(alg_type);

	for (size_t i = 0; i < NUM_STEPS / 2; i++)
	{
		//Mix
		load_sc(&const_v, i * 16);
		mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);

		load_sc(&const_v, i * 16 + 8);
		mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);
	}

	return LSH_SUCCESS;
}
803
// Absorb message data. Buffers partial blocks in ctx->last_block and
// compresses each full 256-byte block. This implementation is byte
// oriented: databitlen is always a multiple of 8, so the sub-byte
// handling from the reference code is compiled out (pos2 and
// remain_msg_bit are constant zero).
lsh_err lsh512_update_ssse3(LSH512_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(data != NULLPTR);
	CRYPTOPP_ASSERT(databitlen % 8 == 0);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);

	if (databitlen == 0){
		return LSH_SUCCESS;
	}

	// We are byte oriented. tail bits will always be 0.
	size_t databytelen = databitlen >> 3;
	// lsh_uint pos2 = databitlen & 0x7;
	const size_t pos2 = 0;

	size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
	// lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
	const size_t remain_msg_bit = 0;

	if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
		return LSH_ERR_INVALID_STATE;
	}
	if (remain_msg_bit > 0){
		return LSH_ERR_INVALID_DATABITLEN;
	}

	// Not enough for a full block yet: just buffer and return.
	if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
		std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
		ctx->remain_databitlen += (lsh_uint)databitlen;
		remain_msg_byte += (lsh_uint)databytelen;
		if (pos2){
			ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
		}
		return LSH_SUCCESS;
	}

	// Top up and compress the buffered partial block first.
	if (remain_msg_byte > 0){
		size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
		std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
		compress(ctx, ctx->last_block);
		data += more_byte;
		databytelen -= more_byte;
		remain_msg_byte = 0;
		ctx->remain_databitlen = 0;
	}

	// Compress full blocks directly from the caller's buffer.
	while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
	{
		// This call to compress caused some trouble.
		// The data pointer can become unaligned in the
		// previous block.
		compress(ctx, data);
		data += LSH512_MSG_BLK_BYTE_LEN;
		databytelen -= LSH512_MSG_BLK_BYTE_LEN;
	}

	// Stash any remaining tail bytes for the next call.
	if (databytelen > 0){
		std::memcpy(ctx->last_block, data, databytelen);
		ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
	}

	if (pos2){
		ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
		ctx->remain_databitlen += pos2;
	}
	return LSH_SUCCESS;
}
872
// Finalize: pad the last partial block (0x80 then zeros), compress it,
// fold cv_r into cv_l, and copy the digest into hashval.
lsh_err lsh512_final_ssse3(LSH512_SSSE3_Context* ctx, lsh_u8* hashval)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(hashval != NULLPTR);

	// We are byte oriented. tail bits will always be 0.
	size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
	// lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
	const size_t remain_msg_bit = 0;

	if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
		return LSH_ERR_INVALID_STATE;
	}

	// Append the padding marker; the bit branch is dead (byte oriented).
	if (remain_msg_bit){
		ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
	}
	else{
		ctx->last_block[remain_msg_byte] = 0x80;
	}
	// Zero-fill the rest of the final block.
	std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

	compress(ctx, ctx->last_block);

	fin(ctx);
	get_hash(ctx, hashval);

	return LSH_SUCCESS;
}
902
903ANONYMOUS_NAMESPACE_END
904
905NAMESPACE_BEGIN(CryptoPP)
906
// Reset the hash to its initial state. The word64 state array layout is
// the one LSH512_SSSE3_Context imposes (cv_l, cv_r, sub_msgs,
// last_block, plus the AlgorithmType/RemainingBits bookkeeping slots).
extern
void LSH512_Base_Restart_SSSE3(word64* state)
{
	state[RemainingBits] = 0;
	LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_init_ssse3(&ctx);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_ssse3 failed");
}
917
// Absorb 'size' bytes of input into the hash state. The bit length
// passed down is 8*size since this path is byte oriented.
extern
void LSH512_Base_Update_SSSE3(word64* state, const byte *input, size_t size)
{
	LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_update_ssse3(&ctx, input, 8*size);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_ssse3 failed");
}
927
// Finalize and write the digest to 'hash'. The size parameter is
// ignored here: the digest length comes from the AlgorithmType slot,
// and truncation is handled by the caller.
extern
void LSH512_Base_TruncatedFinal_SSSE3(word64* state, byte *hash, size_t)
{
	LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_final_ssse3(&ctx, hash);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_ssse3 failed");
}
937
938NAMESPACE_END
939
940#endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition cryptlib.h:164
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition cryptlib.h:182
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition config_int.h:66
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:72
unsigned long long word64
64-bit unsigned datatype
Definition config_int.h:101
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition cryptlib.h:150
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition misc.h:2417
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition misc.h:1808
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition trap.h:68