Skip to content

Commit 0801c15

Browse files
committed
Performance optimisation in DSTU algorithms
1 parent 6cc4a16 commit 0801c15

File tree

2 files changed

+87
-102
lines changed

2 files changed

+87
-102
lines changed

crypto/src/crypto/digests/DSTU7564Digest.cs

Lines changed: 50 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -155,32 +155,33 @@ protected virtual byte[] Pad(byte[] input, int inOff, int length)
155155

156156
protected virtual void ProcessBlock(byte[] input, int inOff)
157157
{
158-
byte[][] temp1 = new byte[STATE_BYTE_SIZE_1024][];
159-
byte[][] temp2 = new byte[STATE_BYTE_SIZE_1024][];
158+
byte[][] temp1 = new byte[columns][];
159+
byte[][] temp2 = new byte[columns][];
160160

161-
for (int i = 0; i < state_.Length; i++)
161+
int pos = inOff;
162+
for (int i = 0; i < columns; i++)
162163
{
163-
temp1[i] = new byte[ROWS];
164-
temp2[i] = new byte[ROWS];
165-
}
164+
byte[] S = state_[i];
165+
byte[] T1 = temp1[i] = new byte[ROWS];
166+
byte[] T2 = temp2[i] = new byte[ROWS];
166167

167-
for (int i = 0; i < ROWS; ++i)
168-
{
169-
for (int j = 0; j < columns; ++j)
168+
for (int j = 0; j < ROWS; ++j)
170169
{
171-
temp1[j][i] = (byte)(state_[j][i] ^ input[j * ROWS + i + inOff]);
172-
temp2[j][i] = input[j * ROWS + i + inOff];
170+
byte inVal = input[pos++];
171+
T1[j] = (byte)(S[j] ^ inVal);
172+
T2[j] = inVal;
173173
}
174174
}
175175

176176
P(temp1);
177177
Q(temp2);
178178

179-
for (int i = 0; i < ROWS; ++i)
179+
for (int i = 0; i < columns; ++i)
180180
{
181-
for (int j = 0; j < columns; ++j)
181+
byte[] S = state_[i], T1 = temp1[i], T2 = temp2[i];
182+
for (int j = 0; j < ROWS; ++j)
182183
{
183-
state_[j][i] ^= (byte)(temp1[j][i] ^ temp2[j][i]);
184+
S[j] ^= (byte)(T1[j] ^ T2[j]);
184185
}
185186
}
186187
}
@@ -313,50 +314,55 @@ private void ShiftBytes(byte[][] state)
313314
}
314315
}
315316

316-
private static byte MultiplyGF(byte x, byte y)
317+
/* Pair-wise GF(2^8) multiplication of 4 byte-pairs (the bytes at bit offsets 0, 16, 32, 48 within u, v) */
318+
private static ulong MultiplyGFx4(ulong u, ulong v)
317319
{
318-
// REDUCTION_POLYNOMIAL = 0x011d; /* x^8 + x^4 + x^3 + x^2 + 1 */
319-
320-
uint u = x, v = y;
321-
uint r = u & (0U - (v & 1));
320+
ulong r = u & ((v & 0x0001000100010001UL) * 0xFFFFUL);
322321

323-
for (int i = 1; i < BITS_IN_BYTE; i++)
322+
for (int i = 1; i < 8; ++i)
324323
{
325324
u <<= 1;
326325
v >>= 1;
327-
r ^= u & (0U - (v & 1));
326+
r ^= u & ((v & 0x0001000100010001L) * 0xFFFFL);
328327
}
329328

330-
uint hi = r & 0xFF00U;
329+
// REDUCTION_POLYNOMIAL = 0x011d; /* x^8 + x^4 + x^3 + x^2 + 1 */
330+
331+
ulong hi = r & 0xFF00FF00FF00FF00UL;
331332
r ^= hi ^ (hi >> 4) ^ (hi >> 5) ^ (hi >> 6) ^ (hi >> 8);
332-
hi = r & 0x0F00U;
333+
hi = r & 0x0F000F000F000F00UL;
333334
r ^= hi ^ (hi >> 4) ^ (hi >> 5) ^ (hi >> 6) ^ (hi >> 8);
334-
335-
return (byte)r;
335+
return r;
336336
}
337337

338338
private void MixColumns(byte[][] state)
339339
{
340-
int i, row, col, b;
341-
byte product;
342-
byte[] result = new byte[ROWS];
343-
344-
for (col = 0; col < columns; ++col)
340+
for (int col = 0; col < columns; ++col)
345341
{
346-
Array.Clear(result, 0, ROWS);
347-
for (row = ROWS - 1; row >= 0; --row)
348-
{
349-
product = 0;
350-
for (b = ROWS - 1; b >= 0; --b)
351-
{
352-
product ^= MultiplyGF(state[col][b], mds_matrix[row][b]);
353-
}
354-
result[row] = product;
355-
}
356-
for (i = 0; i < ROWS; ++i)
342+
ulong colVal = Pack.LE_To_UInt64(state[col]);
343+
ulong colEven = colVal & 0x00FF00FF00FF00FFUL;
344+
ulong colOdd = (colVal >> 8) & 0x00FF00FF00FF00FFUL;
345+
346+
//ulong rowMatrix = (mdsMatrix >> 8) | (mdsMatrix << 56);
347+
ulong rowMatrix = mdsMatrix;
348+
349+
ulong result = 0;
350+
for (int row = 7; row >= 0; --row)
357351
{
358-
state[col][i] = result[i];
352+
ulong product = MultiplyGFx4(colEven, rowMatrix & 0x00FF00FF00FF00FFUL);
353+
354+
rowMatrix = (rowMatrix >> 8) | (rowMatrix << 56);
355+
356+
product ^= MultiplyGFx4(colOdd, rowMatrix & 0x00FF00FF00FF00FFUL);
357+
358+
product ^= (product >> 32);
359+
product ^= (product >> 16);
360+
361+
result <<= 8;
362+
result |= (product & 0xFFUL);
359363
}
364+
365+
Pack.UInt64_To_LE(result, state[col]);
360366
}
361367
}
362368

@@ -420,17 +426,8 @@ public virtual void Reset(IMemoable other)
420426
CopyIn(d);
421427
}
422428

423-
private static readonly byte[][] mds_matrix = new byte[][]
424-
{
425-
new byte[] { 0x01, 0x01, 0x05, 0x01, 0x08, 0x06, 0x07, 0x04 },
426-
new byte[] { 0x04, 0x01, 0x01, 0x05, 0x01, 0x08, 0x06, 0x07 },
427-
new byte[] { 0x07, 0x04, 0x01, 0x01, 0x05, 0x01, 0x08, 0x06 },
428-
new byte[] { 0x06, 0x07, 0x04, 0x01, 0x01, 0x05, 0x01, 0x08 },
429-
new byte[] { 0x08, 0x06, 0x07, 0x04, 0x01, 0x01, 0x05, 0x01 },
430-
new byte[] { 0x01, 0x08, 0x06, 0x07, 0x04, 0x01, 0x01, 0x05 },
431-
new byte[] { 0x05, 0x01, 0x08, 0x06, 0x07, 0x04, 0x01, 0x01 },
432-
new byte[] { 0x01, 0x05, 0x01, 0x08, 0x06, 0x07, 0x04, 0x01 }
433-
};
429+
//private const ulong mdsMatrix = 0x0407060801050101UL;
430+
private const ulong mdsMatrix = 0x0104070608010501UL;
434431

435432
private static readonly byte[][] sBoxes = new byte[][]
436433
{

crypto/src/crypto/engines/Dstu7624Engine.cs

Lines changed: 37 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -470,49 +470,56 @@ private void InvMixColumns()
470470
MatrixMultiply(mdsInvMatrix);
471471
}
472472

473-
private void MatrixMultiply(byte[][] matrix)
473+
private void MatrixMultiply(ulong matrix)
474474
{
475-
int col, row, b;
476-
byte product;
477-
ulong result;
478-
byte[] stateBytes = Pack.UInt64_To_LE(internalState);
479-
480-
for (col = 0; col < wordsInBlock; ++col)
475+
for (int col = 0; col < wordsInBlock; ++col)
481476
{
482-
result = 0;
483-
for (row = 8 - 1; row >= 0; --row)
477+
ulong colVal = internalState[col];
478+
ulong colEven = colVal & 0x00FF00FF00FF00FFUL;
479+
ulong colOdd = (colVal >> 8) & 0x00FF00FF00FF00FFUL;
480+
481+
//ulong rowMatrix = (matrix >> 8) | (matrix << 56);
482+
ulong rowMatrix = matrix;
483+
484+
ulong result = 0;
485+
for (int row = 7; row >= 0; --row)
484486
{
485-
product = 0;
486-
for (b = 8 - 1; b >= 0; --b)
487-
{
488-
product ^= MultiplyGF(stateBytes[b + col * 8], matrix[row][b]);
489-
}
490-
result |= (ulong)product << (row * 8);
487+
ulong product = MultiplyGFx4(colEven, rowMatrix & 0x00FF00FF00FF00FFUL);
488+
489+
rowMatrix = (rowMatrix >> 8) | (rowMatrix << 56);
490+
491+
product ^= MultiplyGFx4(colOdd, rowMatrix & 0x00FF00FF00FF00FFUL);
492+
493+
product ^= (product >> 32);
494+
product ^= (product >> 16);
495+
496+
result <<= 8;
497+
result |= (product & 0xFFUL);
491498
}
499+
492500
internalState[col] = result;
493501
}
494502
}
495503

496-
private static byte MultiplyGF(byte x, byte y)
504+
/* Pair-wise GF(2^8) multiplication of 4 byte-pairs (the bytes at bit offsets 0, 16, 32, 48 within u, v) */
505+
private static ulong MultiplyGFx4(ulong u, ulong v)
497506
{
498-
// REDUCTION_POLYNOMIAL = 0x011d; /* x^8 + x^4 + x^3 + x^2 + 1 */
499-
500-
uint u = x, v = y;
501-
uint r = u & (0U - (v & 1));
507+
ulong r = u & ((v & 0x0001000100010001UL) * 0xFFFFUL);
502508

503-
for (int i = 1; i < BITS_IN_BYTE; i++)
509+
for (int i = 1; i < 8; ++i)
504510
{
505511
u <<= 1;
506512
v >>= 1;
507-
r ^= u & (0U - (v & 1));
513+
r ^= u & ((v & 0x0001000100010001L) * 0xFFFFL);
508514
}
509515

510-
uint hi = r & 0xFF00U;
516+
// REDUCTION_POLYNOMIAL = 0x011d; /* x^8 + x^4 + x^3 + x^2 + 1 */
517+
518+
ulong hi = r & 0xFF00FF00FF00FF00UL;
511519
r ^= hi ^ (hi >> 4) ^ (hi >> 5) ^ (hi >> 6) ^ (hi >> 8);
512-
hi = r & 0x0F00U;
520+
hi = r & 0x0F000F000F000F00UL;
513521
r ^= hi ^ (hi >> 4) ^ (hi >> 5) ^ (hi >> 6) ^ (hi >> 8);
514-
515-
return (byte)r;
522+
return r;
516523
}
517524

518525
private void SubBytes()
@@ -547,29 +554,10 @@ private void InvSubBytes()
547554

548555
#region TABLES AND S-BOXES
549556

550-
private byte[][] mdsMatrix =
551-
{
552-
new byte[] { 0x01, 0x01, 0x05, 0x01, 0x08, 0x06, 0x07, 0x04 },
553-
new byte[] { 0x04, 0x01, 0x01, 0x05, 0x01, 0x08, 0x06, 0x07 },
554-
new byte[] { 0x07, 0x04, 0x01, 0x01, 0x05, 0x01, 0x08, 0x06 },
555-
new byte[] { 0x06, 0x07, 0x04, 0x01, 0x01, 0x05, 0x01, 0x08 },
556-
new byte[] { 0x08, 0x06, 0x07, 0x04, 0x01, 0x01, 0x05, 0x01 },
557-
new byte[] { 0x01, 0x08, 0x06, 0x07, 0x04, 0x01, 0x01, 0x05 },
558-
new byte[] { 0x05, 0x01, 0x08, 0x06, 0x07, 0x04, 0x01, 0x01 },
559-
new byte[] { 0x01, 0x05, 0x01, 0x08, 0x06, 0x07, 0x04, 0x01 },
560-
};
561-
562-
private byte[][] mdsInvMatrix =
563-
{
564-
new byte[] { 0xAD, 0x95, 0x76, 0xA8, 0x2F, 0x49, 0xD7, 0xCA },
565-
new byte[] { 0xCA, 0xAD, 0x95, 0x76, 0xA8, 0x2F, 0x49, 0xD7 },
566-
new byte[] { 0xD7, 0xCA, 0xAD, 0x95, 0x76, 0xA8, 0x2F, 0x49 },
567-
new byte[] { 0x49, 0xD7, 0xCA, 0xAD, 0x95, 0x76, 0xA8, 0x2F },
568-
new byte[] { 0x2F, 0x49, 0xD7, 0xCA, 0xAD, 0x95, 0x76, 0xA8 },
569-
new byte[] { 0xA8, 0x2F, 0x49, 0xD7, 0xCA, 0xAD, 0x95, 0x76 },
570-
new byte[] { 0x76, 0xA8, 0x2F, 0x49, 0xD7, 0xCA, 0xAD, 0x95 },
571-
new byte[] { 0x95, 0x76, 0xA8, 0x2F, 0x49, 0xD7, 0xCA, 0xAD },
572-
};
557+
//private const ulong mdsMatrix = 0x0407060801050101UL;
558+
//private const ulong mdsInvMatrix = 0xCAD7492FA87695ADUL;
559+
private const ulong mdsMatrix = 0x0104070608010501UL;
560+
private const ulong mdsInvMatrix = 0xADCAD7492FA87695UL;
573561

574562
private byte[][] sboxesForEncryption =
575563
{

0 commit comments

Comments
 (0)