Skip to content

Commit 8a50bb7

Browse files
committed
Xoodyak perf. opts.
1 parent 1b32536 commit 8a50bb7

File tree

2 files changed

+198
-115
lines changed

2 files changed

+198
-115
lines changed

core/src/main/java/org/bouncycastle/crypto/digests/XoodyakDigest.java

Lines changed: 99 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import org.bouncycastle.crypto.Digest;
77
import org.bouncycastle.crypto.OutputLengthException;
88
import org.bouncycastle.util.Arrays;
9+
import org.bouncycastle.util.Integers;
910
import org.bouncycastle.util.Pack;
1011

1112
/**
@@ -26,9 +27,9 @@ public class XoodyakDigest
2627
private final int Rhash = 16;
2728
private final int PhaseDown = 1;
2829
private final int PhaseUp = 2;
29-
private final int NLANES = 12;
30-
private final int NROWS = 3;
31-
private final int NCOLUMS = 4;
30+
// private final int NLANES = 12;
31+
// private final int NROWS = 3;
32+
// private final int NCOLUMS = 4;
3233
private final int MAXROUNDS = 12;
3334
private final int TAGLEN = 16;
3435
private final int[] RC = {0x00000058, 0x00000038, 0x000003C0, 0x000000D0, 0x00000120, 0x00000014, 0x00000060,
@@ -123,58 +124,109 @@ private void Up(byte[] Yi, int YiOff, int YiLen, int Cu)
123124
{
124125
state[f_bPrime - 1] ^= Cu;
125126
}
126-
int[] a = new int[NLANES];
127-
Pack.littleEndianToInt(state, 0, a, 0, a.length);
128-
int x, y;
129-
int[] b = new int[NLANES];
130-
int[] p = new int[NCOLUMS];
131-
int[] e = new int[NCOLUMS];
127+
128+
int a0 = Pack.littleEndianToInt(state, 0);
129+
int a1 = Pack.littleEndianToInt(state, 4);
130+
int a2 = Pack.littleEndianToInt(state, 8);
131+
int a3 = Pack.littleEndianToInt(state, 12);
132+
int a4 = Pack.littleEndianToInt(state, 16);
133+
int a5 = Pack.littleEndianToInt(state, 20);
134+
int a6 = Pack.littleEndianToInt(state, 24);
135+
int a7 = Pack.littleEndianToInt(state, 28);
136+
int a8 = Pack.littleEndianToInt(state, 32);
137+
int a9 = Pack.littleEndianToInt(state, 36);
138+
int a10 = Pack.littleEndianToInt(state, 40);
139+
int a11 = Pack.littleEndianToInt(state, 44);
140+
132141
for (int i = 0; i < MAXROUNDS; ++i)
133142
{
134143
/* Theta: Column Parity Mixer */
135-
for (x = 0; x < NCOLUMS; ++x)
136-
{
137-
p[x] = a[index(x, 0)] ^ a[index(x, 1)] ^ a[index(x, 2)];
138-
}
139-
for (x = 0; x < NCOLUMS; ++x)
140-
{
141-
y = p[(x + 3) & 3];
142-
e[x] = ROTL32(y, 5) ^ ROTL32(y, 14);
143-
}
144-
for (x = 0; x < NCOLUMS; ++x)
145-
{
146-
for (y = 0; y < NROWS; ++y)
147-
{
148-
a[index(x, y)] ^= e[x];
149-
}
150-
}
144+
int p0 = a0 ^ a4 ^ a8;
145+
int p1 = a1 ^ a5 ^ a9;
146+
int p2 = a2 ^ a6 ^ a10;
147+
int p3 = a3 ^ a7 ^ a11;
148+
149+
int e0 = Integers.rotateLeft(p3, 5) ^ Integers.rotateLeft(p3, 14);
150+
int e1 = Integers.rotateLeft(p0, 5) ^ Integers.rotateLeft(p0, 14);
151+
int e2 = Integers.rotateLeft(p1, 5) ^ Integers.rotateLeft(p1, 14);
152+
int e3 = Integers.rotateLeft(p2, 5) ^ Integers.rotateLeft(p2, 14);
153+
154+
a0 ^= e0;
155+
a4 ^= e0;
156+
a8 ^= e0;
157+
158+
a1 ^= e1;
159+
a5 ^= e1;
160+
a9 ^= e1;
161+
162+
a2 ^= e2;
163+
a6 ^= e2;
164+
a10 ^= e2;
165+
166+
a3 ^= e3;
167+
a7 ^= e3;
168+
a11 ^= e3;
169+
151170
/* Rho-west: plane shift */
152-
for (x = 0; x < NCOLUMS; ++x)
153-
{
154-
b[index(x, 0)] = a[index(x, 0)];
155-
b[index(x, 1)] = a[index(x + 3, 1)];
156-
b[index(x, 2)] = ROTL32(a[index(x, 2)], 11);
157-
}
171+
int b0 = a0;
172+
int b1 = a1;
173+
int b2 = a2;
174+
int b3 = a3;
175+
176+
int b4 = a7;
177+
int b5 = a4;
178+
int b6 = a5;
179+
int b7 = a6;
180+
181+
int b8 = Integers.rotateLeft(a8, 11);
182+
int b9 = Integers.rotateLeft(a9, 11);
183+
int b10 = Integers.rotateLeft(a10, 11);
184+
int b11 = Integers.rotateLeft(a11, 11);
185+
158186
/* Iota: round constant */
159-
b[0] ^= RC[i];
187+
b0 ^= RC[i];
188+
160189
/* Chi: non linear layer */
161-
for (x = 0; x < NCOLUMS; ++x)
162-
{
163-
for (y = 0; y < NROWS; ++y)
164-
{
165-
a[index(x, y)] = b[index(x, y)] ^ (~b[index(x, y + 1)] & b[index(x, y + 2)]);
166-
}
167-
}
190+
a0 = b0 ^ (~b4 & b8);
191+
a1 = b1 ^ (~b5 & b9);
192+
a2 = b2 ^ (~b6 & b10);
193+
a3 = b3 ^ (~b7 & b11);
194+
195+
a4 = b4 ^ (~b8 & b0);
196+
a5 = b5 ^ (~b9 & b1);
197+
a6 = b6 ^ (~b10 & b2);
198+
a7 = b7 ^ (~b11 & b3);
199+
200+
b8 ^= (~b0 & b4);
201+
b9 ^= (~b1 & b5);
202+
b10 ^= (~b2 & b6);
203+
b11 ^= (~b3 & b7);
204+
168205
/* Rho-east: plane shift */
169-
for (x = 0; x < NCOLUMS; ++x)
170-
{
171-
b[index(x, 0)] = a[index(x, 0)];
172-
b[index(x, 1)] = ROTL32(a[index(x, 1)], 1);
173-
b[index(x, 2)] = ROTL32(a[index(x + 2, 2)], 8);
174-
}
175-
System.arraycopy(b, 0, a, 0, NLANES);
206+
a4 = Integers.rotateLeft(a4, 1);
207+
a5 = Integers.rotateLeft(a5, 1);
208+
a6 = Integers.rotateLeft(a6, 1);
209+
a7 = Integers.rotateLeft(a7, 1);
210+
211+
a8 = Integers.rotateLeft(b10, 8);
212+
a9 = Integers.rotateLeft(b11, 8);
213+
a10 = Integers.rotateLeft(b8, 8);
214+
a11 = Integers.rotateLeft(b9, 8);
176215
}
177-
Pack.intToLittleEndian(a, 0, a.length, state, 0);
216+
217+
Pack.intToLittleEndian(a0, state, 0);
218+
Pack.intToLittleEndian(a1, state, 4);
219+
Pack.intToLittleEndian(a2, state, 8);
220+
Pack.intToLittleEndian(a3, state, 12);
221+
Pack.intToLittleEndian(a4, state, 16);
222+
Pack.intToLittleEndian(a5, state, 20);
223+
Pack.intToLittleEndian(a6, state, 24);
224+
Pack.intToLittleEndian(a7, state, 28);
225+
Pack.intToLittleEndian(a8, state, 32);
226+
Pack.intToLittleEndian(a9, state, 36);
227+
Pack.intToLittleEndian(a10, state, 40);
228+
Pack.intToLittleEndian(a11, state, 44);
229+
178230
phase = PhaseUp;
179231
if (Yi != null)
180232
{
@@ -192,15 +244,4 @@ void Down(byte[] Xi, int XiOff, int XiLen, int Cd)
192244
state[f_bPrime - 1] ^= (mode == MODE.ModeHash) ? (Cd & 0x01) : Cd;
193245
phase = PhaseDown;
194246
}
195-
196-
private int index(int x, int y)
197-
{
198-
return (((y % NROWS) * NCOLUMS) + ((x) % NCOLUMS));
199-
}
200-
201-
private int ROTL32(int a, int offset)
202-
{
203-
return (a << (offset & 31)) ^ (a >>> ((32 - (offset)) & 31));
204-
}
205-
206247
}

core/src/main/java/org/bouncycastle/crypto/engines/XoodyakEngine.java

Lines changed: 99 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.bouncycastle.crypto.params.KeyParameter;
1313
import org.bouncycastle.crypto.params.ParametersWithIV;
1414
import org.bouncycastle.util.Arrays;
15+
import org.bouncycastle.util.Integers;
1516
import org.bouncycastle.util.Pack;
1617

1718
/**
@@ -35,9 +36,9 @@ public class XoodyakEngine
3536
private byte[] iv;
3637
private final int PhaseDown = 1;
3738
private final int PhaseUp = 2;
38-
private final int NLANES = 12;
39-
private final int NROWS = 3;
40-
private final int NCOLUMS = 4;
39+
// private final int NLANES = 12;
40+
// private final int NROWS = 3;
41+
// private final int NCOLUMS = 4;
4142
private final int MAXROUNDS = 12;
4243
private final int TAGLEN = 16;
4344
final int Rkin = 44;
@@ -328,58 +329,109 @@ private void Up(byte[] Yi, int YiLen, int Cu)
328329
{
329330
state[f_bPrime - 1] ^= Cu;
330331
}
331-
int[] a = new int[NLANES];
332-
Pack.littleEndianToInt(state, 0, a, 0, a.length);
333-
int x, y;
334-
int[] b = new int[NLANES];
335-
int[] p = new int[NCOLUMS];
336-
int[] e = new int[NCOLUMS];
332+
333+
int a0 = Pack.littleEndianToInt(state, 0);
334+
int a1 = Pack.littleEndianToInt(state, 4);
335+
int a2 = Pack.littleEndianToInt(state, 8);
336+
int a3 = Pack.littleEndianToInt(state, 12);
337+
int a4 = Pack.littleEndianToInt(state, 16);
338+
int a5 = Pack.littleEndianToInt(state, 20);
339+
int a6 = Pack.littleEndianToInt(state, 24);
340+
int a7 = Pack.littleEndianToInt(state, 28);
341+
int a8 = Pack.littleEndianToInt(state, 32);
342+
int a9 = Pack.littleEndianToInt(state, 36);
343+
int a10 = Pack.littleEndianToInt(state, 40);
344+
int a11 = Pack.littleEndianToInt(state, 44);
345+
337346
for (int i = 0; i < MAXROUNDS; ++i)
338347
{
339348
/* Theta: Column Parity Mixer */
340-
for (x = 0; x < NCOLUMS; ++x)
341-
{
342-
p[x] = a[index(x, 0)] ^ a[index(x, 1)] ^ a[index(x, 2)];
343-
}
344-
for (x = 0; x < NCOLUMS; ++x)
345-
{
346-
y = p[(x + 3) & 3];
347-
e[x] = ROTL32(y, 5) ^ ROTL32(y, 14);
348-
}
349-
for (x = 0; x < NCOLUMS; ++x)
350-
{
351-
for (y = 0; y < NROWS; ++y)
352-
{
353-
a[index(x, y)] ^= e[x];
354-
}
355-
}
349+
int p0 = a0 ^ a4 ^ a8;
350+
int p1 = a1 ^ a5 ^ a9;
351+
int p2 = a2 ^ a6 ^ a10;
352+
int p3 = a3 ^ a7 ^ a11;
353+
354+
int e0 = Integers.rotateLeft(p3, 5) ^ Integers.rotateLeft(p3, 14);
355+
int e1 = Integers.rotateLeft(p0, 5) ^ Integers.rotateLeft(p0, 14);
356+
int e2 = Integers.rotateLeft(p1, 5) ^ Integers.rotateLeft(p1, 14);
357+
int e3 = Integers.rotateLeft(p2, 5) ^ Integers.rotateLeft(p2, 14);
358+
359+
a0 ^= e0;
360+
a4 ^= e0;
361+
a8 ^= e0;
362+
363+
a1 ^= e1;
364+
a5 ^= e1;
365+
a9 ^= e1;
366+
367+
a2 ^= e2;
368+
a6 ^= e2;
369+
a10 ^= e2;
370+
371+
a3 ^= e3;
372+
a7 ^= e3;
373+
a11 ^= e3;
374+
356375
/* Rho-west: plane shift */
357-
for (x = 0; x < NCOLUMS; ++x)
358-
{
359-
b[index(x, 0)] = a[index(x, 0)];
360-
b[index(x, 1)] = a[index(x + 3, 1)];
361-
b[index(x, 2)] = ROTL32(a[index(x, 2)], 11);
362-
}
376+
int b0 = a0;
377+
int b1 = a1;
378+
int b2 = a2;
379+
int b3 = a3;
380+
381+
int b4 = a7;
382+
int b5 = a4;
383+
int b6 = a5;
384+
int b7 = a6;
385+
386+
int b8 = Integers.rotateLeft(a8, 11);
387+
int b9 = Integers.rotateLeft(a9, 11);
388+
int b10 = Integers.rotateLeft(a10, 11);
389+
int b11 = Integers.rotateLeft(a11, 11);
390+
363391
/* Iota: round constant */
364-
b[0] ^= RC[i];
392+
b0 ^= RC[i];
393+
365394
/* Chi: non linear layer */
366-
for (x = 0; x < NCOLUMS; ++x)
367-
{
368-
for (y = 0; y < NROWS; ++y)
369-
{
370-
a[index(x, y)] = b[index(x, y)] ^ (~b[index(x, y + 1)] & b[index(x, y + 2)]);
371-
}
372-
}
395+
a0 = b0 ^ (~b4 & b8);
396+
a1 = b1 ^ (~b5 & b9);
397+
a2 = b2 ^ (~b6 & b10);
398+
a3 = b3 ^ (~b7 & b11);
399+
400+
a4 = b4 ^ (~b8 & b0);
401+
a5 = b5 ^ (~b9 & b1);
402+
a6 = b6 ^ (~b10 & b2);
403+
a7 = b7 ^ (~b11 & b3);
404+
405+
b8 ^= (~b0 & b4);
406+
b9 ^= (~b1 & b5);
407+
b10 ^= (~b2 & b6);
408+
b11 ^= (~b3 & b7);
409+
373410
/* Rho-east: plane shift */
374-
for (x = 0; x < NCOLUMS; ++x)
375-
{
376-
b[index(x, 0)] = a[index(x, 0)];
377-
b[index(x, 1)] = ROTL32(a[index(x, 1)], 1);
378-
b[index(x, 2)] = ROTL32(a[index(x + 2, 2)], 8);
379-
}
380-
System.arraycopy(b, 0, a, 0, NLANES);
411+
a4 = Integers.rotateLeft(a4, 1);
412+
a5 = Integers.rotateLeft(a5, 1);
413+
a6 = Integers.rotateLeft(a6, 1);
414+
a7 = Integers.rotateLeft(a7, 1);
415+
416+
a8 = Integers.rotateLeft(b10, 8);
417+
a9 = Integers.rotateLeft(b11, 8);
418+
a10 = Integers.rotateLeft(b8, 8);
419+
a11 = Integers.rotateLeft(b9, 8);
381420
}
382-
Pack.intToLittleEndian(a, 0, a.length, state, 0);
421+
422+
Pack.intToLittleEndian(a0, state, 0);
423+
Pack.intToLittleEndian(a1, state, 4);
424+
Pack.intToLittleEndian(a2, state, 8);
425+
Pack.intToLittleEndian(a3, state, 12);
426+
Pack.intToLittleEndian(a4, state, 16);
427+
Pack.intToLittleEndian(a5, state, 20);
428+
Pack.intToLittleEndian(a6, state, 24);
429+
Pack.intToLittleEndian(a7, state, 28);
430+
Pack.intToLittleEndian(a8, state, 32);
431+
Pack.intToLittleEndian(a9, state, 36);
432+
Pack.intToLittleEndian(a10, state, 40);
433+
Pack.intToLittleEndian(a11, state, 44);
434+
383435
phase = PhaseUp;
384436
if (Yi != null)
385437
{
@@ -398,16 +450,6 @@ void Down(byte[] Xi, int XiOff, int XiLen, int Cd)
398450
phase = PhaseDown;
399451
}
400452

401-
private int index(int x, int y)
402-
{
403-
return (((y % NROWS) * NCOLUMS) + ((x) % NCOLUMS));
404-
}
405-
406-
private int ROTL32(int a, int offset)
407-
{
408-
return (a << (offset & 31)) ^ (a >>> ((32 - (offset)) & 31));
409-
}
410-
411453
public int getBlockSize()
412454
{
413455
return Rkout;

0 commit comments

Comments
 (0)