Skip to content

Commit dfa28d3

Browse files
author
gefeili
committed
fgemss256 and dualmode256 work now
1 parent d9c947b commit dfa28d3

File tree

1 file changed

+82
-203
lines changed
  • core/src/main/java/org/bouncycastle/pqc/crypto/gemss

1 file changed

+82
-203
lines changed

core/src/main/java/org/bouncycastle/pqc/crypto/gemss/Pointer.java

Lines changed: 82 additions & 203 deletions
Original file line numberDiff line numberDiff line change
@@ -108,17 +108,17 @@ public void setXorRange(int outOff, PointerUnion p, int inOff, int len)
108108
}
109109

110110
// Assumes the input is a Pointer, not a PointerUnion
111-
public void setXorRangeShift(int outOff, Pointer p, int inOff, int len, int right)
112-
{
113-
114-
outOff += cp;
115-
inOff += p.cp;
116-
int left = 64 - right;
117-
for (int i = 0; i < len; ++i, ++inOff)
118-
{
119-
array[outOff++] ^= (p.array[inOff] >>> right) ^ (p.array[inOff + 1] << left);
120-
}
121-
}
111+
// public void setXorRangeShift(int outOff, Pointer p, int inOff, int len, int right)
112+
// {
113+
//
114+
// outOff += cp;
115+
// inOff += p.cp;
116+
// int left = 64 - right;
117+
// for (int i = 0; i < len; ++i, ++inOff)
118+
// {
119+
// array[outOff++] ^= (p.array[inOff] >>> right) ^ (p.array[inOff + 1] << left);
120+
// }
121+
// }
122122

123123
public void setRangeAndMask(int outOff, Pointer p, int inOff, int len, long mask)
124124
{
@@ -151,36 +151,36 @@ public void setRangeRotate(int outOff, Pointer p, int inOff, int len, int right)
151151
}
152152
}
153153

154-
public void setXorRangeAndMaskRotate(int outOff, Pointer p, int inOff, int len, long mask, int j)
155-
{
156-
int jc = 64 - j;
157-
outOff += cp;
158-
inOff += p.cp;
159-
long A_mask1 = p.array[inOff++] & mask, A_mask2;
160-
array[outOff++] ^= A_mask1 << j;
161-
for (int i = 1; i < len; ++i)
162-
{
163-
A_mask2 = p.array[inOff++] & mask;
164-
array[outOff++] ^= (A_mask1 >>> jc) | (A_mask2 << j);
165-
A_mask1 = A_mask2;
166-
}
167-
}
168-
169-
public void setXorRangeAndMaskRotateOverflow(int outOff, Pointer p, int inOff, int len, long mask, int j)
170-
{
171-
int jc = 64 - j;
172-
inOff += p.cp;
173-
long A_mask1 = p.array[inOff++] & mask, A_mask2;
174-
outOff += cp;
175-
array[outOff++] ^= A_mask1 << j;
176-
for (int i = 1; i < len; ++i)
177-
{
178-
A_mask2 = p.array[inOff++] & mask;
179-
array[outOff++] ^= (A_mask1 >>> jc) | (A_mask2 << j);
180-
A_mask1 = A_mask2;
181-
}
182-
array[outOff] ^= A_mask1 >>> jc;
183-
}
154+
// public void setXorRangeAndMaskRotate(int outOff, Pointer p, int inOff, int len, long mask, int j)
155+
// {
156+
// int jc = 64 - j;
157+
// outOff += cp;
158+
// inOff += p.cp;
159+
// long A_mask1 = p.array[inOff++] & mask, A_mask2;
160+
// array[outOff++] ^= A_mask1 << j;
161+
// for (int i = 1; i < len; ++i)
162+
// {
163+
// A_mask2 = p.array[inOff++] & mask;
164+
// array[outOff++] ^= (A_mask1 >>> jc) | (A_mask2 << j);
165+
// A_mask1 = A_mask2;
166+
// }
167+
// }
168+
//
169+
// public void setXorRangeAndMaskRotateOverflow(int outOff, Pointer p, int inOff, int len, long mask, int j)
170+
// {
171+
// int jc = 64 - j;
172+
// inOff += p.cp;
173+
// long A_mask1 = p.array[inOff++] & mask, A_mask2;
174+
// outOff += cp;
175+
// array[outOff++] ^= A_mask1 << j;
176+
// for (int i = 1; i < len; ++i)
177+
// {
178+
// A_mask2 = p.array[inOff++] & mask;
179+
// array[outOff++] ^= (A_mask1 >>> jc) | (A_mask2 << j);
180+
// A_mask1 = A_mask2;
181+
// }
182+
// array[outOff] ^= A_mask1 >>> jc;
183+
// }
184184

185185
public void move(int p)
186186
{
@@ -192,11 +192,6 @@ public void moveIncremental()
192192
cp++;
193193
}
194194

195-
// public void moveDecremental()
196-
// {
197-
// cp--;
198-
// }
199-
200195
public long[] getArray()
201196
{
202197
return array;
@@ -372,45 +367,44 @@ public void setOneShiftWithMove(int j, int loop, int move)
372367
* @param[out] C C=A*A in GF(2)[x] (the result is not reduced).
373368
* @remark Constant-time implementation.
374369
*/
375-
public void sqr_nocst_gf2x(Pointer A, int NB_WORD_GFqn, int NB_WORD_MUL)
376-
{
377-
long Ci;
378-
int i = NB_WORD_GFqn - 1;
379-
int pos = cp + NB_WORD_MUL - 1;
380-
//int Aoff = A.cp + i;
381-
if ((NB_WORD_MUL & 1) != 0)
382-
{
383-
/* Lower 32 bits of A[i] */
384-
Ci = A.get(i);//A.array[Aoff];//
385-
Ci = (Ci ^ (Ci << 16)) & 0x0000FFFF0000FFFFL;
386-
Ci = square_gf2(Ci);
387-
array[pos--] = Ci;
388-
i = NB_WORD_GFqn - 2;
389-
}
390-
for (; i != -1; --i)
391-
{
392-
/* Higher 32 bits of A[i] */
393-
Ci = A.get(i) >>> 32;//A.array[Aoff] >>> 32;
394-
Ci = (Ci ^ (Ci << 16)) & (0x0000FFFF0000FFFFL);
395-
Ci = square_gf2(Ci);
396-
array[pos--] = Ci;
397-
/* Lower 32 bits of A[i] */
398-
Ci = A.get(i);//A.array[Aoff--];
399-
Ci = ((Ci & 0xFFFFFFFFL) ^ (Ci << 16)) & (0x0000FFFF0000FFFFL);
400-
Ci = square_gf2(Ci);
401-
array[pos--] = Ci;
402-
}
403-
}
404-
405-
private long square_gf2(long Ci)
406-
{
407-
Ci = (Ci ^ (Ci << 8)) & (0x00FF00FF00FF00FFL);
408-
Ci = (Ci ^ (Ci << 4)) & (0x0F0F0F0F0F0F0F0FL);
409-
Ci = (Ci ^ (Ci << 2)) & (0x3333333333333333L);
410-
Ci = (Ci ^ (Ci << 1)) & (0x5555555555555555L);
411-
return Ci;
412-
}
413-
370+
// public void sqr_nocst_gf2x(Pointer A, int NB_WORD_GFqn, int NB_WORD_MUL)
371+
// {
372+
// long Ci;
373+
// int i = NB_WORD_GFqn - 1;
374+
// int pos = cp + NB_WORD_MUL - 1;
375+
// //int Aoff = A.cp + i;
376+
// if ((NB_WORD_MUL & 1) != 0)
377+
// {
378+
// /* Lower 32 bits of A[i] */
379+
// Ci = A.get(i);//A.array[Aoff];//
380+
// Ci = (Ci ^ (Ci << 16)) & 0x0000FFFF0000FFFFL;
381+
// Ci = square_gf2(Ci);
382+
// array[pos--] = Ci;
383+
// i = NB_WORD_GFqn - 2;
384+
// }
385+
// for (; i != -1; --i)
386+
// {
387+
// /* Higher 32 bits of A[i] */
388+
// Ci = A.get(i) >>> 32;//A.array[Aoff] >>> 32;
389+
// Ci = (Ci ^ (Ci << 16)) & (0x0000FFFF0000FFFFL);
390+
// Ci = square_gf2(Ci);
391+
// array[pos--] = Ci;
392+
// /* Lower 32 bits of A[i] */
393+
// Ci = A.get(i);//A.array[Aoff--];
394+
// Ci = ((Ci & 0xFFFFFFFFL) ^ (Ci << 16)) & (0x0000FFFF0000FFFFL);
395+
// Ci = square_gf2(Ci);
396+
// array[pos--] = Ci;
397+
// }
398+
// }
399+
//
400+
// private long square_gf2(long Ci)
401+
// {
402+
// Ci = (Ci ^ (Ci << 8)) & (0x00FF00FF00FF00FFL);
403+
// Ci = (Ci ^ (Ci << 4)) & (0x0F0F0F0F0F0F0F0FL);
404+
// Ci = (Ci ^ (Ci << 2)) & (0x3333333333333333L);
405+
// Ci = (Ci ^ (Ci << 1)) & (0x5555555555555555L);
406+
// return Ci;
407+
// }
414408
public long getDotProduct(int off, Pointer b, int bOff, int len)
415409
{
416410
off += cp;
@@ -464,123 +458,6 @@ public void mul_gf2x(Pointer A, Pointer B)
464458
}
465459
}
466460

467-
public void mul_gf2x(Pointer A, Pointer B, int HFEnq, int NB_WORD_GFqn, int HFEnr)
468-
{
469-
switch (array.length)
470-
{
471-
case 6:
472-
mul192_no_simd_gf2x(array, 0, A.array, A.cp, B.array, B.cp, new long[2], 0);
473-
return;
474-
case 9:
475-
mul288_no_simd_gf2x(array, 0, A.array, A.cp, B.array, B.cp, new long[3], new long[3], new long[7]);
476-
return;
477-
case 12:
478-
mul384_no_simd_gf2x(array, A.array, A.cp, B.array, B.cp, new long[3], new long[3], new long[8]);
479-
return;
480-
case 13:
481-
mul416_no_simd_gf2x(array, A.array, A.cp, B.array, B.cp, new long[4], new long[4], new long[13],
482-
new long[2], new long[2]);
483-
return;
484-
case 17:
485-
mul544_no_simd_gf2x(array, A.array, A.cp, B.array, B.cp, new long[5], new long[5], new long[9],
486-
new long[3], new long[3], new long[7]);
487-
return;
488-
}
489-
int i, j, k, b_cp = B.cp, a_cp, c_cp, jc;
490-
long b, mask, mask1, mask2;
491-
for (i = 0; i < HFEnq; ++i)
492-
{
493-
b = B.array[b_cp];
494-
mask = -(b & 1L);
495-
a_cp = A.cp;
496-
c_cp = cp;
497-
/* j=0 */
498-
for (j = 0; j < NB_WORD_GFqn; ++j)
499-
{
500-
array[c_cp++] ^= A.array[a_cp++] & mask;
501-
}
502-
503-
/* The last 64-bit block BL of A contains HFEnr bits.
504-
So, there is no overflow for BL<<j while j<=(64-HFEnr). */
505-
for (j = 1, jc = 63; j <= 64 - HFEnr; ++j, --jc)
506-
{
507-
a_cp = A.cp;
508-
c_cp = cp;
509-
mask = -((b >>> j) & 1L);
510-
mask1 = A.array[a_cp++] & mask;
511-
array[c_cp++] ^= mask1 << j;
512-
for (k = 1; k < NB_WORD_GFqn; ++k)
513-
{
514-
mask2 = A.array[a_cp++] & mask;
515-
array[c_cp++] ^= (mask1 >>> jc) | (mask2 << j);
516-
mask1 = mask2;
517-
}
518-
}
519-
for (; j < 64; ++j, --jc)
520-
{
521-
a_cp = A.cp;
522-
c_cp = cp;
523-
mask = -((b >>> j) & 1L);
524-
mask1 = A.array[a_cp++] & mask;
525-
array[c_cp++] ^= mask1 << j;
526-
for (k = 1; k < NB_WORD_GFqn; ++k)
527-
{
528-
mask2 = A.array[a_cp++] & mask;
529-
array[c_cp++] ^= (mask1 >>> jc) | (mask2 << j);
530-
mask1 = mask2;
531-
}
532-
array[c_cp] ^= mask1 >>> jc;
533-
}
534-
b_cp++;
535-
cp++;
536-
}
537-
b = B.array[b_cp];
538-
/* j=0 */
539-
mask = -(b & 1L);
540-
a_cp = A.cp;
541-
c_cp = cp;
542-
/* j=0 */
543-
for (j = 0; j < NB_WORD_GFqn; ++j)
544-
{
545-
array[c_cp++] ^= A.array[a_cp++] & mask;
546-
}
547-
/* The last 64-bit block BL of A contains HFEnr bits. So, there is no overflow for BL<<j while j<=(64-HFEnr). */
548-
int loop_end = HFEnr > 32 ? 65 - HFEnr : HFEnr;
549-
for (j = 1, jc = 63; j < loop_end; ++j, --jc)
550-
{
551-
a_cp = A.cp;
552-
c_cp = cp;
553-
mask = -((b >>> j) & 1L);
554-
mask1 = A.array[a_cp++] & mask;
555-
array[c_cp++] ^= mask1 << j;
556-
for (k = 1; k < NB_WORD_GFqn; ++k)
557-
{
558-
mask2 = A.array[a_cp++] & mask;
559-
array[c_cp++] ^= (mask1 >>> jc) | (mask2 << j);
560-
mask1 = mask2;
561-
}
562-
}
563-
if (HFEnr > 32)
564-
{
565-
for (; j < HFEnr; ++j, --jc)
566-
{
567-
a_cp = A.cp;
568-
c_cp = cp;
569-
mask = -((b >>> j) & 1L);
570-
mask1 = A.array[a_cp++] & mask;
571-
array[c_cp++] ^= mask1 << j;
572-
for (k = 1; k < NB_WORD_GFqn; ++k)
573-
{
574-
mask2 = A.array[a_cp++] & mask;
575-
array[c_cp++] ^= (mask1 >>> jc) | (mask2 << j);
576-
mask1 = mask2;
577-
}
578-
array[c_cp] ^= mask1 >>> jc;
579-
}
580-
}
581-
cp = 0;
582-
}
583-
584461
private void MUL64_NO_SIMD_GF2X(long[] C, int c_cp, long A, long B)
585462
{
586463
long c0, c1, tmp;
@@ -791,6 +668,7 @@ private void mul128_no_simd_gf2x(long[] C, int c_cp, long[] A, int a_cp, long[]
791668
C[1] = C0^C1^C2
792669
C[2] = C1^C2^C3
793670
C[3] = C3 */
671+
//TODO: move the following code above
794672
AA = A[a_cp] ^ A[a_cp + 1];
795673
BB = B[b_cp] ^ B[b_cp + 1];
796674
MUL64_NO_SIMD_GF2X(RESERVED_BUF, buf_cp, AA, BB);
@@ -897,6 +775,7 @@ public void mul192_no_simd_gf2x(long[] C, int c_cp, long[] A, int a_cp, long[] B
897775
C[c_cp + 4] ^= RESERVED_BUF2[buf_cp + 1];//c4=x4^x3
898776
C[c_cp + 2] = C[c_cp + 4];//c2=c4=x3^X4
899777
C[c_cp + 4] ^= C[c_cp + 5];//c4=x3^x4^x5
778+
//TODO: move here
900779
C[c_cp + 3] = C[c_cp + 1] ^ C[c_cp + 4];//c3=c1^c4=x1^x2^x3^x4^x5
901780
C[c_cp + 1] ^= C[c_cp];//c1=x0^x1^x2
902781
AA = A[a_cp] ^ A[a_cp + 1];
@@ -961,7 +840,7 @@ private void mul288_no_simd_gf2x(long[] C, int c_cp, long[] A, int a_cp, long[]
961840
// long[] BB = new long[3];
962841
// long[] RESERVED_BUF6 = new long[5];
963842
mul128_no_simd_gf2x(C, c_cp, A, a_cp, B, b_cp, RESERVED_BUF, 0);
964-
mul160_no_simd_gf2x(C, 4, A, a_cp + 2, B, b_cp + 2, RESERVED_BUF, 0);
843+
mul160_no_simd_gf2x(C, c_cp + 4, A, a_cp + 2, B, b_cp + 2, RESERVED_BUF, 0);
965844
C[c_cp + 4] ^= C[c_cp + 2];
966845
C[c_cp + 5] ^= C[c_cp + 3];
967846
C[c_cp + 2] = C[c_cp + 4] ^ C[c_cp];

0 commit comments

Comments
 (0)