Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions raid/pq_check_sse.asm
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func(pq_check_sse)
je return_pass
test len, (16-1) ;Check alignment of length
jnz return_fail
mov pos, 0
xor DWORD(pos), DWORD(pos)
movdqa xpoly, [poly]
cmp len, 48
jl loop16
Expand All @@ -148,13 +148,12 @@ loop48:
pxor xq3, xq3 ;q3 = 0

mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
XLDR xs3, [ptr+pos+32] ;Preload last vector (source)

next_vect:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
pxor xp1, xs1 ; p1 ^= s1
pxor xp2, xs2 ; p2 ^= s2
Expand All @@ -180,7 +179,8 @@ next_vect:
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
pxor xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0
sub tmp, 1 ;Inner loop for each source vector
jae next_vect ; Loop for each vect except 0

pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
Expand Down Expand Up @@ -222,11 +222,10 @@ loop16:
XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
pxor xq1, xq1 ;q = 0
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)

next_vect16:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
pxor xq1, xs1 ; q ^= s
pxor xtmp1, xtmp1 ; xtmp = 0
Expand All @@ -236,7 +235,8 @@ next_vect16:
paddb xq1, xq1 ; q = q<<1
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect16 ; Loop for each vect except 0
sub tmp, 1 ;Inner loop for each source vector
jae next_vect16 ; Loop for each vect except 0

pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
pxor xq1, xs1 ;q ^= 1 * s[0]
Expand All @@ -254,7 +254,7 @@ next_vect16:


return_pass:
mov return, 0
xor DWORD(return), DWORD(return)
FUNC_RESTORE
ret

Expand Down
16 changes: 8 additions & 8 deletions raid/pq_gen_avx.asm
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func(pq_gen_avx)
je return_pass
test len, (16-1) ;Check alignment of length
jnz return_fail
mov pos, 0
xor DWORD(pos), DWORD(pos)
vmovdqa xpoly, [poly]
vpxor xzero, xzero, xzero
cmp len, 48
Expand All @@ -145,7 +145,7 @@ len_aligned_32bytes:

loop48:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
Expand All @@ -157,7 +157,6 @@ loop48:
vpxor xq3, xq3, xq3 ;q3 = 0

next_vect:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpxor xq2, xq2, xs2 ; q2 ^= s2
Expand All @@ -177,7 +176,8 @@ next_vect:
vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0
sub tmp, 1
jae next_vect ; Loop for each vect except 0

mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
Expand Down Expand Up @@ -205,21 +205,21 @@ next_vect:

loop16:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
vpxor xp1, xp1, xp1 ;p = 0
vpxor xq1, xq1, xq1 ;q = 0

next_vect16:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
vpxor xp1, xp1, xs1 ; p ^= s
vpaddb xq1, xq1, xq1 ; q = q<<1
vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect16 ; Loop for each vect except 0
sub tmp, 1
jae next_vect16 ; Loop for each vect except 0

mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
Expand All @@ -233,7 +233,7 @@ next_vect16:


return_pass:
mov return, 0
xor DWORD(return), DWORD(return)
FUNC_RESTORE
ret

Expand Down
16 changes: 8 additions & 8 deletions raid/pq_gen_avx2.asm
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func(pq_gen_avx2)
je return_pass
test len, (32-1) ;Check alignment of length
jnz return_fail
mov pos, 0
xor DWORD(pos), DWORD(pos)
vmovdqa xpoly, [poly]
vpxor xzero, xzero, xzero
cmp len, 96
Expand All @@ -146,7 +146,7 @@ len_aligned_32bytes:

loop96:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
XLDR xs2, [ptr+pos+32] ;Preload last vector (source)
XLDR xs3, [ptr+pos+64] ;Preload last vector (source)
Expand All @@ -158,7 +158,6 @@ loop96:
vpxor xq3, xq3, xq3 ;q3 = 0

next_vect:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpxor xq2, xq2, xs2 ; q2 ^= s2
Expand All @@ -178,7 +177,8 @@ next_vect:
vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0
sub tmp, 1
jae next_vect ; Loop for each vect except 0

mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
Expand Down Expand Up @@ -206,21 +206,21 @@ next_vect:

loop32:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
vpxor xp1, xp1, xp1 ;p = 0
vpxor xq1, xq1, xq1 ;q = 0

next_vect32:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
vpxor xp1, xp1, xs1 ; p ^= s
vpaddb xq1, xq1, xq1 ; q = q<<1
vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect32 ; Loop for each vect except 0
sub tmp, 1
jae next_vect32 ; Loop for each vect except 0

mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
Expand All @@ -234,7 +234,7 @@ next_vect32:


return_pass:
mov return, 0
xor DWORD(return), DWORD(return)
FUNC_RESTORE
ret

Expand Down
16 changes: 8 additions & 8 deletions raid/pq_gen_avx2_gfni.asm
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func(pq_gen_avx2_gfni)

vmovdqa gfmatrix, [rel gf_matrix]

xor pos, pos
xor DWORD(pos), DWORD(pos)
cmp len, 64
jb loop32

Expand All @@ -149,7 +149,7 @@ len_aligned_32bytes:

loop64:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
XLDR xs2, [ptr+pos+32] ;Preload last vector (source)
vpxor xp1, xp1, xp1 ;p1 = 0
Expand All @@ -158,7 +158,6 @@ loop64:
vpxor xq2, xq2, xq2 ;q2 = 0

next_vect:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpxor xq2, xq2, xs2 ; q2 ^= s2
Expand All @@ -168,7 +167,8 @@ next_vect:
XLDR xs2, [ptr+pos+32] ; Get next vector (source data2)
vgf2p8affineqb xq1, xq1, gfmatrix, 0x00
vgf2p8affineqb xq2, xq2, gfmatrix, 0x00
jg next_vect ; Loop for each vect except 0
sub tmp, 1
jae next_vect ; Loop for each vect except 0

mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
Expand All @@ -192,19 +192,19 @@ next_vect:

loop32:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
vpxor xp1, xp1, xp1 ;p = 0
vpxor xq1, xq1, xq1 ;q = 0

next_vect32:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vgf2p8affineqb xq1, xq1, gfmatrix, 0x00
vpxor xp1, xp1, xs1 ; p ^= s
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect32 ; Loop for each vect except 0
sub tmp, 1
jae next_vect32 ; Loop for each vect except 0

mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
Expand All @@ -218,7 +218,7 @@ next_vect32:


return_pass:
mov return, 0
xor DWORD(return), DWORD(return)
FUNC_RESTORE
ret

Expand Down
16 changes: 8 additions & 8 deletions raid/pq_gen_avx512.asm
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func(pq_gen_avx512)
je return_pass
test len, (32-1) ;Check alignment of length
jnz return_fail
mov pos, 0
xor DWORD(pos), DWORD(pos)
mov tmp, 0x1d
vpbroadcastb xpoly, tmp
vpxorq xzero, xzero, xzero
Expand All @@ -142,7 +142,7 @@ len_aligned_32bytes:

loop128:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
XLDR xs2, [ptr+pos+64] ;Preload last vector (source)
vpxorq xp1, xp1, xp1 ;p1 = 0
Expand All @@ -151,7 +151,6 @@ loop128:
vpxorq xq2, xq2, xq2 ;q2 = 0

next_vect:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxorq xq1, xq1, xs1 ; q1 ^= s1
vpxorq xq2, xq2, xs2 ; q2 ^= s2
Expand All @@ -167,7 +166,8 @@ next_vect:
vpaddb xq2, xq2, xq2 ; q2 = q2<<1
vpxorq xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
vpxorq xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0
sub tmp, 1
jae next_vect ; Loop for each vect except 0

mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
Expand All @@ -191,21 +191,21 @@ next_vect:

loop32:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
lea tmp, [vec-1] ;Set tmp to point back to last vector
XLDR xs1y, [ptr+pos] ;Preload last vector (source)
vpxorq xp1y, xp1y, xp1y ;p = 0
vpxorq xq1y, xq1y, xq1y ;q = 0

next_vect32:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxorq xq1y, xq1y, xs1y ; q1 ^= s1
vpblendvb xtmp1y, xzeroy, xpolyy, xq1y ; xtmp1 = poly or 0x00
vpxorq xp1y, xp1y, xs1y ; p ^= s
vpaddb xq1y, xq1y, xq1y ; q = q<<1
vpxorq xq1y, xq1y, xtmp1y ; q = q<<1 ^ poly_masked
XLDR xs1y, [ptr+pos] ; Get next vector (source data)
jg next_vect32 ; Loop for each vect except 0
sub tmp, 1
jae next_vect32 ; Loop for each vect except 0

mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
Expand All @@ -219,7 +219,7 @@ next_vect32:


return_pass:
mov return, 0
xor DWORD(return), DWORD(return)
FUNC_RESTORE
ret

Expand Down
Loading