diff --git a/raid/pq_check_sse.asm b/raid/pq_check_sse.asm
index 5f5ad90b..ed808528 100644
--- a/raid/pq_check_sse.asm
+++ b/raid/pq_check_sse.asm
@@ -130,7 +130,7 @@ func(pq_check_sse)
 	je	return_pass
 	test	len, (16-1)		;Check alignment of length
 	jnz	return_fail
-	mov	pos, 0
+	xor	DWORD(pos), DWORD(pos)
 	movdqa	xpoly, [poly]
 	cmp	len, 48
 	jl	loop16
@@ -148,13 +148,12 @@ loop48:
 	pxor	xq3, xq3		;q3 = 0
 
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	XLDR	xs2, [ptr+pos+16]	;Preload last vector (source)
 	XLDR	xs3, [ptr+pos+32]	;Preload last vector (source)
 
 next_vect:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	pxor	xp1, xs1		; p1 ^= s1
 	pxor	xp2, xs2		; p2 ^= s2
@@ -180,7 +179,8 @@ next_vect:
 	pxor	xq1, xtmp1		; q1 = q1<<1 ^ poly_masked
 	pxor	xq2, xtmp2		; q2 = q2<<1 ^ poly_masked
 	pxor	xq3, xtmp3		; q3 = q3<<1 ^ poly_masked
-	jg	next_vect		; Loop for each vect except 0
+	sub	tmp, 1			;Inner loop for each source vector
+	jae	next_vect		; Loop for each vect except 0
 
 	pxor	xp1, xs1		;p1 ^= s1[0] - last source is already loaded
 	pxor	xq1, xs1		;q1 ^= 1 * s1[0]
@@ -222,11 +222,10 @@ loop16:
 	XLDR	xp1, [ptr+pos]		;Initialize xp1 with P1 src
 	pxor	xq1, xq1		;q = 0
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 
 next_vect16:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	pxor	xq1, xs1		; q ^= s
 	pxor	xtmp1, xtmp1		; xtmp = 0
@@ -236,7 +235,8 @@ next_vect16:
 	paddb	xq1, xq1		; q = q<<1
 	pxor	xq1, xtmp1		; q = q<<1 ^ poly_masked
 	XLDR	xs1, [ptr+pos]		; Get next vector (source data)
-	jg	next_vect16		; Loop for each vect except 0
+	sub	tmp, 1			;Inner loop for each source vector
+	jae	next_vect16		; Loop for each vect except 0
 
 	pxor	xp1, xs1		;p ^= s[0] - last source is already loaded
 	pxor	xq1, xs1		;q ^= 1 * s[0]
@@ -254,7 +254,7 @@
 
 return_pass:
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	FUNC_RESTORE
 	ret
diff --git a/raid/pq_gen_avx.asm b/raid/pq_gen_avx.asm
index 3a1da030..65ae05d9 100644
--- a/raid/pq_gen_avx.asm
+++ b/raid/pq_gen_avx.asm
@@ -134,7 +134,7 @@ func(pq_gen_avx)
 	je	return_pass
 	test	len, (16-1)		;Check alignment of length
 	jnz	return_fail
-	mov	pos, 0
+	xor	DWORD(pos), DWORD(pos)
 	vmovdqa	xpoly, [poly]
 	vpxor	xzero, xzero, xzero
 	cmp	len, 48
@@ -145,7 +145,7 @@ len_aligned_32bytes:
 
 loop48:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	XLDR	xs2, [ptr+pos+16]	;Preload last vector (source)
 	XLDR	xs3, [ptr+pos+32]	;Preload last vector (source)
@@ -157,7 +157,6 @@ loop48:
 	vpxor	xq3, xq3, xq3		;q3 = 0
 
 next_vect:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxor	xq1, xq1, xs1		; q1 ^= s1
 	vpxor	xq2, xq2, xs2		; q2 ^= s2
@@ -177,7 +176,8 @@ next_vect:
 	vpxor	xq1, xq1, xtmp1		; q1 = q1<<1 ^ poly_masked
 	vpxor	xq2, xq2, xtmp2		; q2 = q2<<1 ^ poly_masked
 	vpxor	xq3, xq3, xtmp3		; q3 = q3<<1 ^ poly_masked
-	jg	next_vect		; Loop for each vect except 0
+	sub	tmp, 1
+	jae	next_vect		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -205,13 +205,12 @@
 
 loop16:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	vpxor	xp1, xp1, xp1		;p = 0
 	vpxor	xq1, xq1, xq1		;q = 0
 
 next_vect16:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxor	xq1, xq1, xs1		; q1 ^= s1
 	vpblendvb xtmp1, xzero, xpoly, xq1	; xtmp1 = poly or 0x00
@@ -219,7 +218,8 @@ next_vect16:
 	vpaddb	xq1, xq1, xq1		; q = q<<1
 	vpxor	xq1, xq1, xtmp1		; q = q<<1 ^ poly_masked
 	XLDR	xs1, [ptr+pos]		; Get next vector (source data)
-	jg	next_vect16		; Loop for each vect except 0
+	sub	tmp, 1
+	jae	next_vect16		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -233,7 +233,7 @@ next_vect16:
 
 return_pass:
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	FUNC_RESTORE
 	ret
diff --git a/raid/pq_gen_avx2.asm b/raid/pq_gen_avx2.asm
index 3d51fc44..8b122f22 100644
--- a/raid/pq_gen_avx2.asm
+++ b/raid/pq_gen_avx2.asm
@@ -135,7 +135,7 @@ func(pq_gen_avx2)
 	je	return_pass
 	test	len, (32-1)		;Check alignment of length
 	jnz	return_fail
-	mov	pos, 0
+	xor	DWORD(pos), DWORD(pos)
 	vmovdqa	xpoly, [poly]
 	vpxor	xzero, xzero, xzero
 	cmp	len, 96
@@ -146,7 +146,7 @@ len_aligned_32bytes:
 
 loop96:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	XLDR	xs2, [ptr+pos+32]	;Preload last vector (source)
 	XLDR	xs3, [ptr+pos+64]	;Preload last vector (source)
@@ -158,7 +158,6 @@ loop96:
 	vpxor	xq3, xq3, xq3		;q3 = 0
 
 next_vect:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxor	xq1, xq1, xs1		; q1 ^= s1
 	vpxor	xq2, xq2, xs2		; q2 ^= s2
@@ -178,7 +177,8 @@ next_vect:
 	vpxor	xq1, xq1, xtmp1		; q1 = q1<<1 ^ poly_masked
 	vpxor	xq2, xq2, xtmp2		; q2 = q2<<1 ^ poly_masked
 	vpxor	xq3, xq3, xtmp3		; q3 = q3<<1 ^ poly_masked
-	jg	next_vect		; Loop for each vect except 0
+	sub	tmp, 1
+	jae	next_vect		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -206,13 +206,12 @@ next_vect:
 
 loop32:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	vpxor	xp1, xp1, xp1		;p = 0
 	vpxor	xq1, xq1, xq1		;q = 0
 
 next_vect32:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxor	xq1, xq1, xs1		; q1 ^= s1
 	vpblendvb xtmp1, xzero, xpoly, xq1	; xtmp1 = poly or 0x00
@@ -220,7 +219,8 @@ next_vect32:
 	vpaddb	xq1, xq1, xq1		; q = q<<1
 	vpxor	xq1, xq1, xtmp1		; q = q<<1 ^ poly_masked
 	XLDR	xs1, [ptr+pos]		; Get next vector (source data)
-	jg	next_vect32		; Loop for each vect except 0
+	sub	tmp, 1
+	jae	next_vect32		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -234,7 +234,7 @@ next_vect32:
 
 return_pass:
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	FUNC_RESTORE
 	ret
diff --git a/raid/pq_gen_avx2_gfni.asm b/raid/pq_gen_avx2_gfni.asm
index ad707b91..5b095c10 100644
--- a/raid/pq_gen_avx2_gfni.asm
+++ b/raid/pq_gen_avx2_gfni.asm
@@ -140,7 +140,7 @@ func(pq_gen_avx2_gfni)
 	vmovdqa	gfmatrix, [rel gf_matrix]
-	xor	pos, pos
+	xor	DWORD(pos), DWORD(pos)
 	cmp	len, 64
 	jb	loop32
@@ -149,7 +149,7 @@ len_aligned_32bytes:
 
 loop64:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	XLDR	xs2, [ptr+pos+32]	;Preload last vector (source)
 	vpxor	xp1, xp1, xp1		;p1 = 0
@@ -158,7 +158,6 @@ loop64:
 	vpxor	xq2, xq2, xq2		;q2 = 0
 
 next_vect:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxor	xq1, xq1, xs1		; q1 ^= s1
 	vpxor	xq2, xq2, xs2		; q2 ^= s2
@@ -168,7 +167,8 @@ next_vect:
 	XLDR	xs2, [ptr+pos+32]	; Get next vector (source data2)
 	vgf2p8affineqb xq1, xq1, gfmatrix, 0x00
 	vgf2p8affineqb xq2, xq2, gfmatrix, 0x00
-	jg	next_vect		; Loop for each vect except 0
+	sub	tmp, 1
+	jae	next_vect		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -192,19 +192,19 @@ next_vect:
 
 loop32:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	vpxor	xp1, xp1, xp1		;p = 0
 	vpxor	xq1, xq1, xq1		;q = 0
 
 next_vect32:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxor	xq1, xq1, xs1		; q1 ^= s1
 	vgf2p8affineqb xq1, xq1, gfmatrix, 0x00
 	vpxor	xp1, xp1, xs1		; p ^= s
 	XLDR	xs1, [ptr+pos]		; Get next vector (source data)
-	jg	next_vect32		; Loop for each vect except 0
+	sub	tmp, 1
+	jae	next_vect32		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -218,7 +218,7 @@ next_vect32:
 
 return_pass:
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	FUNC_RESTORE
 	ret
diff --git a/raid/pq_gen_avx512.asm b/raid/pq_gen_avx512.asm
index f8b8c3d4..baa58ca8 100644
--- a/raid/pq_gen_avx512.asm
+++ b/raid/pq_gen_avx512.asm
@@ -130,7 +130,7 @@ func(pq_gen_avx512)
 	je	return_pass
 	test	len, (32-1)		;Check alignment of length
 	jnz	return_fail
-	mov	pos, 0
+	xor	DWORD(pos), DWORD(pos)
 	mov	tmp, 0x1d
 	vpbroadcastb xpoly, tmp
 	vpxorq	xzero, xzero, xzero
@@ -142,7 +142,7 @@ len_aligned_32bytes:
 
 loop128:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	XLDR	xs2, [ptr+pos+64]	;Preload last vector (source)
 	vpxorq	xp1, xp1, xp1		;p1 = 0
@@ -151,7 +151,6 @@ loop128:
 	vpxorq	xq2, xq2, xq2		;q2 = 0
 
 next_vect:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxorq	xq1, xq1, xs1		; q1 ^= s1
 	vpxorq	xq2, xq2, xs2		; q2 ^= s2
@@ -167,7 +166,8 @@ next_vect:
 	vpaddb	xq2, xq2, xq2		; q2 = q2<<1
 	vpxorq	xq1, xq1, xtmp1		; q1 = q1<<1 ^ poly_masked
 	vpxorq	xq2, xq2, xtmp2		; q2 = q2<<1 ^ poly_masked
-	jg	next_vect		; Loop for each vect except 0
+	sub	tmp, 1
+	jae	next_vect		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -191,13 +191,12 @@ next_vect:
 
 loop32:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1y, [ptr+pos]		;Preload last vector (source)
 	vpxorq	xp1y, xp1y, xp1y	;p = 0
 	vpxorq	xq1y, xq1y, xq1y	;q = 0
 
 next_vect32:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxorq	xq1y, xq1y, xs1y	; q1 ^= s1
 	vpblendvb xtmp1y, xzeroy, xpolyy, xq1y	; xtmp1 = poly or 0x00
@@ -205,7 +204,8 @@ next_vect32:
 	vpaddb	xq1y, xq1y, xq1y	; q = q<<1
 	vpxorq	xq1y, xq1y, xtmp1y	; q = q<<1 ^ poly_masked
 	XLDR	xs1y, [ptr+pos]		; Get next vector (source data)
-	jg	next_vect32		; Loop for each vect except 0
+	sub	tmp, 1
+	jae	next_vect32		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -219,7 +219,7 @@ next_vect32:
 
 return_pass:
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	FUNC_RESTORE
 	ret
diff --git a/raid/pq_gen_avx512_gfni.asm b/raid/pq_gen_avx512_gfni.asm
index 534649fb..ca62dd64 100644
--- a/raid/pq_gen_avx512_gfni.asm
+++ b/raid/pq_gen_avx512_gfni.asm
@@ -144,7 +144,7 @@ func(pq_gen_avx512_gfni)
 	vmovdqa64 gfmatrix, [rel gf_matrix]
-	xor	pos, pos
+	xor	DWORD(pos), DWORD(pos)
 	cmp	len, 128
 	jl	loop32
@@ -153,7 +153,7 @@ len_aligned_32bytes:
 
 loop128:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
 	XLDR	xs2, [ptr+pos+64]	;Preload last vector (source)
 	vpxorq	xp1, xp1, xp1		;p1 = 0
@@ -162,7 +162,6 @@ loop128:
 	vpxorq	xq2, xq2, xq2		;q2 = 0
 
 next_vect:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxorq	xq1, xq1, xs1		; q1 ^= s1
 	vpxorq	xq2, xq2, xs2		; q2 ^= s2
@@ -172,7 +171,8 @@ next_vect:
 	XLDR	xs2, [ptr+pos+64]	; Get next vector (source data2)
 	vgf2p8affineqb xq1, xq1, gfmatrix, 0x00
 	vgf2p8affineqb xq2, xq2, gfmatrix, 0x00
-	jg	next_vect		; Loop for each vect except 0
+	sub	tmp, 1			;Inner loop for each source vector
+	jae	next_vect		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -196,19 +196,19 @@ next_vect:
 
 loop32:
 	mov	ptr, [arg2+vec*8]	;Fetch last source pointer
-	mov	tmp, vec		;Set tmp to point back to last vector
+	lea	tmp, [vec-1]		;Set tmp to point back to last vector
 	XLDR	xs1y, [ptr+pos]		;Preload last vector (source)
 	vpxorq	xp1y, xp1y, xp1y	;p = 0
 	vpxorq	xq1y, xq1y, xq1y	;q = 0
 
 next_vect32:
-	sub	tmp, 1			;Inner loop for each source vector
 	mov	ptr, [arg2+tmp*8]	; get pointer to next vect
 	vpxorq	xq1y, xq1y, xs1y	; q1 ^= s1
 	vgf2p8affineqb xq1y, xq1y, gfmatrixy, 0x00
 	vpxorq	xp1y, xp1y, xs1y	; p ^= s
 	XLDR	xs1y, [ptr+pos]		; Get next vector (source data)
-	jg	next_vect32		; Loop for each vect except 0
+	sub	tmp, 1			;Inner loop for each source vector
+	jae	next_vect32		; Loop for each vect except 0
 
 	mov	ptr, [arg2+8+vec*8]	;Get address of P parity vector
 	mov	tmp, [arg2+(2*8)+vec*8]	;Get address of Q parity vector
@@ -222,7 +222,7 @@ next_vect32:
 
 return_pass:
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	FUNC_RESTORE
 	ret
diff --git a/raid/xor_gen_avx.asm b/raid/xor_gen_avx.asm
index 436840fa..1b2903bd 100644
--- a/raid/xor_gen_avx.asm
+++ b/raid/xor_gen_avx.asm
@@ -114,12 +114,11 @@ func(xor_gen_avx)
 
 len_aligned_128bytes:
 	sub	len, 128
-	mov	pos, 0
+	xor	DWORD(pos), DWORD(pos)
 
 loop128:
-	mov	tmp, vec		;Back to last vector
 	mov	tmp2, [arg2+vec*PS]	;Fetch last pointer in array
-	sub	tmp, 1			;Next vect
+	lea	tmp, [vec-1]		;Next vect
 	XLDR	ymm0, [tmp2+pos]	;Start with end of array in last vector
 	XLDR	ymm1, [tmp2+pos+32]	;Keep xor parity in xmm0-7
 	XLDR	ymm2, [tmp2+pos+(2*32)]
@@ -127,7 +126,6 @@ loop128:
 
 next_vect:
 	mov	ptr, [arg2+tmp*PS]
-	sub	tmp, 1
 	XLDR	ymm4, [ptr+pos]		;Get next vector (source)
 	XLDR	ymm5, [ptr+pos+32]
 	XLDR	ymm6, [ptr+pos+(2*32)]
@@ -136,7 +134,8 @@ next_vect:
 	vxorpd	ymm1, ymm1, ymm5
 	vxorpd	ymm2, ymm2, ymm6
 	vxorpd	ymm3, ymm3, ymm7
-	jge	next_vect		;Loop for each source
+	sub	tmp, 1
+	jae	next_vect		;Loop for each source
 
 	mov	ptr, [arg2+PS+vec*PS]	;Address of parity vector
 	XSTR	[ptr+pos], ymm0		;Write parity xor vector
@@ -149,7 +148,7 @@ next_vect:
 
 return_pass:
 	FUNC_RESTORE
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	ret
diff --git a/raid/xor_gen_avx512.asm b/raid/xor_gen_avx512.asm
index a45d58df..cd98dd47 100644
--- a/raid/xor_gen_avx512.asm
+++ b/raid/xor_gen_avx512.asm
@@ -113,23 +113,22 @@ func(xor_gen_avx512)
 
 len_aligned_128bytes:
 	sub	len, 128
-	mov	pos, 0
+	xor	DWORD(pos), DWORD(pos)
 
 loop128:
-	mov	tmp, vec		;Back to last vector
 	mov	tmp2, [arg2+vec*PS]	;Fetch last pointer in array
-	sub	tmp, 1			;Next vect
+	lea	tmp, [vec-1]		;Next vect
 	XLDR	zmm0, [tmp2+pos]	;Start with end of array in last vector
 	XLDR	zmm1, [tmp2+pos+64]	;Keep xor parity in xmm0-7
 
 next_vect:
 	mov	ptr, [arg2+tmp*PS]
-	sub	tmp, 1
 	XLDR	zmm4, [ptr+pos]		;Get next vector (source)
 	XLDR	zmm5, [ptr+pos+64]
 	vpxorq	zmm0, zmm0, zmm4	;Add to xor parity
 	vpxorq	zmm1, zmm1, zmm5
-	jge	next_vect		;Loop for each source
+	sub	tmp, 1
+	jae	next_vect		;Loop for each source
 
 	mov	ptr, [arg2+PS+vec*PS]	;Address of parity vector
 	XSTR	[ptr+pos], zmm0		;Write parity xor vector
@@ -140,7 +139,7 @@ next_vect:
 
 return_pass:
 	FUNC_RESTORE
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	ret
diff --git a/raid/xor_gen_sse.asm b/raid/xor_gen_sse.asm
index 41f2a77c..e8e9b2f0 100644
--- a/raid/xor_gen_sse.asm
+++ b/raid/xor_gen_sse.asm
@@ -116,12 +116,11 @@ func(xor_gen_sse)
 
 len_aligned_128bytes:
 	sub	len, 128
-	mov	pos, 0
-	mov	tmp, vec		;Preset to last vector
+	xor	DWORD(pos), DWORD(pos)
 
 loop128:
-	mov	tmp2, [arg2+tmp*PS]	;Fetch last pointer in array
-	sub	tmp, 1			;Next vect
+	mov	tmp2, [arg2+vec*PS]	;Fetch last pointer in array
+	lea	tmp, [vec-1]		;Next vect
 	XLDR	xmm0, [tmp2+pos]	;Start with end of array in last vector
 	XLDR	xmm1, [tmp2+pos+16]	;Keep xor parity in xmm0-7
 	XLDR	xmm2, [tmp2+pos+(2*16)]
@@ -133,7 +132,6 @@ loop128:
 
 next_vect:
 	mov	ptr, [arg2+tmp*PS]
-	sub	tmp, 1
 	xorpd	xmm0, [ptr+pos]		;Get next vector (source)
 	xorpd	xmm1, [ptr+pos+16]
 	xorpd	xmm2, [ptr+pos+(2*16)]
@@ -143,11 +141,11 @@ next_vect:
 	xorpd	xmm6, [ptr+pos+(6*16)]
 	xorpd	xmm7, [ptr+pos+(7*16)]
 	;;; prefetch [ptr+pos+(8*16)]
-	jge	next_vect		;Loop for each vect
+	sub	tmp, 1
+	jae	next_vect		;Loop for each vect
 
-	mov	tmp, vec		;Back to last vector
-	mov	ptr, [arg2+PS+tmp*PS]	;Address of parity vector
+	mov	ptr, [arg2+PS+vec*PS]	;Address of parity vector
 	XSTR	[ptr+pos], xmm0		;Write parity xor vector
 	XSTR	[ptr+pos+(1*16)], xmm1
 	XSTR	[ptr+pos+(2*16)], xmm2
@@ -161,7 +159,7 @@ next_vect:
 	jle	loop128
 
 return_pass:
-	mov	return, 0
+	xor	DWORD(return), DWORD(return)
 	FUNC_RESTORE
 	ret
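
Common pattern across these hunks: `mov pos, 0` / `mov return, 0` become `xor` on the 32-bit alias (`DWORD(...)`), which zero-extends to the full register with a shorter encoding, and the per-iteration `mov tmp, vec` plus top-of-loop `sub tmp, 1` / bottom-of-loop `jg`/`jge` become a single `lea tmp, [vec-1]` with the decrement moved down next to the branch, now taken on `jae`, so the branch consumes the flags of the decrement directly and the source index still runs from vec-1 down through 0. A minimal sketch of the reworked loop shape, with illustrative plain registers in place of the isa-l register macros:

	; rcx = tmp, rdx = vec, rsi = array of source pointers (illustrative names)
	lea	rcx, [rdx-1]		; tmp = vec - 1 (lea leaves EFLAGS untouched)
.next_vect:
	mov	rax, [rsi+rcx*8]	; ptr = sources[tmp]
	; ... XOR / GF2P8 work on [rax+pos] ...
	sub	rcx, 1			; tmp--; borrows (CF=1) only when tmp was 0
	jae	.next_vect		; repeat while tmp >= 0, i.e. indices vec-1 .. 0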