@@ -130,7 +130,7 @@ func(pq_gen_avx512)
130
130
je return_pass
131
131
test len , ( 32 - 1 ) ;Check alignment of length
132
132
jnz return_fail
133
- mov pos, 0
133
+ xor DWORD( pos) , DWORD(pos)
134
134
mov tmp , 0x1d
135
135
vpbroadcastb xpoly , tmp
136
136
vpxorq xzero , xzero , xzero
@@ -142,7 +142,7 @@ len_aligned_32bytes:
142
142
143
143
loop128:
144
144
mov ptr , [ arg2 + vec * 8 ] ;Fetch last source pointer
145
- mov tmp , vec ;Set tmp to point back to last vector
145
+ lea tmp , [ vec - 1 ] ;Set tmp to point back to last vector
146
146
XLDR xs1 , [ ptr + pos ] ;Preload last vector (source)
147
147
XLDR xs2 , [ ptr + pos + 64 ] ;Preload last vector (source)
148
148
vpxorq xp1 , xp1 , xp1 ;p1 = 0
@@ -151,7 +151,6 @@ loop128:
151
151
vpxorq xq2 , xq2 , xq2 ;q2 = 0
152
152
153
153
next_vect:
154
- sub tmp , 1 ;Inner loop for each source vector
155
154
mov ptr , [ arg2 + tmp * 8 ] ; get pointer to next vect
156
155
vpxorq xq1 , xq1 , xs1 ; q1 ^= s1
157
156
vpxorq xq2 , xq2 , xs2 ; q2 ^= s2
@@ -167,7 +166,8 @@ next_vect:
167
166
vpaddb xq2 , xq2 , xq2 ; q2 = q2<<1
168
167
vpxorq xq1 , xq1 , xtmp1 ; q1 = q1<<1 ^ poly_masked
169
168
vpxorq xq2 , xq2 , xtmp2 ; q2 = q2<<1 ^ poly_masked
170
- jg next_vect ; Loop for each vect except 0
169
+ sub tmp , 1
170
+ jae next_vect ; Loop for each vect except 0
171
171
172
172
mov ptr , [ arg2 + 8 + vec * 8 ] ;Get address of P parity vector
173
173
mov tmp , [ arg2 + ( 2 * 8 ) + vec * 8 ] ;Get address of Q parity vector
@@ -191,21 +191,21 @@ next_vect:
191
191
192
192
loop32:
193
193
mov ptr , [ arg2 + vec * 8 ] ;Fetch last source pointer (ptr = arg2[vec])
194
- mov tmp , vec ;Set tmp to point back to last vector (old form: decremented at the top of next_vect32)
194
+ lea tmp , [ vec - 1 ] ;tmp = vec - 1: index of the source just before the last one (the last is preloaded below); lea leaves flags untouched
195
195
XLDR xs1y , [ ptr + pos ] ;Preload last vector (source)
196
196
vpxorq xp1y , xp1y , xp1y ;p = 0
197
197
vpxorq xq1y , xq1y , xq1y ;q = 0
198
198
199
199
next_vect32:
200
- sub tmp , 1 ;Inner loop for each source vector (old form: flags set here are consumed by the jg at the bottom)
201
200
mov ptr , [ arg2 + tmp * 8 ] ; get pointer to next vect (ptr = arg2[tmp])
202
201
vpxorq xq1y , xq1y , xs1y ; q1 ^= s1
203
202
vpblendvb xtmp1y , xzeroy , xpolyy , xq1y ; xtmp1 = poly or 0x00, selected per byte by the top bit of q
204
203
vpxorq xp1y , xp1y , xs1y ; p ^= s
205
204
vpaddb xq1y , xq1y , xq1y ; q = q<<1
206
205
vpxorq xq1y , xq1y , xtmp1y ; q = q<<1 ^ poly_masked
207
206
XLDR xs1y , [ ptr + pos ] ; Get next vector (source data)
208
- jg next_vect32 ; Loop for each vect except 0 (old form: relies on flags from the sub at the loop top; assumes XLDR does not modify flags)
207
+ sub tmp , 1 ; step to the previous source index; CF is set only when tmp was 0
208
+ jae next_vect32 ; Loop for each vect except 0 (taken while CF is clear, i.e. tmp was >= 1 before the sub)
209
209
210
210
mov ptr , [ arg2 + 8 + vec * 8 ] ;Get address of P parity vector
211
211
mov tmp , [ arg2 + ( 2 * 8 ) + vec * 8 ] ;Get address of Q parity vector
@@ -219,7 +219,7 @@ next_vect32:
219
219
220
220
221
221
return_pass:
222
- mov return, 0 ;old form: return = 0
222
+ xor DWORD( return) , DWORD(return) ;new form: return = 0 via xor (modifies flags, unlike mov); presumably DWORD() selects the 32-bit alias, whose write also clears the upper half - confirm macro
223
223
FUNC_RESTORE ;macro defined elsewhere; NOTE(review): presumably restores registers saved at entry - confirm
224
224
ret
225
225
0 commit comments