Skip to content

Commit b7e6d3e

Browse files
committed
Keep sub and jcc in sequence to enable macrofusion.
Signed-off-by: Nicola Torracca <[email protected]>
1 parent e677f66 commit b7e6d3e

9 files changed

+63
-66
lines changed

raid/pq_check_sse.asm

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ func(pq_check_sse)
130130
je return_pass
131131
test len, (16-1) ;Check alignment of length
132132
jnz return_fail
133-
mov pos, 0
133+
xor DWORD(pos), DWORD(pos)
134134
movdqa xpoly, [poly]
135135
cmp len, 48
136136
jl loop16
@@ -148,13 +148,12 @@ loop48:
148148
pxor xq3, xq3 ;q3 = 0
149149

150150
mov ptr, [arg2+vec*8] ;Fetch last source pointer
151-
mov tmp, vec ;Set tmp to point back to last vector
151+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
152152
XLDR xs1, [ptr+pos] ;Preload last vector (source)
153153
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
154154
XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
155155

156156
next_vect:
157-
sub tmp, 1 ;Inner loop for each source vector
158157
mov ptr, [arg2+tmp*8] ; get pointer to next vect
159158
pxor xp1, xs1 ; p1 ^= s1
160159
pxor xp2, xs2 ; p2 ^= s2
@@ -180,7 +179,8 @@ next_vect:
180179
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
181180
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
182181
pxor xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
183-
jg next_vect ; Loop for each vect except 0
182+
sub tmp, 1 ;Inner loop for each source vector
183+
jae next_vect ; Loop for each vect except 0
184184

185185
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
186186
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
@@ -222,11 +222,10 @@ loop16:
222222
XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
223223
pxor xq1, xq1 ;q = 0
224224
mov ptr, [arg2+vec*8] ;Fetch last source pointer
225-
mov tmp, vec ;Set tmp to point back to last vector
225+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
226226
XLDR xs1, [ptr+pos] ;Preload last vector (source)
227227

228228
next_vect16:
229-
sub tmp, 1 ;Inner loop for each source vector
230229
mov ptr, [arg2+tmp*8] ; get pointer to next vect
231230
pxor xq1, xs1 ; q ^= s
232231
pxor xtmp1, xtmp1 ; xtmp = 0
@@ -236,7 +235,8 @@ next_vect16:
236235
paddb xq1, xq1 ; q = q<<1
237236
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
238237
XLDR xs1, [ptr+pos] ; Get next vector (source data)
239-
jg next_vect16 ; Loop for each vect except 0
238+
sub tmp, 1 ;Inner loop for each source vector
239+
jae next_vect16 ; Loop for each vect except 0
240240

241241
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
242242
pxor xq1, xs1 ;q ^= 1 * s[0]
@@ -254,7 +254,7 @@ next_vect16:
254254

255255

256256
return_pass:
257-
mov return, 0
257+
xor DWORD(return), DWORD(return)
258258
FUNC_RESTORE
259259
ret
260260

raid/pq_gen_avx.asm

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ func(pq_gen_avx)
134134
je return_pass
135135
test len, (16-1) ;Check alignment of length
136136
jnz return_fail
137-
mov pos, 0
137+
xor DWORD(pos), DWORD(pos)
138138
vmovdqa xpoly, [poly]
139139
vpxor xzero, xzero, xzero
140140
cmp len, 48
@@ -145,7 +145,7 @@ len_aligned_32bytes:
145145

146146
loop48:
147147
mov ptr, [arg2+vec*8] ;Fetch last source pointer
148-
mov tmp, vec ;Set tmp to point back to last vector
148+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
149149
XLDR xs1, [ptr+pos] ;Preload last vector (source)
150150
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
151151
XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
@@ -157,7 +157,6 @@ loop48:
157157
vpxor xq3, xq3, xq3 ;q3 = 0
158158

159159
next_vect:
160-
sub tmp, 1 ;Inner loop for each source vector
161160
mov ptr, [arg2+tmp*8] ; get pointer to next vect
162161
vpxor xq1, xq1, xs1 ; q1 ^= s1
163162
vpxor xq2, xq2, xs2 ; q2 ^= s2
@@ -177,7 +176,8 @@ next_vect:
177176
vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
178177
vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
179178
vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
180-
jg next_vect ; Loop for each vect except 0
179+
sub tmp, 1
180+
jae next_vect ; Loop for each vect except 0
181181

182182
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
183183
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
@@ -205,21 +205,21 @@ next_vect:
205205

206206
loop16:
207207
mov ptr, [arg2+vec*8] ;Fetch last source pointer
208-
mov tmp, vec ;Set tmp to point back to last vector
208+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
209209
XLDR xs1, [ptr+pos] ;Preload last vector (source)
210210
vpxor xp1, xp1, xp1 ;p = 0
211211
vpxor xq1, xq1, xq1 ;q = 0
212212

213213
next_vect16:
214-
sub tmp, 1 ;Inner loop for each source vector
215214
mov ptr, [arg2+tmp*8] ; get pointer to next vect
216215
vpxor xq1, xq1, xs1 ; q1 ^= s1
217216
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
218217
vpxor xp1, xp1, xs1 ; p ^= s
219218
vpaddb xq1, xq1, xq1 ; q = q<<1
220219
vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
221220
XLDR xs1, [ptr+pos] ; Get next vector (source data)
222-
jg next_vect16 ; Loop for each vect except 0
221+
sub tmp, 1
222+
jae next_vect16 ; Loop for each vect except 0
223223

224224
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
225225
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
@@ -233,7 +233,7 @@ next_vect16:
233233

234234

235235
return_pass:
236-
mov return, 0
236+
xor DWORD(return), DWORD(return)
237237
FUNC_RESTORE
238238
ret
239239

raid/pq_gen_avx2.asm

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ func(pq_gen_avx2)
135135
je return_pass
136136
test len, (32-1) ;Check alignment of length
137137
jnz return_fail
138-
mov pos, 0
138+
xor DWORD(pos), DWORD(pos)
139139
vmovdqa xpoly, [poly]
140140
vpxor xzero, xzero, xzero
141141
cmp len, 96
@@ -146,7 +146,7 @@ len_aligned_32bytes:
146146

147147
loop96:
148148
mov ptr, [arg2+vec*8] ;Fetch last source pointer
149-
mov tmp, vec ;Set tmp to point back to last vector
149+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
150150
XLDR xs1, [ptr+pos] ;Preload last vector (source)
151151
XLDR xs2, [ptr+pos+32] ;Preload last vector (source)
152152
XLDR xs3, [ptr+pos+64] ;Preload last vector (source)
@@ -158,7 +158,6 @@ loop96:
158158
vpxor xq3, xq3, xq3 ;q3 = 0
159159

160160
next_vect:
161-
sub tmp, 1 ;Inner loop for each source vector
162161
mov ptr, [arg2+tmp*8] ; get pointer to next vect
163162
vpxor xq1, xq1, xs1 ; q1 ^= s1
164163
vpxor xq2, xq2, xs2 ; q2 ^= s2
@@ -178,7 +177,8 @@ next_vect:
178177
vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
179178
vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
180179
vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
181-
jg next_vect ; Loop for each vect except 0
180+
sub tmp, 1
181+
jae next_vect ; Loop for each vect except 0
182182

183183
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
184184
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
@@ -206,21 +206,21 @@ next_vect:
206206

207207
loop32:
208208
mov ptr, [arg2+vec*8] ;Fetch last source pointer
209-
mov tmp, vec ;Set tmp to point back to last vector
209+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
210210
XLDR xs1, [ptr+pos] ;Preload last vector (source)
211211
vpxor xp1, xp1, xp1 ;p = 0
212212
vpxor xq1, xq1, xq1 ;q = 0
213213

214214
next_vect32:
215-
sub tmp, 1 ;Inner loop for each source vector
216215
mov ptr, [arg2+tmp*8] ; get pointer to next vect
217216
vpxor xq1, xq1, xs1 ; q1 ^= s1
218217
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
219218
vpxor xp1, xp1, xs1 ; p ^= s
220219
vpaddb xq1, xq1, xq1 ; q = q<<1
221220
vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
222221
XLDR xs1, [ptr+pos] ; Get next vector (source data)
223-
jg next_vect32 ; Loop for each vect except 0
222+
sub tmp, 1
223+
jae next_vect32 ; Loop for each vect except 0
224224

225225
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
226226
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
@@ -234,7 +234,7 @@ next_vect32:
234234

235235

236236
return_pass:
237-
mov return, 0
237+
xor DWORD(return), DWORD(return)
238238
FUNC_RESTORE
239239
ret
240240

raid/pq_gen_avx2_gfni.asm

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ func(pq_gen_avx2_gfni)
140140

141141
vmovdqa gfmatrix, [rel gf_matrix]
142142

143-
xor pos, pos
143+
xor DWORD(pos), DWORD(pos)
144144
cmp len, 64
145145
jb loop32
146146

@@ -149,7 +149,7 @@ len_aligned_32bytes:
149149

150150
loop64:
151151
mov ptr, [arg2+vec*8] ;Fetch last source pointer
152-
mov tmp, vec ;Set tmp to point back to last vector
152+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
153153
XLDR xs1, [ptr+pos] ;Preload last vector (source)
154154
XLDR xs2, [ptr+pos+32] ;Preload last vector (source)
155155
vpxor xp1, xp1, xp1 ;p1 = 0
@@ -158,7 +158,6 @@ loop64:
158158
vpxor xq2, xq2, xq2 ;q2 = 0
159159

160160
next_vect:
161-
sub tmp, 1 ;Inner loop for each source vector
162161
mov ptr, [arg2+tmp*8] ; get pointer to next vect
163162
vpxor xq1, xq1, xs1 ; q1 ^= s1
164163
vpxor xq2, xq2, xs2 ; q2 ^= s2
@@ -168,7 +167,8 @@ next_vect:
168167
XLDR xs2, [ptr+pos+32] ; Get next vector (source data2)
169168
vgf2p8affineqb xq1, xq1, gfmatrix, 0x00
170169
vgf2p8affineqb xq2, xq2, gfmatrix, 0x00
171-
jg next_vect ; Loop for each vect except 0
170+
sub tmp, 1
171+
jae next_vect ; Loop for each vect except 0
172172

173173
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
174174
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
@@ -192,19 +192,19 @@ next_vect:
192192

193193
loop32:
194194
mov ptr, [arg2+vec*8] ;Fetch last source pointer
195-
mov tmp, vec ;Set tmp to point back to last vector
195+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
196196
XLDR xs1, [ptr+pos] ;Preload last vector (source)
197197
vpxor xp1, xp1, xp1 ;p = 0
198198
vpxor xq1, xq1, xq1 ;q = 0
199199

200200
next_vect32:
201-
sub tmp, 1 ;Inner loop for each source vector
202201
mov ptr, [arg2+tmp*8] ; get pointer to next vect
203202
vpxor xq1, xq1, xs1 ; q1 ^= s1
204203
vgf2p8affineqb xq1, xq1, gfmatrix, 0x00
205204
vpxor xp1, xp1, xs1 ; p ^= s
206205
XLDR xs1, [ptr+pos] ; Get next vector (source data)
207-
jg next_vect32 ; Loop for each vect except 0
206+
sub tmp, 1
207+
jae next_vect32 ; Loop for each vect except 0
208208

209209
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
210210
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
@@ -218,7 +218,7 @@ next_vect32:
218218

219219

220220
return_pass:
221-
mov return, 0
221+
xor DWORD(return), DWORD(return)
222222
FUNC_RESTORE
223223
ret
224224

raid/pq_gen_avx512.asm

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ func(pq_gen_avx512)
130130
je return_pass
131131
test len, (32-1) ;Check alignment of length
132132
jnz return_fail
133-
mov pos, 0
133+
xor DWORD(pos), DWORD(pos)
134134
mov tmp, 0x1d
135135
vpbroadcastb xpoly, tmp
136136
vpxorq xzero, xzero, xzero
@@ -142,7 +142,7 @@ len_aligned_32bytes:
142142

143143
loop128:
144144
mov ptr, [arg2+vec*8] ;Fetch last source pointer
145-
mov tmp, vec ;Set tmp to point back to last vector
145+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
146146
XLDR xs1, [ptr+pos] ;Preload last vector (source)
147147
XLDR xs2, [ptr+pos+64] ;Preload last vector (source)
148148
vpxorq xp1, xp1, xp1 ;p1 = 0
@@ -151,7 +151,6 @@ loop128:
151151
vpxorq xq2, xq2, xq2 ;q2 = 0
152152

153153
next_vect:
154-
sub tmp, 1 ;Inner loop for each source vector
155154
mov ptr, [arg2+tmp*8] ; get pointer to next vect
156155
vpxorq xq1, xq1, xs1 ; q1 ^= s1
157156
vpxorq xq2, xq2, xs2 ; q2 ^= s2
@@ -167,7 +166,8 @@ next_vect:
167166
vpaddb xq2, xq2, xq2 ; q2 = q2<<1
168167
vpxorq xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
169168
vpxorq xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
170-
jg next_vect ; Loop for each vect except 0
169+
sub tmp, 1
170+
jae next_vect ; Loop for each vect except 0
171171

172172
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
173173
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
@@ -191,21 +191,21 @@ next_vect:
191191

192192
loop32:
193193
mov ptr, [arg2+vec*8] ;Fetch last source pointer
194-
mov tmp, vec ;Set tmp to point back to last vector
194+
lea tmp, [vec-1] ;Set tmp to index the vector before the last one
195195
XLDR xs1y, [ptr+pos] ;Preload last vector (source)
196196
vpxorq xp1y, xp1y, xp1y ;p = 0
197197
vpxorq xq1y, xq1y, xq1y ;q = 0
198198

199199
next_vect32:
200-
sub tmp, 1 ;Inner loop for each source vector
201200
mov ptr, [arg2+tmp*8] ; get pointer to next vect
202201
vpxorq xq1y, xq1y, xs1y ; q1 ^= s1
203202
vpblendvb xtmp1y, xzeroy, xpolyy, xq1y ; xtmp1 = poly or 0x00
204203
vpxorq xp1y, xp1y, xs1y ; p ^= s
205204
vpaddb xq1y, xq1y, xq1y ; q = q<<1
206205
vpxorq xq1y, xq1y, xtmp1y ; q = q<<1 ^ poly_masked
207206
XLDR xs1y, [ptr+pos] ; Get next vector (source data)
208-
jg next_vect32 ; Loop for each vect except 0
207+
sub tmp, 1
208+
jae next_vect32 ; Loop for each vect except 0
209209

210210
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
211211
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
@@ -219,7 +219,7 @@ next_vect32:
219219

220220

221221
return_pass:
222-
mov return, 0
222+
xor DWORD(return), DWORD(return)
223223
FUNC_RESTORE
224224
ret
225225

0 commit comments

Comments
 (0)