Skip to content

Commit 46b5272

Browse files
pablodelara authored and mdcornu committed
crc: prefetch data with prefetcht1 for non-VPCLMUL implementations
Signed-off-by: Pablo de Lara <[email protected]>
1 parent 81ee1cd commit 46b5272

14 files changed: 137 additions and 55 deletions

crc/crc16_t10dif_01.asm

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,13 @@
4444

4545
%include "reg_sizes.asm"
4646

47-
%define fetch_dist 1024
47+
%ifndef fetch_dist
48+
%define fetch_dist 4096
49+
%endif
50+
51+
%ifndef PREFETCH
52+
%define PREFETCH prefetcht1
53+
%endif
4854

4955
[bits 64]
5056
default rel
@@ -148,7 +154,7 @@ _fold_128_B_loop:
148154
; update the buffer pointer
149155
add arg2, 128 ; buf += 128;
150156

151-
prefetchnta [arg2+fetch_dist+0]
157+
PREFETCH [arg2+fetch_dist+0]
152158
movdqu xmm9, [arg2+16*0]
153159
movdqu xmm12, [arg2+16*1]
154160
pshufb xmm9, xmm11
@@ -164,7 +170,7 @@ _fold_128_B_loop:
164170
pxor xmm1, xmm12
165171
xorps xmm1, xmm13
166172

167-
prefetchnta [arg2+fetch_dist+32]
173+
PREFETCH [arg2+fetch_dist+32]
168174
movdqu xmm9, [arg2+16*2]
169175
movdqu xmm12, [arg2+16*3]
170176
pshufb xmm9, xmm11
@@ -180,7 +186,7 @@ _fold_128_B_loop:
180186
pxor xmm3, xmm12
181187
xorps xmm3, xmm13
182188

183-
prefetchnta [arg2+fetch_dist+64]
189+
PREFETCH [arg2+fetch_dist+64]
184190
movdqu xmm9, [arg2+16*4]
185191
movdqu xmm12, [arg2+16*5]
186192
pshufb xmm9, xmm11
@@ -196,7 +202,7 @@ _fold_128_B_loop:
196202
pxor xmm5, xmm12
197203
xorps xmm5, xmm13
198204

199-
prefetchnta [arg2+fetch_dist+96]
205+
PREFETCH [arg2+fetch_dist+96]
200206
movdqu xmm9, [arg2+16*6]
201207
movdqu xmm12, [arg2+16*7]
202208
pshufb xmm9, xmm11

crc/crc16_t10dif_02.asm

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,14 @@
4444

4545
%include "reg_sizes.asm"
4646

47-
%define fetch_dist 1024
47+
%ifndef fetch_dist
48+
%define fetch_dist 4096
49+
%endif
50+
51+
%ifndef PREFETCH
52+
%define PREFETCH prefetcht1
53+
%endif
54+
4855

4956
[bits 64]
5057
default rel
@@ -148,7 +155,7 @@ _fold_128_B_loop:
148155
; update the buffer pointer
149156
add arg2, 128 ; buf += 128;
150157

151-
prefetchnta [arg2+fetch_dist+0]
158+
PREFETCH [arg2+fetch_dist+0]
152159
vmovdqu xmm9, [arg2+16*0]
153160
vmovdqu xmm12, [arg2+16*1]
154161
vpshufb xmm9, xmm11
@@ -164,7 +171,7 @@ _fold_128_B_loop:
164171
vpxor xmm1, xmm12
165172
vxorps xmm1, xmm13
166173

167-
prefetchnta [arg2+fetch_dist+32]
174+
PREFETCH [arg2+fetch_dist+32]
168175
vmovdqu xmm9, [arg2+16*2]
169176
vmovdqu xmm12, [arg2+16*3]
170177
vpshufb xmm9, xmm11
@@ -180,7 +187,7 @@ _fold_128_B_loop:
180187
vpxor xmm3, xmm12
181188
vxorps xmm3, xmm13
182189

183-
prefetchnta [arg2+fetch_dist+64]
190+
PREFETCH [arg2+fetch_dist+64]
184191
vmovdqu xmm9, [arg2+16*4]
185192
vmovdqu xmm12, [arg2+16*5]
186193
vpshufb xmm9, xmm11
@@ -196,7 +203,7 @@ _fold_128_B_loop:
196203
vpxor xmm5, xmm12
197204
vxorps xmm5, xmm13
198205

199-
prefetchnta [arg2+fetch_dist+96]
206+
PREFETCH [arg2+fetch_dist+96]
200207
vmovdqu xmm9, [arg2+16*6]
201208
vmovdqu xmm12, [arg2+16*7]
202209
vpshufb xmm9, xmm11

crc/crc16_t10dif_by4.asm

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,13 @@
4545

4646
%include "reg_sizes.asm"
4747

48-
%define fetch_dist 1024
48+
%ifndef fetch_dist
49+
%define fetch_dist 4096
50+
%endif
51+
52+
%ifndef PREFETCH
53+
%define PREFETCH prefetcht1
54+
%endif
4955

5056
[bits 64]
5157
default rel
@@ -132,7 +138,7 @@ _fold_64_B_loop:
132138
; update the buffer pointer
133139
add arg2, 64 ; buf += 64;
134140

135-
prefetchnta [arg2+fetch_dist+0]
141+
PREFETCH [arg2+fetch_dist+0]
136142
movdqu xmm4, xmm0
137143
movdqu xmm5, xmm1
138144

@@ -145,7 +151,7 @@ _fold_64_B_loop:
145151
pxor xmm0, xmm4
146152
pxor xmm1, xmm5
147153

148-
prefetchnta [arg2+fetch_dist+32]
154+
PREFETCH [arg2+fetch_dist+32]
149155
movdqu xmm4, xmm2
150156
movdqu xmm5, xmm3
151157

crc/crc16_t10dif_copy_by4.asm

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@
4646

4747
%include "reg_sizes.asm"
4848

49-
%define fetch_dist 1024
49+
%ifndef fetch_dist
50+
%define fetch_dist 4096
51+
%endif
52+
53+
%ifndef PREFETCH
54+
%define PREFETCH prefetcht1
55+
%endif
5056

5157
[bits 64]
5258
default rel
@@ -142,7 +148,7 @@ _fold_64_B_loop:
142148
add arg3, 64 ; buf += 64;
143149
add arg2, 64
144150

145-
prefetchnta [arg3+fetch_dist+0]
151+
PREFETCH [arg3+fetch_dist+0]
146152
movdqu xmm4, xmm0
147153
movdqu xmm5, xmm1
148154

@@ -155,7 +161,7 @@ _fold_64_B_loop:
155161
pxor xmm0, xmm4
156162
pxor xmm1, xmm5
157163

158-
prefetchnta [arg3+fetch_dist+32]
164+
PREFETCH [arg3+fetch_dist+32]
159165
movdqu xmm4, xmm2
160166
movdqu xmm5, xmm3
161167

crc/crc16_t10dif_copy_by4_02.asm

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@
4646

4747
%include "reg_sizes.asm"
4848

49-
%define fetch_dist 1024
49+
%ifndef fetch_dist
50+
%define fetch_dist 4096
51+
%endif
52+
53+
%ifndef PREFETCH
54+
%define PREFETCH prefetcht1
55+
%endif
5056

5157
[bits 64]
5258
default rel
@@ -142,7 +148,7 @@ _fold_64_B_loop:
142148
add arg3, 64 ; buf += 64;
143149
add arg2, 64
144150

145-
prefetchnta [arg3+fetch_dist+0]
151+
PREFETCH [arg3+fetch_dist+0]
146152
vmovdqu xmm4, xmm0
147153
vmovdqu xmm5, xmm1
148154

@@ -155,7 +161,7 @@ _fold_64_B_loop:
155161
vpxor xmm0, xmm4
156162
vpxor xmm1, xmm5
157163

158-
prefetchnta [arg3+fetch_dist+32]
164+
PREFETCH [arg3+fetch_dist+32]
159165
vmovdqu xmm4, xmm2
160166
vmovdqu xmm5, xmm3
161167

crc/crc32_gzip_refl_by8.asm

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,14 @@
5555

5656
%include "reg_sizes.asm"
5757

58-
%define fetch_dist 1024
58+
%ifndef fetch_dist
59+
%define fetch_dist 4096
60+
%endif
61+
62+
%ifndef PREFETCH
63+
%define PREFETCH prefetcht1
64+
%endif
65+
5966

6067
[bits 64]
6168
default rel
@@ -145,7 +152,7 @@ _fold_128_B_loop:
145152
; update the buffer pointer
146153
add arg2, 128
147154

148-
prefetchnta [arg2+fetch_dist+0]
155+
PREFETCH [arg2+fetch_dist+0]
149156
movdqu xmm9, [arg2+16*0]
150157
movdqu xmm12, [arg2+16*1]
151158
movdqa xmm8, xmm0
@@ -159,7 +166,7 @@ _fold_128_B_loop:
159166
pxor xmm1, xmm12
160167
xorps xmm1, xmm13
161168

162-
prefetchnta [arg2+fetch_dist+32]
169+
PREFETCH [arg2+fetch_dist+32]
163170
movdqu xmm9, [arg2+16*2]
164171
movdqu xmm12, [arg2+16*3]
165172
movdqa xmm8, xmm2
@@ -173,7 +180,7 @@ _fold_128_B_loop:
173180
pxor xmm3, xmm12
174181
xorps xmm3, xmm13
175182

176-
prefetchnta [arg2+fetch_dist+64]
183+
PREFETCH [arg2+fetch_dist+64]
177184
movdqu xmm9, [arg2+16*4]
178185
movdqu xmm12, [arg2+16*5]
179186
movdqa xmm8, xmm4
@@ -187,7 +194,7 @@ _fold_128_B_loop:
187194
pxor xmm5, xmm12
188195
xorps xmm5, xmm13
189196

190-
prefetchnta [arg2+fetch_dist+96]
197+
PREFETCH [arg2+fetch_dist+96]
191198
movdqu xmm9, [arg2+16*6]
192199
movdqu xmm12, [arg2+16*7]
193200
movdqa xmm8, xmm6

crc/crc32_gzip_refl_by8_02.asm

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,13 @@
5555

5656
%include "reg_sizes.asm"
5757

58-
%define fetch_dist 1024
58+
%ifndef fetch_dist
59+
%define fetch_dist 4096
60+
%endif
61+
62+
%ifndef PREFETCH
63+
%define PREFETCH prefetcht1
64+
%endif
5965

6066
[bits 64]
6167
default rel
@@ -135,7 +141,7 @@ crc32_gzip_refl_by8_02:
135141
; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
136142
.fold_128_B_loop:
137143
add arg2, 128
138-
prefetchnta [arg2+fetch_dist+0]
144+
PREFETCH [arg2+fetch_dist+0]
139145
vmovdqu xmm9, [arg2+16*0]
140146
vmovdqu xmm12, [arg2+16*1]
141147
vpclmulqdq xmm8, xmm0, xmm10, 0x10
@@ -147,7 +153,7 @@ crc32_gzip_refl_by8_02:
147153
vpxor xmm1, xmm12
148154
vxorps xmm1, xmm13
149155

150-
prefetchnta [arg2+fetch_dist+32]
156+
PREFETCH [arg2+fetch_dist+32]
151157
vmovdqu xmm9, [arg2+16*2]
152158
vmovdqu xmm12, [arg2+16*3]
153159
vpclmulqdq xmm8, xmm2, xmm10, 0x10
@@ -159,7 +165,7 @@ crc32_gzip_refl_by8_02:
159165
vpxor xmm3, xmm12
160166
vxorps xmm3, xmm13
161167

162-
prefetchnta [arg2+fetch_dist+64]
168+
PREFETCH [arg2+fetch_dist+64]
163169
vmovdqu xmm9, [arg2+16*4]
164170
vmovdqu xmm12, [arg2+16*5]
165171
vpclmulqdq xmm8, xmm4, xmm10, 0x10
@@ -171,7 +177,7 @@ crc32_gzip_refl_by8_02:
171177
vpxor xmm5, xmm12
172178
vxorps xmm5, xmm13
173179

174-
prefetchnta [arg2+fetch_dist+96]
180+
PREFETCH [arg2+fetch_dist+96]
175181
vmovdqu xmm9, [arg2+16*6]
176182
vmovdqu xmm12, [arg2+16*7]
177183
vpclmulqdq xmm8, xmm6, xmm10, 0x10

crc/crc32_ieee_01.asm

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,14 @@
4444

4545
%include "reg_sizes.asm"
4646

47-
%define fetch_dist 1024
47+
%ifndef fetch_dist
48+
%define fetch_dist 4096
49+
%endif
50+
51+
%ifndef PREFETCH
52+
%define PREFETCH prefetcht1
53+
%endif
54+
4855
[bits 64]
4956
default rel
5057

@@ -145,7 +152,7 @@ _fold_128_B_loop:
145152
; update the buffer pointer
146153
add arg2, 128 ; buf += 128;
147154

148-
prefetchnta [arg2+fetch_dist+0]
155+
PREFETCH [arg2+fetch_dist+0]
149156
movdqu xmm9, [arg2+16*0]
150157
movdqu xmm12, [arg2+16*1]
151158
pshufb xmm9, xmm11
@@ -161,7 +168,7 @@ _fold_128_B_loop:
161168
pxor xmm1, xmm12
162169
xorps xmm1, xmm13
163170

164-
prefetchnta [arg2+fetch_dist+32]
171+
PREFETCH [arg2+fetch_dist+32]
165172
movdqu xmm9, [arg2+16*2]
166173
movdqu xmm12, [arg2+16*3]
167174
pshufb xmm9, xmm11
@@ -177,7 +184,7 @@ _fold_128_B_loop:
177184
pxor xmm3, xmm12
178185
xorps xmm3, xmm13
179186

180-
prefetchnta [arg2+fetch_dist+64]
187+
PREFETCH [arg2+fetch_dist+64]
181188
movdqu xmm9, [arg2+16*4]
182189
movdqu xmm12, [arg2+16*5]
183190
pshufb xmm9, xmm11
@@ -193,7 +200,7 @@ _fold_128_B_loop:
193200
pxor xmm5, xmm12
194201
xorps xmm5, xmm13
195202

196-
prefetchnta [arg2+fetch_dist+96]
203+
PREFETCH [arg2+fetch_dist+96]
197204
movdqu xmm9, [arg2+16*6]
198205
movdqu xmm12, [arg2+16*7]
199206
pshufb xmm9, xmm11

0 commit comments

Comments
 (0)