Skip to content

Commit 81ee1cd

Browse files
pablodelara authored and mdcornu committed
crc: prefetch data with prefetcht0 for VPCLMUL implementations
Signed-off-by: Pablo de Lara <[email protected]>
1 parent 4613c5a commit 81ee1cd

6 files changed

+69 additions, -3 deletions (lines changed)

crc/crc16_t10dif_by16_10.asm

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,14 @@
5151
%define FUNCTION_NAME crc16_t10dif_by16_10
5252
%endif
5353

54+
%ifndef fetch_dist
55+
%define fetch_dist 1536
56+
%endif
57+
58+
%ifndef PREFETCH
59+
%define PREFETCH prefetcht0
60+
%endif
61+
5462
[bits 64]
5563
default rel
5664

@@ -139,24 +147,28 @@ FUNCTION_NAME:
139147

140148
.fold_256_B_loop:
141149
add arg2, 256
150+
PREFETCH [arg2+fetch_dist+0]
142151
vmovdqu8 zmm3, [arg2+16*0]
143152
vpshufb zmm3, zmm3, zmm18
144153
vpclmulqdq zmm1, zmm0, zmm16, 0x00
145154
vpclmulqdq zmm0, zmm0, zmm16, 0x11
146155
vpternlogq zmm0, zmm1, zmm3, 0x96
147156

157+
PREFETCH [arg2+fetch_dist+64]
148158
vmovdqu8 zmm9, [arg2+16*4]
149159
vpshufb zmm9, zmm9, zmm18
150160
vpclmulqdq zmm5, zmm4, zmm16, 0x00
151161
vpclmulqdq zmm4, zmm4, zmm16, 0x11
152162
vpternlogq zmm4, zmm5, zmm9, 0x96
153163

164+
PREFETCH [arg2+fetch_dist+64*2]
154165
vmovdqu8 zmm11, [arg2+16*8]
155166
vpshufb zmm11, zmm11, zmm18
156167
vpclmulqdq zmm12, zmm7, zmm16, 0x00
157168
vpclmulqdq zmm7, zmm7, zmm16, 0x11
158169
vpternlogq zmm7, zmm12, zmm11, 0x96
159170

171+
PREFETCH [arg2+fetch_dist+64*3]
160172
vmovdqu8 zmm17, [arg2+16*12]
161173
vpshufb zmm17, zmm17, zmm18
162174
vpclmulqdq zmm14, zmm8, zmm16, 0x00

crc/crc32_gzip_refl_by16_10.asm

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,13 @@
5959
%define FUNCTION_NAME crc32_gzip_refl_by16_10
6060
%endif
6161

62-
%define fetch_dist 1024
62+
%ifndef fetch_dist
63+
%define fetch_dist 1536
64+
%endif
65+
66+
%ifndef PREFETCH
67+
%define PREFETCH prefetcht0
68+
%endif
6369

6470
[bits 64]
6571
default rel
@@ -130,18 +136,22 @@ FUNCTION_NAME:
130136
align 16
131137
.fold_256_B_loop:
132138
add arg2, 256
139+
PREFETCH [arg2+fetch_dist+0]
133140
vpclmulqdq zmm1, zmm0, zmm16, 0x10
134141
vpclmulqdq zmm0, zmm0, zmm16, 0x01
135142
vpternlogq zmm0, zmm1, [arg2+16*0], 0x96
136143

144+
PREFETCH [arg2+fetch_dist+64]
137145
vpclmulqdq zmm2, zmm4, zmm16, 0x10
138146
vpclmulqdq zmm4, zmm4, zmm16, 0x01
139147
vpternlogq zmm4, zmm2, [arg2+16*4], 0x96
140148

149+
PREFETCH [arg2+fetch_dist+64*2]
141150
vpclmulqdq zmm3, zmm7, zmm16, 0x10
142151
vpclmulqdq zmm7, zmm7, zmm16, 0x01
143152
vpternlogq zmm7, zmm3, [arg2+16*8], 0x96
144153

154+
PREFETCH [arg2+fetch_dist+64*3]
145155
vpclmulqdq zmm5, zmm8, zmm16, 0x10
146156
vpclmulqdq zmm8, zmm8, zmm16, 0x01
147157
vpternlogq zmm8, zmm5, [arg2+16*12], 0x96

crc/crc32_ieee_by16_10.asm

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,14 @@
5151
%define FUNCTION_NAME crc32_ieee_by16_10
5252
%endif
5353

54+
%ifndef fetch_dist
55+
%define fetch_dist 1536
56+
%endif
57+
58+
%ifndef PREFETCH
59+
%define PREFETCH prefetcht0
60+
%endif
61+
5462
[bits 64]
5563
default rel
5664

@@ -129,24 +137,28 @@ align 16
129137
.fold_256_B_loop:
130138
add arg2, 256
131139
vmovdqu8 zmm3, [arg2+16*0]
140+
PREFETCH [arg2+fetch_dist+0]
132141
vpshufb zmm3, zmm3, zmm18
133142
vpclmulqdq zmm1, zmm0, zmm16, 0x00
134143
vpclmulqdq zmm0, zmm0, zmm16, 0x11
135144
vpternlogq zmm0, zmm1, zmm3, 0x96
136145

137146
vmovdqu8 zmm9, [arg2+16*4]
147+
PREFETCH [arg2+fetch_dist+64]
138148
vpshufb zmm9, zmm9, zmm18
139149
vpclmulqdq zmm5, zmm4, zmm16, 0x00
140150
vpclmulqdq zmm4, zmm4, zmm16, 0x11
141151
vpternlogq zmm4, zmm5, zmm9, 0x96
142152

143153
vmovdqu8 zmm11, [arg2+16*8]
154+
PREFETCH [arg2+fetch_dist+64*2]
144155
vpshufb zmm11, zmm11, zmm18
145156
vpclmulqdq zmm12, zmm7, zmm16, 0x00
146157
vpclmulqdq zmm7, zmm7, zmm16, 0x11
147158
vpternlogq zmm7, zmm12, zmm11, 0x96
148159

149160
vmovdqu8 zmm17, [arg2+16*12]
161+
PREFETCH [arg2+fetch_dist+64*3]
150162
vpshufb zmm17, zmm17, zmm18
151163
vpclmulqdq zmm14, zmm8, zmm16, 0x00
152164
vpclmulqdq zmm8, zmm8, zmm16, 0x11

crc/crc32_iscsi_by16_10.asm

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,14 @@
5151
%define FUNCTION_NAME crc32_iscsi_by16_10
5252
%endif
5353

54+
%ifndef fetch_dist
55+
%define fetch_dist 1536
56+
%endif
57+
58+
%ifndef PREFETCH
59+
%define PREFETCH prefetcht0
60+
%endif
61+
5462
[bits 64]
5563
default rel
5664

@@ -117,18 +125,22 @@ FUNCTION_NAME:
117125
align 16
118126
.fold_256_B_loop:
119127
add arg2, 256
128+
PREFETCH [arg2+fetch_dist+0]
120129
vpclmulqdq zmm1, zmm0, zmm16, 0x10
121130
vpclmulqdq zmm0, zmm0, zmm16, 0x01
122131
vpternlogq zmm0, zmm1, [arg2+16*0], 0x96
123132

133+
PREFETCH [arg2+fetch_dist+64]
124134
vpclmulqdq zmm2, zmm4, zmm16, 0x10
125135
vpclmulqdq zmm4, zmm4, zmm16, 0x01
126136
vpternlogq zmm4, zmm2, [arg2+16*4], 0x96
127137

138+
PREFETCH [arg2+fetch_dist+64*2]
128139
vpclmulqdq zmm3, zmm7, zmm16, 0x10
129140
vpclmulqdq zmm7, zmm7, zmm16, 0x01
130141
vpternlogq zmm7, zmm3, [arg2+16*8], 0x96
131142

143+
PREFETCH [arg2+fetch_dist+64*3]
132144
vpclmulqdq zmm5, zmm8, zmm16, 0x10
133145
vpclmulqdq zmm8, zmm8, zmm16, 0x01
134146
vpternlogq zmm8, zmm5, [arg2+16*12], 0x96

crc/crc64_iso_norm_by16_10.asm

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,13 @@
4141
%define FUNCTION_NAME crc64_iso_norm_by16_10
4242
%endif
4343

44-
%define fetch_dist 1024
44+
%ifndef fetch_dist
45+
%define fetch_dist 1536
46+
%endif
47+
48+
%ifndef PREFETCH
49+
%define PREFETCH prefetcht0
50+
%endif
4551

4652
[bits 64]
4753
default rel
@@ -111,24 +117,28 @@ FUNCTION_NAME:
111117

112118
_fold_256_B_loop:
113119
add arg2, 256
120+
PREFETCH [arg2+fetch_dist+0]
114121
vmovdqu8 zmm3, [arg2+16*0]
115122
vpshufb zmm3, zmm3, zmm18
116123
vpclmulqdq zmm1, zmm0, zmm16, 0x00
117124
vpclmulqdq zmm0, zmm0, zmm16, 0x11
118125
vpternlogq zmm0, zmm1, zmm3, 0x96
119126

127+
PREFETCH [arg2+fetch_dist+64]
120128
vmovdqu8 zmm9, [arg2+16*4]
121129
vpshufb zmm9, zmm9, zmm18
122130
vpclmulqdq zmm5, zmm4, zmm16, 0x00
123131
vpclmulqdq zmm4, zmm4, zmm16, 0x11
124132
vpternlogq zmm4, zmm5, zmm9, 0x96
125133

134+
PREFETCH [arg2+fetch_dist+64*2]
126135
vmovdqu8 zmm11, [arg2+16*8]
127136
vpshufb zmm11, zmm11, zmm18
128137
vpclmulqdq zmm12, zmm7, zmm16, 0x00
129138
vpclmulqdq zmm7, zmm7, zmm16, 0x11
130139
vpternlogq zmm7, zmm12, zmm11, 0x96
131140

141+
PREFETCH [arg2+fetch_dist+64*3]
132142
vmovdqu8 zmm17, [arg2+16*12]
133143
vpshufb zmm17, zmm17, zmm18
134144
vpclmulqdq zmm14, zmm8, zmm16, 0x00

crc/crc64_iso_refl_by16_10.asm

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,13 @@
4141
%define FUNCTION_NAME crc64_iso_refl_by16_10
4242
%endif
4343

44-
%define fetch_dist 1024
44+
%ifndef fetch_dist
45+
%define fetch_dist 1536
46+
%endif
47+
48+
%ifndef PREFETCH
49+
%define PREFETCH prefetcht0
50+
%endif
4551

4652
[bits 64]
4753
default rel
@@ -112,18 +118,22 @@ FUNCTION_NAME:
112118

113119
_fold_256_B_loop:
114120
add arg2, 256
121+
PREFETCH [arg2+fetch_dist+0]
115122
vpclmulqdq zmm1, zmm0, zmm16, 0x10
116123
vpclmulqdq zmm0, zmm0, zmm16, 0x01
117124
vpternlogq zmm0, zmm1, [arg2+16*0], 0x96
118125

126+
PREFETCH [arg2+fetch_dist+64]
119127
vpclmulqdq zmm2, zmm4, zmm16, 0x10
120128
vpclmulqdq zmm4, zmm4, zmm16, 0x01
121129
vpternlogq zmm4, zmm2, [arg2+16*4], 0x96
122130

131+
PREFETCH [arg2+fetch_dist+64*2]
123132
vpclmulqdq zmm3, zmm7, zmm16, 0x10
124133
vpclmulqdq zmm7, zmm7, zmm16, 0x01
125134
vpternlogq zmm7, zmm3, [arg2+16*8], 0x96
126135

136+
PREFETCH [arg2+fetch_dist+64*3]
127137
vpclmulqdq zmm5, zmm8, zmm16, 0x10
128138
vpclmulqdq zmm8, zmm8, zmm16, 0x01
129139
vpternlogq zmm8, zmm5, [arg2+16*12], 0x96

0 commit comments

Comments
 (0)