84
84
85
85
/*
 * lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg)
 *
 * Shown here in diff form: lines starting with '-' are the removed version,
 * lines starting with '+' are the added version, and the bare numbers appear
 * to be the diff viewer's old/new line-number gutter.
 *
 * Performs four 32-bit table lookups in s1..s4 (tables defined outside this
 * view) and folds them into dst ## d:
 *   - index bytes come from src ## bh and src ## bl, zero-extended by movzbl;
 *   - the s1 entry is loaded with movl, the s2 entry is combined with op1;
 *   - src is shifted right by 16 (src is modified), then the s3 and s4
 *     entries are combined with op2 and op3;
 *   - interleave_op(il_reg) is expanded before the final lookup.
 *
 * Removed form: each table is addressed directly as sN(, index, 4).
 * Added form:   the table address is first loaded with leaq sN(%rip) and the
 *               entry is then read as (base, index, 4), since a RIP-relative
 *               operand cannot also carry an index register.
 *
 * In the added form RID1 and RID2 alternate between holding the byte index
 * and the table base, so both are overwritten by every expansion.
 */
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
86
86
movzbl src ## bh, RID1d; \
87
+ leaq s1(%rip ), RID2; \
88
+ movl (RID2,RID1,4 ), dst ## d; \
87
89
movzbl src ## bl, RID2d; \
90
+ leaq s2(%rip ), RID1; \
91
+ op1 (RID1,RID2,4 ), dst ## d; \
88
92
shrq $16 , src; \
89
- movl s1(, RID1, 4 ), dst ## d; \
90
- op1 s2(, RID2, 4 ), dst ## d; \
91
93
movzbl src ## bh, RID1d; \
94
+ leaq s3(%rip ), RID2; \
95
+ op2 (RID2,RID1,4 ), dst ## d; \
92
96
movzbl src ## bl, RID2d; \
93
97
interleave_op(il_reg); \
94
- op2 s3(, RID1, 4 ), dst ## d; \
95
- op3 s4(, RID2, 4 ), dst ## d;
98
+ leaq s4( %rip ), RID1; \
99
+ op3 (RID1, RID2,4 ), dst ## d;
96
100
97
101
/*
 * dummy(d): one-argument macro that expands to nothing; the argument is
 * discarded. NOTE(review): presumably passed where a macro-valued parameter
 * (such as interleave_op or do_mask) needs a no-op; confirm at the call
 * sites, which are not visible in this excerpt.
 */
#define dummy(d) /* do nothing */
98
102
175
179
qop(RD, RC, 1 );
176
180
177
181
/*
 * shuffle(mask): applies vpshufb to RKR in place (RKR is both source and
 * destination), using `mask` as the shuffle-control memory operand.
 *
 * Shown in diff form: the '-' line references `mask` by absolute address,
 * the '+' line references it RIP-relative as mask(%rip). The bare numbers
 * appear to be the diff viewer's line-number gutter.
 */
#define shuffle(mask) \
178
- vpshufb mask, RKR, RKR;
182
+ vpshufb mask( %rip ) , RKR, RKR;
179
183
180
184
/*
 * preload_rkr(n, do_mask, mask): prepares RKR from the context at CTX.
 *
 *   1. vbroadcastss fills RKR with the 32-bit value stored at .L16_mask.
 *   2. vpxor combines RKR with the memory operand at (kr + n*16)(CTX),
 *      leaving the result in RKR.
 *   3. do_mask(mask) is expanded last; what it does depends on the macro
 *      the caller passes in.
 *
 * Shown in diff form: the only change is that .L16_mask goes from an
 * absolute reference ('-' line) to a RIP-relative one ('+' line). The bare
 * numbers appear to be the diff viewer's line-number gutter.
 */
#define preload_rkr(n, do_mask, mask) \
181
- vbroadcastss .L16_mask, RKR; \
185
+ vbroadcastss .L16_mask( %rip ), RKR; \
182
186
/* add 16-bit rotation to key rotations (mod 32) */ \
183
187
vpxor (kr+n*16 )(CTX), RKR, RKR; \
184
188
do_mask(mask);
@@ -258,9 +262,9 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
258
262
259
263
movq %rdi , CTX;
260
264
261
- vmovdqa .Lbswap_mask, RKM;
262
- vmovd .Lfirst_mask, R1ST;
263
- vmovd .L32_mask, R32;
265
+ vmovdqa .Lbswap_mask( %rip ) , RKM;
266
+ vmovd .Lfirst_mask( %rip ) , R1ST;
267
+ vmovd .L32_mask( %rip ) , R32;
264
268
265
269
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
266
270
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -284,7 +288,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
284
288
popq %rbx ;
285
289
popq %r15 ;
286
290
287
- vmovdqa .Lbswap_mask, RKM;
291
+ vmovdqa .Lbswap_mask( %rip ) , RKM;
288
292
289
293
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
290
294
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -306,9 +310,9 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
306
310
307
311
movq %rdi , CTX;
308
312
309
- vmovdqa .Lbswap_mask, RKM;
310
- vmovd .Lfirst_mask, R1ST;
311
- vmovd .L32_mask, R32;
313
+ vmovdqa .Lbswap_mask( %rip ) , RKM;
314
+ vmovd .Lfirst_mask( %rip ) , R1ST;
315
+ vmovd .L32_mask( %rip ) , R32;
312
316
313
317
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
314
318
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -332,7 +336,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
332
336
popq %rbx ;
333
337
popq %r15 ;
334
338
335
- vmovdqa .Lbswap_mask, RKM;
339
+ vmovdqa .Lbswap_mask( %rip ) , RKM;
336
340
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
337
341
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
338
342
0 commit comments