Skip to content

Commit 7f8ec31

Browse files
ardbiesheuvel authored and herbertx committed
crypto: x86/cast6 - Use RIP-relative addressing
Prefer RIP-relative addressing where possible, which removes the need for boot time relocation fixups.

Co-developed-by: Thomas Garnier <[email protected]>
Signed-off-by: Thomas Garnier <[email protected]>
Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent 0dcc778 commit 7f8ec31

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

arch/x86/crypto/cast6-avx-x86_64-asm_64.S

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -84,15 +84,19 @@
8484

8585
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
8686
movzbl src ## bh, RID1d; \
87+
leaq s1(%rip), RID2; \
88+
movl (RID2,RID1,4), dst ## d; \
8789
movzbl src ## bl, RID2d; \
90+
leaq s2(%rip), RID1; \
91+
op1 (RID1,RID2,4), dst ## d; \
8892
shrq $16, src; \
89-
movl s1(, RID1, 4), dst ## d; \
90-
op1 s2(, RID2, 4), dst ## d; \
9193
movzbl src ## bh, RID1d; \
94+
leaq s3(%rip), RID2; \
95+
op2 (RID2,RID1,4), dst ## d; \
9296
movzbl src ## bl, RID2d; \
9397
interleave_op(il_reg); \
94-
op2 s3(, RID1, 4), dst ## d; \
95-
op3 s4(, RID2, 4), dst ## d;
98+
leaq s4(%rip), RID1; \
99+
op3 (RID1,RID2,4), dst ## d;
96100

97101
#define dummy(d) /* do nothing */
98102

@@ -175,10 +179,10 @@
175179
qop(RD, RC, 1);
176180

177181
#define shuffle(mask) \
178-
vpshufb mask, RKR, RKR;
182+
vpshufb mask(%rip), RKR, RKR;
179183

180184
#define preload_rkr(n, do_mask, mask) \
181-
vbroadcastss .L16_mask, RKR; \
185+
vbroadcastss .L16_mask(%rip), RKR; \
182186
/* add 16-bit rotation to key rotations (mod 32) */ \
183187
vpxor (kr+n*16)(CTX), RKR, RKR; \
184188
do_mask(mask);
@@ -258,9 +262,9 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
258262

259263
movq %rdi, CTX;
260264

261-
vmovdqa .Lbswap_mask, RKM;
262-
vmovd .Lfirst_mask, R1ST;
263-
vmovd .L32_mask, R32;
265+
vmovdqa .Lbswap_mask(%rip), RKM;
266+
vmovd .Lfirst_mask(%rip), R1ST;
267+
vmovd .L32_mask(%rip), R32;
264268

265269
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
266270
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -284,7 +288,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
284288
popq %rbx;
285289
popq %r15;
286290

287-
vmovdqa .Lbswap_mask, RKM;
291+
vmovdqa .Lbswap_mask(%rip), RKM;
288292

289293
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
290294
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -306,9 +310,9 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
306310

307311
movq %rdi, CTX;
308312

309-
vmovdqa .Lbswap_mask, RKM;
310-
vmovd .Lfirst_mask, R1ST;
311-
vmovd .L32_mask, R32;
313+
vmovdqa .Lbswap_mask(%rip), RKM;
314+
vmovd .Lfirst_mask(%rip), R1ST;
315+
vmovd .L32_mask(%rip), R32;
312316

313317
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
314318
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -332,7 +336,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
332336
popq %rbx;
333337
popq %r15;
334338

335-
vmovdqa .Lbswap_mask, RKM;
339+
vmovdqa .Lbswap_mask(%rip), RKM;
336340
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
337341
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
338342

0 commit comments

Comments
 (0)