Commit 24ff1e9

ardbiesheuvel authored and herbertx committed
crypto: x86/camellia - Use RIP-relative addressing
Prefer RIP-relative addressing where possible, which removes the need
for boot time relocation fixups.

Co-developed-by: Thomas Garnier <[email protected]>
Signed-off-by: Thomas Garnier <[email protected]>
Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent 52fc482 commit 24ff1e9

File tree

3 files changed: +34 -32 lines changed
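The bulk of the change, in the two AVX implementations below, is a pure addressing-mode switch on constant loads. As a minimal sketch of the difference (using a made-up label .Lconst rather than a symbol from these files):

	/* Absolute addressing: the displacement encodes .Lconst's link-time
	 * address, so it must be patched at boot if the kernel is relocated. */
	vmovdqa .Lconst, %xmm0

	/* RIP-relative addressing: the displacement encodes the distance from
	 * the next instruction to .Lconst, which is the same wherever the code
	 * is loaded, so no relocation fixup is needed. */
	vmovdqa .Lconst(%rip), %xmm0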

arch/x86/crypto/camellia-aesni-avx-asm_64.S

Lines changed: 15 additions & 15 deletions
@@ -52,10 +52,10 @@
 	/* \
 	 * S-function with AES subbytes \
 	 */ \
-	vmovdqa .Linv_shift_row, t4; \
-	vbroadcastss .L0f0f0f0f, t7; \
-	vmovdqa .Lpre_tf_lo_s1, t0; \
-	vmovdqa .Lpre_tf_hi_s1, t1; \
+	vmovdqa .Linv_shift_row(%rip), t4; \
+	vbroadcastss .L0f0f0f0f(%rip), t7; \
+	vmovdqa .Lpre_tf_lo_s1(%rip), t0; \
+	vmovdqa .Lpre_tf_hi_s1(%rip), t1; \
 	\
 	/* AES inverse shift rows */ \
 	vpshufb t4, x0, x0; \
@@ -68,8 +68,8 @@
 	vpshufb t4, x6, x6; \
 	\
 	/* prefilter sboxes 1, 2 and 3 */ \
-	vmovdqa .Lpre_tf_lo_s4, t2; \
-	vmovdqa .Lpre_tf_hi_s4, t3; \
+	vmovdqa .Lpre_tf_lo_s4(%rip), t2; \
+	vmovdqa .Lpre_tf_hi_s4(%rip), t3; \
 	filter_8bit(x0, t0, t1, t7, t6); \
 	filter_8bit(x7, t0, t1, t7, t6); \
 	filter_8bit(x1, t0, t1, t7, t6); \
@@ -83,8 +83,8 @@
 	filter_8bit(x6, t2, t3, t7, t6); \
 	\
 	/* AES subbytes + AES shift rows */ \
-	vmovdqa .Lpost_tf_lo_s1, t0; \
-	vmovdqa .Lpost_tf_hi_s1, t1; \
+	vmovdqa .Lpost_tf_lo_s1(%rip), t0; \
+	vmovdqa .Lpost_tf_hi_s1(%rip), t1; \
 	vaesenclast t4, x0, x0; \
 	vaesenclast t4, x7, x7; \
 	vaesenclast t4, x1, x1; \
@@ -95,16 +95,16 @@
 	vaesenclast t4, x6, x6; \
 	\
 	/* postfilter sboxes 1 and 4 */ \
-	vmovdqa .Lpost_tf_lo_s3, t2; \
-	vmovdqa .Lpost_tf_hi_s3, t3; \
+	vmovdqa .Lpost_tf_lo_s3(%rip), t2; \
+	vmovdqa .Lpost_tf_hi_s3(%rip), t3; \
 	filter_8bit(x0, t0, t1, t7, t6); \
 	filter_8bit(x7, t0, t1, t7, t6); \
 	filter_8bit(x3, t0, t1, t7, t6); \
 	filter_8bit(x6, t0, t1, t7, t6); \
 	\
 	/* postfilter sbox 3 */ \
-	vmovdqa .Lpost_tf_lo_s2, t4; \
-	vmovdqa .Lpost_tf_hi_s2, t5; \
+	vmovdqa .Lpost_tf_lo_s2(%rip), t4; \
+	vmovdqa .Lpost_tf_hi_s2(%rip), t5; \
 	filter_8bit(x2, t2, t3, t7, t6); \
 	filter_8bit(x5, t2, t3, t7, t6); \
 	\
@@ -443,7 +443,7 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vmovdqu .Lshufb_16x16b, a0; \
+	vmovdqu .Lshufb_16x16b(%rip), a0; \
 	vmovdqu st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -482,7 +482,7 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 #define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
 		      y6, y7, rio, key) \
 	vmovq key, x0; \
-	vpshufb .Lpack_bswap, x0, x0; \
+	vpshufb .Lpack_bswap(%rip), x0, x0; \
 	\
 	vpxor 0 * 16(rio), x0, y7; \
 	vpxor 1 * 16(rio), x0, y6; \
@@ -533,7 +533,7 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	vmovdqu x0, stack_tmp0; \
 	\
 	vmovq key, x0; \
-	vpshufb .Lpack_bswap, x0, x0; \
+	vpshufb .Lpack_bswap(%rip), x0, x0; \
 	\
 	vpxor x0, y7, y7; \
 	vpxor x0, y6, y6; \

arch/x86/crypto/camellia-aesni-avx2-asm_64.S

Lines changed: 15 additions & 15 deletions
@@ -64,12 +64,12 @@
 	/* \
 	 * S-function with AES subbytes \
 	 */ \
-	vbroadcasti128 .Linv_shift_row, t4; \
-	vpbroadcastd .L0f0f0f0f, t7; \
-	vbroadcasti128 .Lpre_tf_lo_s1, t5; \
-	vbroadcasti128 .Lpre_tf_hi_s1, t6; \
-	vbroadcasti128 .Lpre_tf_lo_s4, t2; \
-	vbroadcasti128 .Lpre_tf_hi_s4, t3; \
+	vbroadcasti128 .Linv_shift_row(%rip), t4; \
+	vpbroadcastd .L0f0f0f0f(%rip), t7; \
+	vbroadcasti128 .Lpre_tf_lo_s1(%rip), t5; \
+	vbroadcasti128 .Lpre_tf_hi_s1(%rip), t6; \
+	vbroadcasti128 .Lpre_tf_lo_s4(%rip), t2; \
+	vbroadcasti128 .Lpre_tf_hi_s4(%rip), t3; \
 	\
 	/* AES inverse shift rows */ \
 	vpshufb t4, x0, x0; \
@@ -115,8 +115,8 @@
 	vinserti128 $1, t2##_x, x6, x6; \
 	vextracti128 $1, x1, t3##_x; \
 	vextracti128 $1, x4, t2##_x; \
-	vbroadcasti128 .Lpost_tf_lo_s1, t0; \
-	vbroadcasti128 .Lpost_tf_hi_s1, t1; \
+	vbroadcasti128 .Lpost_tf_lo_s1(%rip), t0; \
+	vbroadcasti128 .Lpost_tf_hi_s1(%rip), t1; \
 	vaesenclast t4##_x, x2##_x, x2##_x; \
 	vaesenclast t4##_x, t6##_x, t6##_x; \
 	vinserti128 $1, t6##_x, x2, x2; \
@@ -131,16 +131,16 @@
 	vinserti128 $1, t2##_x, x4, x4; \
 	\
 	/* postfilter sboxes 1 and 4 */ \
-	vbroadcasti128 .Lpost_tf_lo_s3, t2; \
-	vbroadcasti128 .Lpost_tf_hi_s3, t3; \
+	vbroadcasti128 .Lpost_tf_lo_s3(%rip), t2; \
+	vbroadcasti128 .Lpost_tf_hi_s3(%rip), t3; \
 	filter_8bit(x0, t0, t1, t7, t6); \
 	filter_8bit(x7, t0, t1, t7, t6); \
 	filter_8bit(x3, t0, t1, t7, t6); \
 	filter_8bit(x6, t0, t1, t7, t6); \
 	\
 	/* postfilter sbox 3 */ \
-	vbroadcasti128 .Lpost_tf_lo_s2, t4; \
-	vbroadcasti128 .Lpost_tf_hi_s2, t5; \
+	vbroadcasti128 .Lpost_tf_lo_s2(%rip), t4; \
+	vbroadcasti128 .Lpost_tf_hi_s2(%rip), t5; \
 	filter_8bit(x2, t2, t3, t7, t6); \
 	filter_8bit(x5, t2, t3, t7, t6); \
 	\
@@ -475,7 +475,7 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vbroadcasti128 .Lshufb_16x16b, a0; \
+	vbroadcasti128 .Lshufb_16x16b(%rip), a0; \
 	vmovdqu st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -514,7 +514,7 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 #define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
 		      y6, y7, rio, key) \
 	vpbroadcastq key, x0; \
-	vpshufb .Lpack_bswap, x0, x0; \
+	vpshufb .Lpack_bswap(%rip), x0, x0; \
 	\
 	vpxor 0 * 32(rio), x0, y7; \
 	vpxor 1 * 32(rio), x0, y6; \
@@ -565,7 +565,7 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	vmovdqu x0, stack_tmp0; \
 	\
 	vpbroadcastq key, x0; \
-	vpshufb .Lpack_bswap, x0, x0; \
+	vpshufb .Lpack_bswap(%rip), x0, x0; \
 	\
 	vpxor x0, y7, y7; \
 	vpxor x0, y6, y6; \

arch/x86/crypto/camellia-x86_64-asm_64.S

Lines changed: 4 additions & 2 deletions
@@ -77,11 +77,13 @@
 #define RXORbl %r9b
 
 #define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
+	leaq T0(%rip), tmp1; \
 	movzbl ab ## bl, tmp2 ## d; \
+	xorq (tmp1, tmp2, 8), dst; \
+	leaq T1(%rip), tmp2; \
 	movzbl ab ## bh, tmp1 ## d; \
 	rorq $16, ab; \
-	xorq T0(, tmp2, 8), dst; \
-	xorq T1(, tmp1, 8), dst;
+	xorq (tmp2, tmp1, 8), dst;
 
 /**********************************************************************
  * 1-way camellia
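Unlike the AVX files, the 1-way implementation cannot simply append (%rip) to its operands: a lookup such as xorq T0(, tmp2, 8), dst combines an absolute symbol displacement with a scaled index register, and x86-64 has no encoding for a RIP-relative displacement together with an index register. The macro is therefore reordered to materialise each table address with leaq first and index through a general-purpose register. A hypothetical expansion of the new xor2ror16, with illustrative table names and registers (not the ones the file actually assigns):

	leaq .Ltable0(%rip), %rsi	/* tmp1 = &T0, position-independent */
	movzbl %al, %edi		/* tmp2 = low byte of ab */
	xorq (%rsi, %rdi, 8), %rcx	/* dst ^= T0[tmp2] */
	leaq .Ltable1(%rip), %rdi	/* tmp2 reused to hold &T1 */
	movzbl %ah, %esi		/* tmp1 = next byte of ab */
	rorq $16, %rax			/* rotate ab to expose the next byte pair */
	xorq (%rdi, %rsi, 8), %rcx	/* dst ^= T1[tmp1] */

The net cost is two extra leaq instructions per expansion, which is why this file grows by two lines while the two AVX files keep the same line count.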
