1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+ ; RUN: llc < %s -mtriple=riscv32 -mattr=-zbc,-zbkc -verify-machineinstrs | FileCheck %s --check-prefix=RV32-EXPAND
3+ ; RUN: llc < %s -mtriple=riscv64 -mattr=-zbc,-zbkc -verify-machineinstrs | FileCheck %s --check-prefix=RV64-EXPAND
5+
6+ ; Test CLMUL expansion when the instruction is not natively supported (Zbc and Zbkc are disabled on every RUN line).
7+
8+ declare i8 @llvm.clmul.i8 (i8 %a , i8 %b )
9+ declare i16 @llvm.clmul.i16 (i16 %a , i16 %b )
10+ declare i32 @llvm.clmul.i32 (i32 %a , i32 %b )
11+ declare i64 @llvm.clmul.i64 (i64 %a , i64 %b )
12+ declare i128 @llvm.clmul.i128 (i128 %a , i128 %b )
13+
14+ define i8 @clmul_expand_i8 (i8 %a , i8 %b ) nounwind { ; i8 carry-less multiply of %a and %b with variable operands; only RV32-EXPAND output is checked below
15+ ; RV32-EXPAND-LABEL: clmul_expand_i8:
16+ ; RV32-EXPAND: # %bb.0:
17+ ; RV32-EXPAND-NEXT: andi a2, a0, 1
18+ ; RV32-EXPAND-NEXT: beqz a2, .LBB0_2
19+ ; RV32-EXPAND-NEXT: # %bb.1:
20+ ; RV32-EXPAND-NEXT: mv a2, a1
21+ ; RV32-EXPAND-NEXT: j .LBB0_3
22+ ; RV32-EXPAND-NEXT: .LBB0_2:
23+ ; RV32-EXPAND-NEXT: li a2, 0
24+ ; RV32-EXPAND-NEXT: .LBB0_3:
25+ ; RV32-EXPAND-NEXT: srli a0, a0, 1
26+ ; RV32-EXPAND-NEXT: slli a1, a1, 1
27+ ; RV32-EXPAND-NEXT: andi a3, a0, 1
28+ ; RV32-EXPAND-NEXT: beqz a3, .LBB0_5
29+ ; RV32-EXPAND-NEXT: # %bb.4:
30+ ; RV32-EXPAND-NEXT: xor a2, a2, a1
31+ ; RV32-EXPAND-NEXT: .LBB0_5:
32+ ; RV32-EXPAND-NEXT: srli a0, a0, 1
33+ ; RV32-EXPAND-NEXT: slli a1, a1, 1
34+ ; RV32-EXPAND-NEXT: andi a3, a0, 1
35+ ; RV32-EXPAND-NEXT: beqz a3, .LBB0_7
36+ ; RV32-EXPAND-NEXT: # %bb.6:
37+ ; RV32-EXPAND-NEXT: xor a2, a2, a1
38+ ; RV32-EXPAND-NEXT: .LBB0_7:
39+ ; RV32-EXPAND-NEXT: srli a0, a0, 1
40+ ; RV32-EXPAND-NEXT: slli a1, a1, 1
41+ ; RV32-EXPAND-NEXT: andi a3, a0, 1
42+ ; RV32-EXPAND-NEXT: beqz a3, .LBB0_9
43+ ; RV32-EXPAND-NEXT: # %bb.8:
44+ ; RV32-EXPAND-NEXT: xor a2, a2, a1
45+ ; RV32-EXPAND-NEXT: .LBB0_9:
46+ ; RV32-EXPAND-NEXT: srli a0, a0, 1
47+ ; RV32-EXPAND-NEXT: slli a1, a1, 1
48+ ; RV32-EXPAND-NEXT: andi a3, a0, 1
49+ ; RV32-EXPAND-NEXT: beqz a3, .LBB0_11
50+ ; RV32-EXPAND-NEXT: # %bb.10:
51+ ; RV32-EXPAND-NEXT: xor a2, a2, a1
52+ ; RV32-EXPAND-NEXT: .LBB0_11:
53+ ; RV32-EXPAND-NEXT: srli a0, a0, 1
54+ ; RV32-EXPAND-NEXT: slli a1, a1, 1
55+ ; RV32-EXPAND-NEXT: andi a3, a0, 1
56+ ; RV32-EXPAND-NEXT: beqz a3, .LBB0_13
57+ ; RV32-EXPAND-NEXT: # %bb.12:
58+ ; RV32-EXPAND-NEXT: xor a2, a2, a1
59+ ; RV32-EXPAND-NEXT: .LBB0_13:
60+ ; RV32-EXPAND-NEXT: srli a0, a0, 1
61+ ; RV32-EXPAND-NEXT: slli a1, a1, 1
62+ ; RV32-EXPAND-NEXT: andi a3, a0, 1
63+ ; RV32-EXPAND-NEXT: beqz a3, .LBB0_15
64+ ; RV32-EXPAND-NEXT: # %bb.14:
65+ ; RV32-EXPAND-NEXT: xor a2, a2, a1
66+ ; RV32-EXPAND-NEXT: .LBB0_15:
67+ ; RV32-EXPAND-NEXT: andi a0, a0, 2
68+ ; RV32-EXPAND-NEXT: beqz a0, .LBB0_17
69+ ; RV32-EXPAND-NEXT: # %bb.16:
70+ ; RV32-EXPAND-NEXT: slli a0, a1, 1
71+ ; RV32-EXPAND-NEXT: xor a2, a2, a0
72+ ; RV32-EXPAND-NEXT: .LBB0_17:
73+ ; RV32-EXPAND-NEXT: mv a0, a2
74+ ; RV32-EXPAND-NEXT: ret
75+ %result = call i8 @llvm.clmul.i8 (i8 %a , i8 %b ) ; NOTE(review): the riscv64 RUN line has no RV64-EXPAND checks for this function - regenerate with update_llc_test_checks.py
76+ ret i8 %result
77+ }
78+
79+ define i16 @clmul_expand_i16 (i16 %a , i16 %b ) nounwind { ; i16 carry-less multiply of %a and %b with variable operands; only RV32-EXPAND output is checked below
80+ ; RV32-EXPAND-LABEL: clmul_expand_i16:
81+ ; RV32-EXPAND: # %bb.0:
82+ ; RV32-EXPAND-NEXT: andi a2, a0, 1
83+ ; RV32-EXPAND-NEXT: beqz a2, .LBB1_2
84+ ; RV32-EXPAND-NEXT: # %bb.1:
85+ ; RV32-EXPAND-NEXT: mv a2, a1
86+ ; RV32-EXPAND-NEXT: j .LBB1_3
87+ ; RV32-EXPAND-NEXT: .LBB1_2:
88+ ; RV32-EXPAND-NEXT: li a2, 0
89+ ; RV32-EXPAND-NEXT: .LBB1_3:
90+ ; RV32-EXPAND-NEXT: srli a0, a0, 1
91+ ; RV32-EXPAND-NEXT: slli a1, a1, 1
92+ %result = call i16 @llvm.clmul.i16 (i16 %a , i16 %b ) ; NOTE(review): the RV32-EXPAND checks above stop before any `ret` and there are no RV64-EXPAND checks - looks truncated; regenerate with update_llc_test_checks.py
93+ ret i16 %result
94+ }
95+
96+ define i32 @clmul_expand_i32 (i32 %a , i32 %b ) nounwind { ; i32 carry-less multiply of %a and %b with variable operands; only RV32-EXPAND output is checked below
97+ ; RV32-EXPAND-LABEL: clmul_expand_i32:
98+ ; RV32-EXPAND: # %bb.0:
99+ ; RV32-EXPAND-NEXT: andi a2, a0, 1
100+ ; RV32-EXPAND-NEXT: beqz a2, .LBB2_2
101+ ; RV32-EXPAND-NEXT: # %bb.1:
102+ ; RV32-EXPAND-NEXT: mv a2, a1
103+ ; RV32-EXPAND-NEXT: j .LBB2_3
104+ ; RV32-EXPAND-NEXT: .LBB2_2:
105+ ; RV32-EXPAND-NEXT: li a2, 0
106+ ; RV32-EXPAND-NEXT: .LBB2_3:
107+ ; RV32-EXPAND-NEXT: srli a0, a0, 1
108+ ; RV32-EXPAND-NEXT: slli a1, a1, 1
109+ %result = call i32 @llvm.clmul.i32 (i32 %a , i32 %b ) ; NOTE(review): the RV32-EXPAND checks above stop before any `ret` and there are no RV64-EXPAND checks - looks truncated; regenerate with update_llc_test_checks.py
110+ ret i32 %result
111+ }
112+
113+ define i64 @clmul_expand_i64 (i64 %a , i64 %b ) nounwind { ; i64 carry-less multiply: the RV32 checks expect a call to __clmuldi3, the RV64 checks expect an inline expansion
114+ ; RV32-EXPAND-LABEL: clmul_expand_i64:
115+ ; RV32-EXPAND: # %bb.0:
116+ ; RV32-EXPAND-NEXT: addi sp, sp, -16
117+ ; RV32-EXPAND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
118+ ; RV32-EXPAND-NEXT: call __clmuldi3
119+ ; RV32-EXPAND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
120+ ; RV32-EXPAND-NEXT: addi sp, sp, 16
121+ ; RV32-EXPAND-NEXT: ret
122+ ;
123+ ; RV64-EXPAND-LABEL: clmul_expand_i64:
124+ ; RV64-EXPAND: # %bb.0:
125+ ; RV64-EXPAND-NEXT: andi a2, a0, 1
126+ ; RV64-EXPAND-NEXT: beqz a2, .LBB3_2
127+ ; RV64-EXPAND-NEXT: # %bb.1:
128+ ; RV64-EXPAND-NEXT: mv a2, a1
129+ ; RV64-EXPAND-NEXT: j .LBB3_3
130+ ; RV64-EXPAND-NEXT: .LBB3_2:
131+ ; RV64-EXPAND-NEXT: li a2, 0
132+ ; RV64-EXPAND-NEXT: .LBB3_3:
133+ ; RV64-EXPAND-NEXT: srli a0, a0, 1
134+ ; RV64-EXPAND-NEXT: slli a1, a1, 1
135+ %result = call i64 @llvm.clmul.i64 (i64 %a , i64 %b ) ; NOTE(review): the RV64-EXPAND checks above stop before any `ret` (looks truncated), and __clmuldi3 should be confirmed as a real runtime libcall
136+ ret i64 %result
137+ }
138+
139+ define i128 @clmul_expand_i128 (i128 %a , i128 %b ) nounwind { ; i128 carry-less multiply: both RV32 and RV64 checks expect a call to __clmulti3
140+ ; RV32-EXPAND-LABEL: clmul_expand_i128:
141+ ; RV32-EXPAND: # %bb.0:
142+ ; RV32-EXPAND-NEXT: addi sp, sp, -16
143+ ; RV32-EXPAND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
144+ ; RV32-EXPAND-NEXT: call __clmulti3
145+ ; RV32-EXPAND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
146+ ; RV32-EXPAND-NEXT: addi sp, sp, 16
147+ ; RV32-EXPAND-NEXT: ret
148+ ;
149+ ; RV64-EXPAND-LABEL: clmul_expand_i128:
150+ ; RV64-EXPAND: # %bb.0:
151+ ; RV64-EXPAND-NEXT: addi sp, sp, -16
152+ ; RV64-EXPAND-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
153+ ; RV64-EXPAND-NEXT: call __clmulti3
154+ ; RV64-EXPAND-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
155+ ; RV64-EXPAND-NEXT: addi sp, sp, 16
156+ ; RV64-EXPAND-NEXT: ret
157+ %result = call i128 @llvm.clmul.i128 (i128 %a , i128 %b ) ; NOTE(review): confirm __clmulti3 is a real runtime libcall and that the checks match actual llc output
158+ ret i128 %result
159+ }
160+
161+ ; Test with known constants to verify correctness
162+ define i8 @clmul_expand_i8_known () nounwind { ; constant operands: the checks expect the call to fold to an immediate
163+ ; RV32-EXPAND-LABEL: clmul_expand_i8_known:
164+ ; RV32-EXPAND: # %bb.0:
165+ ; RV32-EXPAND-NEXT: li a0, 30
166+ ; RV32-EXPAND-NEXT: ret
167+ ;
168+ ; RV64-EXPAND-LABEL: clmul_expand_i8_known:
169+ ; RV64-EXPAND: # %bb.0:
170+ ; RV64-EXPAND-NEXT: li a0, 30
171+ ; RV64-EXPAND-NEXT: ret
172+ ; clmul(5, 6) = clmul(0b101, 0b110) = (0b110 << 0) ^ (0b110 << 2) = 0b11110 = 30
173+ %result = call i8 @llvm.clmul.i8 (i8 5 , i8 6 )
174+ ret i8 %result
175+ }
176+
177+ define i16 @clmul_expand_i16_known () nounwind { ; constant operands: the checks expect the call to fold to an immediate
178+ ; RV32-EXPAND-LABEL: clmul_expand_i16_known:
179+ ; RV32-EXPAND: # %bb.0:
180+ ; RV32-EXPAND-NEXT: li a0, 75
181+ ; RV32-EXPAND-NEXT: ret
182+ ;
183+ ; RV64-EXPAND-LABEL: clmul_expand_i16_known:
184+ ; RV64-EXPAND: # %bb.0:
185+ ; RV64-EXPAND-NEXT: li a0, 75
186+ ; RV64-EXPAND-NEXT: ret
187+ ; clmul(15, 13) = clmul(0b1111, 0b1101) = 0b1111 ^ (0b1111 << 2) ^ (0b1111 << 3) = 0b1001011 = 75
188+ %result = call i16 @llvm.clmul.i16 (i16 15 , i16 13 )
189+ ret i16 %result
190+ }
0 commit comments