Skip to content

Commit 3846290

Browse files
author
Oscar Smith
committed
claude tries to write tests
1 parent 5143eef commit 3846290

File tree

4 files changed

+709
-0
lines changed

4 files changed

+709
-0
lines changed
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=-zbc,-zbkc -verify-machineinstrs | FileCheck %s --check-prefix=RV32-EXPAND
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=-zbc,-zbkc -verify-machineinstrs | FileCheck %s --check-prefix=RV64-EXPAND
4+
; RUN: llc < %s -mtriple=x86_64 -verify-machineinstrs | FileCheck %s --check-prefix=X64-EXPAND
5+
6+
; Test CLMUL expansion when the instruction is not natively supported
7+
8+
declare i8 @llvm.clmul.i8(i8 %a, i8 %b)
9+
declare i16 @llvm.clmul.i16(i16 %a, i16 %b)
10+
declare i32 @llvm.clmul.i32(i32 %a, i32 %b)
11+
declare i64 @llvm.clmul.i64(i64 %a, i64 %b)
12+
declare i128 @llvm.clmul.i128(i128 %a, i128 %b)
13+
14+
define i8 @clmul_expand_i8(i8 %a, i8 %b) nounwind {
15+
; RV32-EXPAND-LABEL: clmul_expand_i8:
16+
; RV32-EXPAND: # %bb.0:
17+
; RV32-EXPAND-NEXT: andi a2, a0, 1
18+
; RV32-EXPAND-NEXT: beqz a2, .LBB0_2
19+
; RV32-EXPAND-NEXT: # %bb.1:
20+
; RV32-EXPAND-NEXT: mv a2, a1
21+
; RV32-EXPAND-NEXT: j .LBB0_3
22+
; RV32-EXPAND-NEXT: .LBB0_2:
23+
; RV32-EXPAND-NEXT: li a2, 0
24+
; RV32-EXPAND-NEXT: .LBB0_3:
25+
; RV32-EXPAND-NEXT: srli a0, a0, 1
26+
; RV32-EXPAND-NEXT: slli a1, a1, 1
27+
; RV32-EXPAND-NEXT: andi a3, a0, 1
28+
; RV32-EXPAND-NEXT: beqz a3, .LBB0_5
29+
; RV32-EXPAND-NEXT: # %bb.4:
30+
; RV32-EXPAND-NEXT: xor a2, a2, a1
31+
; RV32-EXPAND-NEXT: .LBB0_5:
32+
; RV32-EXPAND-NEXT: srli a0, a0, 1
33+
; RV32-EXPAND-NEXT: slli a1, a1, 1
34+
; RV32-EXPAND-NEXT: andi a3, a0, 1
35+
; RV32-EXPAND-NEXT: beqz a3, .LBB0_7
36+
; RV32-EXPAND-NEXT: # %bb.6:
37+
; RV32-EXPAND-NEXT: xor a2, a2, a1
38+
; RV32-EXPAND-NEXT: .LBB0_7:
39+
; RV32-EXPAND-NEXT: srli a0, a0, 1
40+
; RV32-EXPAND-NEXT: slli a1, a1, 1
41+
; RV32-EXPAND-NEXT: andi a3, a0, 1
42+
; RV32-EXPAND-NEXT: beqz a3, .LBB0_9
43+
; RV32-EXPAND-NEXT: # %bb.8:
44+
; RV32-EXPAND-NEXT: xor a2, a2, a1
45+
; RV32-EXPAND-NEXT: .LBB0_9:
46+
; RV32-EXPAND-NEXT: srli a0, a0, 1
47+
; RV32-EXPAND-NEXT: slli a1, a1, 1
48+
; RV32-EXPAND-NEXT: andi a3, a0, 1
49+
; RV32-EXPAND-NEXT: beqz a3, .LBB0_11
50+
; RV32-EXPAND-NEXT: # %bb.10:
51+
; RV32-EXPAND-NEXT: xor a2, a2, a1
52+
; RV32-EXPAND-NEXT: .LBB0_11:
53+
; RV32-EXPAND-NEXT: srli a0, a0, 1
54+
; RV32-EXPAND-NEXT: slli a1, a1, 1
55+
; RV32-EXPAND-NEXT: andi a3, a0, 1
56+
; RV32-EXPAND-NEXT: beqz a3, .LBB0_13
57+
; RV32-EXPAND-NEXT: # %bb.12:
58+
; RV32-EXPAND-NEXT: xor a2, a2, a1
59+
; RV32-EXPAND-NEXT: .LBB0_13:
60+
; RV32-EXPAND-NEXT: srli a0, a0, 1
61+
; RV32-EXPAND-NEXT: slli a1, a1, 1
62+
; RV32-EXPAND-NEXT: andi a3, a0, 1
63+
; RV32-EXPAND-NEXT: beqz a3, .LBB0_15
64+
; RV32-EXPAND-NEXT: # %bb.14:
65+
; RV32-EXPAND-NEXT: xor a2, a2, a1
66+
; RV32-EXPAND-NEXT: .LBB0_15:
67+
; RV32-EXPAND-NEXT: andi a0, a0, 2
68+
; RV32-EXPAND-NEXT: beqz a0, .LBB0_17
69+
; RV32-EXPAND-NEXT: # %bb.16:
70+
; RV32-EXPAND-NEXT: slli a0, a1, 1
71+
; RV32-EXPAND-NEXT: xor a2, a2, a0
72+
; RV32-EXPAND-NEXT: .LBB0_17:
73+
; RV32-EXPAND-NEXT: mv a0, a2
74+
; RV32-EXPAND-NEXT: ret
75+
%result = call i8 @llvm.clmul.i8(i8 %a, i8 %b)
76+
ret i8 %result
77+
}
78+
79+
define i16 @clmul_expand_i16(i16 %a, i16 %b) nounwind {
80+
; RV32-EXPAND-LABEL: clmul_expand_i16:
81+
; RV32-EXPAND: # %bb.0:
82+
; RV32-EXPAND-NEXT: andi a2, a0, 1
83+
; RV32-EXPAND-NEXT: beqz a2, .LBB1_2
84+
; RV32-EXPAND-NEXT: # %bb.1:
85+
; RV32-EXPAND-NEXT: mv a2, a1
86+
; RV32-EXPAND-NEXT: j .LBB1_3
87+
; RV32-EXPAND-NEXT: .LBB1_2:
88+
; RV32-EXPAND-NEXT: li a2, 0
89+
; RV32-EXPAND-NEXT: .LBB1_3:
90+
; RV32-EXPAND-NEXT: srli a0, a0, 1
91+
; RV32-EXPAND-NEXT: slli a1, a1, 1
92+
%result = call i16 @llvm.clmul.i16(i16 %a, i16 %b)
93+
ret i16 %result
94+
}
95+
96+
define i32 @clmul_expand_i32(i32 %a, i32 %b) nounwind {
97+
; RV32-EXPAND-LABEL: clmul_expand_i32:
98+
; RV32-EXPAND: # %bb.0:
99+
; RV32-EXPAND-NEXT: andi a2, a0, 1
100+
; RV32-EXPAND-NEXT: beqz a2, .LBB2_2
101+
; RV32-EXPAND-NEXT: # %bb.1:
102+
; RV32-EXPAND-NEXT: mv a2, a1
103+
; RV32-EXPAND-NEXT: j .LBB2_3
104+
; RV32-EXPAND-NEXT: .LBB2_2:
105+
; RV32-EXPAND-NEXT: li a2, 0
106+
; RV32-EXPAND-NEXT: .LBB2_3:
107+
; RV32-EXPAND-NEXT: srli a0, a0, 1
108+
; RV32-EXPAND-NEXT: slli a1, a1, 1
109+
%result = call i32 @llvm.clmul.i32(i32 %a, i32 %b)
110+
ret i32 %result
111+
}
112+
113+
define i64 @clmul_expand_i64(i64 %a, i64 %b) nounwind {
114+
; RV32-EXPAND-LABEL: clmul_expand_i64:
115+
; RV32-EXPAND: # %bb.0:
116+
; RV32-EXPAND-NEXT: addi sp, sp, -16
117+
; RV32-EXPAND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
118+
; RV32-EXPAND-NEXT: call __clmuldi3
119+
; RV32-EXPAND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
120+
; RV32-EXPAND-NEXT: addi sp, sp, 16
121+
; RV32-EXPAND-NEXT: ret
122+
;
123+
; RV64-EXPAND-LABEL: clmul_expand_i64:
124+
; RV64-EXPAND: # %bb.0:
125+
; RV64-EXPAND-NEXT: andi a2, a0, 1
126+
; RV64-EXPAND-NEXT: beqz a2, .LBB3_2
127+
; RV64-EXPAND-NEXT: # %bb.1:
128+
; RV64-EXPAND-NEXT: mv a2, a1
129+
; RV64-EXPAND-NEXT: j .LBB3_3
130+
; RV64-EXPAND-NEXT: .LBB3_2:
131+
; RV64-EXPAND-NEXT: li a2, 0
132+
; RV64-EXPAND-NEXT: .LBB3_3:
133+
; RV64-EXPAND-NEXT: srli a0, a0, 1
134+
; RV64-EXPAND-NEXT: slli a1, a1, 1
135+
%result = call i64 @llvm.clmul.i64(i64 %a, i64 %b)
136+
ret i64 %result
137+
}
138+
139+
define i128 @clmul_expand_i128(i128 %a, i128 %b) nounwind {
140+
; RV32-EXPAND-LABEL: clmul_expand_i128:
141+
; RV32-EXPAND: # %bb.0:
142+
; RV32-EXPAND-NEXT: addi sp, sp, -16
143+
; RV32-EXPAND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
144+
; RV32-EXPAND-NEXT: call __clmulti3
145+
; RV32-EXPAND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
146+
; RV32-EXPAND-NEXT: addi sp, sp, 16
147+
; RV32-EXPAND-NEXT: ret
148+
;
149+
; RV64-EXPAND-LABEL: clmul_expand_i128:
150+
; RV64-EXPAND: # %bb.0:
151+
; RV64-EXPAND-NEXT: addi sp, sp, -16
152+
; RV64-EXPAND-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
153+
; RV64-EXPAND-NEXT: call __clmulti3
154+
; RV64-EXPAND-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
155+
; RV64-EXPAND-NEXT: addi sp, sp, 16
156+
; RV64-EXPAND-NEXT: ret
157+
%result = call i128 @llvm.clmul.i128(i128 %a, i128 %b)
158+
ret i128 %result
159+
}
160+
161+
; Test with known constants to verify correctness
162+
define i8 @clmul_expand_i8_known() nounwind {
163+
; RV32-EXPAND-LABEL: clmul_expand_i8_known:
164+
; RV32-EXPAND: # %bb.0:
165+
; RV32-EXPAND-NEXT: li a0, 14
166+
; RV32-EXPAND-NEXT: ret
167+
;
168+
; RV64-EXPAND-LABEL: clmul_expand_i8_known:
169+
; RV64-EXPAND: # %bb.0:
170+
; RV64-EXPAND-NEXT: li a0, 14
171+
; RV64-EXPAND-NEXT: ret
172+
; clmul(5, 6) = clmul(0b101, 0b110) = 0b1110 = 14
173+
%result = call i8 @llvm.clmul.i8(i8 5, i8 6)
174+
ret i8 %result
175+
}
176+
177+
define i16 @clmul_expand_i16_known() nounwind {
178+
; RV32-EXPAND-LABEL: clmul_expand_i16_known:
179+
; RV32-EXPAND: # %bb.0:
180+
; RV32-EXPAND-NEXT: li a0, 158
181+
; RV32-EXPAND-NEXT: ret
182+
;
183+
; RV64-EXPAND-LABEL: clmul_expand_i16_known:
184+
; RV64-EXPAND: # %bb.0:
185+
; RV64-EXPAND-NEXT: li a0, 158
186+
; RV64-EXPAND-NEXT: ret
187+
; clmul(15, 13) = clmul(0b1111, 0b1101) = 0b10011110 = 158
188+
%result = call i16 @llvm.clmul.i16(i16 15, i16 13)
189+
ret i16 %result
190+
}
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=-zbc,-zbkc -verify-machineinstrs | FileCheck %s --check-prefix=RV32
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=-zbc,-zbkc -verify-machineinstrs | FileCheck %s --check-prefix=RV64
4+
; RUN: llc < %s -mtriple=x86_64 -verify-machineinstrs | FileCheck %s --check-prefix=X64
5+
6+
; Test CLMUL with vector types
7+
8+
declare <2 x i32> @llvm.clmul.v2i32(<2 x i32> %a, <2 x i32> %b)
9+
declare <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a, <4 x i32> %b)
10+
declare <2 x i64> @llvm.clmul.v2i64(<2 x i64> %a, <2 x i64> %b)
11+
declare <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a, <8 x i16> %b)
12+
declare <16 x i8> @llvm.clmul.v16i8(<16 x i8> %a, <16 x i8> %b)
13+
14+
define <2 x i32> @clmul_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
15+
; RV32-LABEL: clmul_v2i32:
16+
; RV32: # %bb.0:
17+
; RV32-NEXT: addi sp, sp, -32
18+
; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
19+
; RV32-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
20+
; RV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
21+
; RV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
22+
; RV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
23+
; RV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill
24+
; RV32-NEXT: mv s0, a3
25+
; RV32-NEXT: mv s1, a2
26+
; RV32-NEXT: mv s2, a1
27+
; RV32-NEXT: mv s3, a0
28+
; RV32-NEXT: andi a0, a3, 1
29+
; RV32-NEXT: beqz a0, .LBB0_2
30+
; RV32-NEXT: # %bb.1:
31+
; RV32-NEXT: mv s4, s1
32+
; RV32-NEXT: j .LBB0_3
33+
; RV32-NEXT: .LBB0_2:
34+
; RV32-NEXT: li s4, 0
35+
; RV32-NEXT: .LBB0_3:
36+
; RV32-NEXT: srli s0, s0, 1
37+
; RV32-NEXT: slli s1, s1, 1
38+
%result = call <2 x i32> @llvm.clmul.v2i32(<2 x i32> %a, <2 x i32> %b)
39+
ret <2 x i32> %result
40+
}
41+
42+
define <4 x i32> @clmul_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
43+
; RV32-LABEL: clmul_v4i32:
44+
; RV32: # %bb.0:
45+
; RV32-NEXT: addi sp, sp, -80
46+
; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
47+
; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
48+
; RV32-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
49+
; RV32-NEXT: sw s2, 64(sp) # 4-byte Folded Spill
50+
; RV32-NEXT: sw s3, 60(sp) # 4-byte Folded Spill
51+
; RV32-NEXT: sw s4, 56(sp) # 4-byte Folded Spill
52+
; RV32-NEXT: sw s5, 52(sp) # 4-byte Folded Spill
53+
; RV32-NEXT: sw s6, 48(sp) # 4-byte Folded Spill
54+
; RV32-NEXT: sw s7, 44(sp) # 4-byte Folded Spill
55+
; RV32-NEXT: sw s8, 40(sp) # 4-byte Folded Spill
56+
; RV32-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
57+
; RV32-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
58+
; RV32-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
59+
%result = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a, <4 x i32> %b)
60+
ret <4 x i32> %result
61+
}
62+
63+
define <2 x i64> @clmul_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
64+
; RV32-LABEL: clmul_v2i64:
65+
; RV32: # %bb.0:
66+
; RV32-NEXT: addi sp, sp, -48
67+
; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
68+
; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
69+
; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
70+
; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
71+
; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
72+
; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
73+
; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
74+
; RV32-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
75+
; RV32-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
76+
; RV32-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
77+
; RV32-NEXT: lw s0, 64(sp)
78+
; RV32-NEXT: lw s1, 68(sp)
79+
; RV32-NEXT: lw s2, 72(sp)
80+
; RV32-NEXT: lw s3, 76(sp)
81+
; RV32-NEXT: mv s4, a0
82+
; RV32-NEXT: mv s5, a1
83+
; RV32-NEXT: mv s6, a2
84+
; RV32-NEXT: mv s7, a3
85+
; RV32-NEXT: mv a0, s6
86+
; RV32-NEXT: mv a1, s7
87+
; RV32-NEXT: mv a2, s2
88+
; RV32-NEXT: mv a3, s3
89+
; RV32-NEXT: call __clmuldi3
90+
; RV32-NEXT: mv s8, a0
91+
; RV32-NEXT: mv s2, a1
92+
; RV32-NEXT: mv a0, s4
93+
; RV32-NEXT: mv a1, s5
94+
; RV32-NEXT: mv a2, s0
95+
; RV32-NEXT: mv a3, s1
96+
; RV32-NEXT: call __clmuldi3
97+
; RV32-NEXT: mv a2, a0
98+
; RV32-NEXT: mv a3, a1
99+
; RV32-NEXT: mv a0, a2
100+
; RV32-NEXT: mv a1, a3
101+
; RV32-NEXT: mv a2, s8
102+
; RV32-NEXT: mv a3, s2
103+
; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
104+
; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
105+
; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
106+
; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
107+
; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
108+
; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
109+
; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
110+
; RV32-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
111+
; RV32-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
112+
; RV32-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
113+
; RV32-NEXT: addi sp, sp, 48
114+
; RV32-NEXT: ret
115+
%result = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %a, <2 x i64> %b)
116+
ret <2 x i64> %result
117+
}
118+
119+
define <8 x i16> @clmul_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
120+
; RV32-LABEL: clmul_v8i16:
121+
; RV32: # %bb.0:
122+
; RV32-NEXT: addi sp, sp, -144
123+
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
124+
; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
125+
%result = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a, <8 x i16> %b)
126+
ret <8 x i16> %result
127+
}
128+
129+
define <16 x i8> @clmul_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
130+
; RV32-LABEL: clmul_v16i8:
131+
; RV32: # %bb.0:
132+
; RV32-NEXT: addi sp, sp, -272
133+
; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
134+
; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
135+
%result = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %a, <16 x i8> %b)
136+
ret <16 x i8> %result
137+
}
138+
139+
; Test with splat vectors
140+
define <2 x i32> @clmul_v2i32_splat(<2 x i32> %a) nounwind {
141+
; RV32-LABEL: clmul_v2i32_splat:
142+
; RV32: # %bb.0:
143+
; RV32-NEXT: addi sp, sp, -32
144+
; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
145+
; RV32-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
146+
; RV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
147+
; RV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
148+
; RV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
149+
; RV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill
150+
; RV32-NEXT: mv s0, a1
151+
; RV32-NEXT: mv s1, a0
152+
; RV32-NEXT: andi a0, a1, 1
153+
; RV32-NEXT: li s2, 3
154+
; RV32-NEXT: beqz a0, .LBB5_2
155+
; RV32-NEXT: # %bb.1:
156+
; RV32-NEXT: mv s3, s2
157+
; RV32-NEXT: j .LBB5_3
158+
; RV32-NEXT: .LBB5_2:
159+
; RV32-NEXT: li s3, 0
160+
; RV32-NEXT: .LBB5_3:
161+
; RV32-NEXT: srli s0, s0, 1
162+
; RV32-NEXT: slli s2, s2, 1
163+
%splat = insertelement <2 x i32> poison, i32 3, i32 0
164+
%splat_vec = shufflevector <2 x i32> %splat, <2 x i32> poison, <2 x i32> zeroinitializer
165+
%result = call <2 x i32> @llvm.clmul.v2i32(<2 x i32> %a, <2 x i32> %splat_vec)
166+
ret <2 x i32> %result
167+
}
168+
169+
; Test with constant vectors
170+
define <2 x i32> @clmul_v2i32_const() nounwind {
171+
; RV32-LABEL: clmul_v2i32_const:
172+
; RV32: # %bb.0:
173+
; RV32-NEXT: li a0, 14
174+
; RV32-NEXT: li a1, 6
175+
; RV32-NEXT: ret
176+
%result = call <2 x i32> @llvm.clmul.v2i32(<2 x i32> <i32 5, i32 2>, <2 x i32> <i32 6, i32 3>)
177+
ret <2 x i32> %result
178+
}

0 commit comments

Comments
 (0)