@@ -34,8 +34,7 @@ define i64 @callee_not_avx(<4 x i64> %arg) noinline {
3434 ret i64 %v
3535}
3636
37- ; This call also shouldn't be inlined, as we don't know whether callee_unknown
38- ; is ABI compatible or not.
37+ ; This call also shouldn't be inlined, as caller_not_avx2 is not ABI compatible.
3938define void @caller_avx2 () "target-features" ="+avx" {
4039; CHECK-LABEL: define {{[^@]+}}@caller_avx2
4140; CHECK-SAME: () #[[ATTR0]] {
@@ -55,27 +54,54 @@ define internal void @caller_not_avx2() {
5554 ret void
5655}
5756
57+ ; Should be inlined, as caller_avx7 is ABI compatible. The fact that we don't
58+ ; know anything about callee_unknown doesn't matter, as it is the caller that
59+ ; determines the ABI as far as target features are concerned.
60+ define void @caller_avx6 () "target-features" ="+avx" {
61+ ; CHECK-LABEL: define {{[^@]+}}@caller_avx6
62+ ; CHECK-SAME: () #[[ATTR0]] {
63+ ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee_unknown(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
64+ ; CHECK-NEXT: ret void
65+ ;
66+ call void @caller_avx7 ()
67+ ret void
68+ }
69+
70+ define void @caller_avx7 () "target-features" ="+avx" {
71+ ; CHECK-LABEL: define {{[^@]+}}@caller_avx7
72+ ; CHECK-SAME: () #[[ATTR0]] {
73+ ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee_unknown(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
74+ ; CHECK-NEXT: ret void
75+ ;
76+ call i64 @callee_unknown (<4 x i64 > <i64 0 , i64 1 , i64 2 , i64 3 >)
77+ ret void
78+ }
79+
5880declare i64 @callee_unknown (<4 x i64 >)
5981
6082; This call should get inlined, because we assume that intrinsics are always
6183; ABI compatible.
6284define void @caller_avx3 () "target-features" ="+avx" {
6385; CHECK-LABEL: define {{[^@]+}}@caller_avx3
6486; CHECK-SAME: () #[[ATTR0]] {
65- ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.some_intrinsic(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
87+ ; CHECK-NEXT: [[V_I:%.*]] = load <4 x i64>, ptr @g, align 32
88+ ; CHECK-NEXT: [[V2_I:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> [[V_I]], i1 false)
89+ ; CHECK-NEXT: store <4 x i64> [[V2_I]], ptr @g, align 32
6690; CHECK-NEXT: ret void
6791;
6892 call void @caller_not_avx3 ()
6993 ret void
7094}
7195
96+ @g = external global <4 x i64 >
97+
7298define internal void @caller_not_avx3 () {
73- call i64 @llvm.some_intrinsic (<4 x i64 > <i64 0 , i64 1 , i64 2 , i64 3 >)
99+ %v = load <4 x i64 >, ptr @g
100+ %v2 = call <4 x i64 > @llvm.abs (<4 x i64 > %v , i1 false )
101+ store <4 x i64 > %v2 , ptr @g
74102 ret void
75103}
76104
77- declare i64 @llvm.some_intrinsic (<4 x i64 >)
78-
79105; This call should get inlined, because only simple types are involved.
80106define void @caller_avx4 () "target-features" ="+avx" {
81107; CHECK-LABEL: define {{[^@]+}}@caller_avx4
0 commit comments