Skip to content

Commit 898c30b

Browse files
committed
[clang][llvm][aarch64] Add aarch64_sme_in_streaming_mode intrinsic
1 parent 9fc54c0 commit 898c30b

File tree

6 files changed

+74
-28
lines changed

6 files changed

+74
-28
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,8 @@ let SMETargetGuard = "sme2" in {
716716
def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
717717
}
718718

719+
def IN_STREAMING_MODE : Inst<"in_streaming_mode", "d", "", MergeNone, "aarch64_sme_in_streaming_mode", [IsOverloadNone, IsStreamingCompatible], []>;
720+
719721
//
720722
// lookup table expand four contiguous registers
721723
//

clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,26 @@
88

99
// CHECK-LABEL: @test_in_streaming_mode(
1010
// CHECK-NEXT: entry:
11-
// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
12-
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
13-
// CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1
14-
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
15-
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
11+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
12+
// CHECK-NEXT: ret i1 [[TMP0]]
1613
//
1714
// CPP-CHECK-LABEL: @_Z22test_in_streaming_modev(
1815
// CPP-CHECK-NEXT: entry:
19-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
20-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
21-
// CPP-CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1
22-
// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
23-
// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]]
16+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
17+
// CPP-CHECK-NEXT: ret i1 [[TMP0]]
2418
//
2519
bool test_in_streaming_mode(void) __arm_streaming_compatible {
2620
return __arm_in_streaming_mode();
2721
}
2822

2923
// CHECK-LABEL: @test_za_disable(
3024
// CHECK-NEXT: entry:
31-
// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR3]]
25+
// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]]
3226
// CHECK-NEXT: ret void
3327
//
3428
// CPP-CHECK-LABEL: @_Z15test_za_disablev(
3529
// CPP-CHECK-NEXT: entry:
36-
// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR3]]
30+
// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]]
3731
// CPP-CHECK-NEXT: ret void
3832
//
3933
void test_za_disable(void) __arm_streaming_compatible {
@@ -42,14 +36,14 @@ void test_za_disable(void) __arm_streaming_compatible {
4236

4337
// CHECK-LABEL: @test_has_sme(
4438
// CHECK-NEXT: entry:
45-
// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
39+
// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]]
4640
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
4741
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
4842
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
4943
//
5044
// CPP-CHECK-LABEL: @_Z12test_has_smev(
5145
// CPP-CHECK-NEXT: entry:
52-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
46+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]]
5347
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
5448
// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
5549
// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]]
@@ -72,12 +66,12 @@ void test_svundef_za(void) __arm_streaming_compatible __arm_out("za") {
7266

7367
// CHECK-LABEL: @test_sc_memcpy(
7468
// CHECK-NEXT: entry:
75-
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
69+
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
7670
// CHECK-NEXT: ret ptr [[CALL]]
7771
//
7872
// CPP-CHECK-LABEL: @_Z14test_sc_memcpyPvPKvm(
7973
// CPP-CHECK-NEXT: entry:
80-
// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
74+
// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
8175
// CPP-CHECK-NEXT: ret ptr [[CALL]]
8276
//
8377
void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible {
@@ -86,12 +80,12 @@ void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_comp
8680

8781
// CHECK-LABEL: @test_sc_memmove(
8882
// CHECK-NEXT: entry:
89-
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
83+
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
9084
// CHECK-NEXT: ret ptr [[CALL]]
9185
//
9286
// CPP-CHECK-LABEL: @_Z15test_sc_memmovePvPKvm(
9387
// CPP-CHECK-NEXT: entry:
94-
// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
88+
// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
9589
// CPP-CHECK-NEXT: ret ptr [[CALL]]
9690
//
9791
void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible {
@@ -100,12 +94,12 @@ void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_com
10094

10195
// CHECK-LABEL: @test_sc_memset(
10296
// CHECK-NEXT: entry:
103-
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
97+
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
10498
// CHECK-NEXT: ret ptr [[CALL]]
10599
//
106100
// CPP-CHECK-LABEL: @_Z14test_sc_memsetPvim(
107101
// CPP-CHECK-NEXT: entry:
108-
// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
102+
// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
109103
// CPP-CHECK-NEXT: ret ptr [[CALL]]
110104
//
111105
void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible {
@@ -114,12 +108,12 @@ void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible {
114108

115109
// CHECK-LABEL: @test_sc_memchr(
116110
// CHECK-NEXT: entry:
117-
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
111+
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
118112
// CHECK-NEXT: ret ptr [[CALL]]
119113
//
120114
// CPP-CHECK-LABEL: @_Z14test_sc_memchrPvim(
121115
// CPP-CHECK-NEXT: entry:
122-
// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
116+
// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
123117
// CPP-CHECK-NEXT: ret ptr [[CALL]]
124118
//
125119
void *test_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible {

clang/utils/TableGen/SveEmitter.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,12 +1636,8 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) {
16361636
OS << " return x0 & (1ULL << 63);\n";
16371637
OS << "}\n\n";
16381638

1639-
OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible "
1640-
"{\n";
1641-
OS << " uint64_t x0, x1;\n";
1642-
OS << " __builtin_arm_get_sme_state(&x0, &x1);\n";
1643-
OS << " return x0 & 1;\n";
1644-
OS << "}\n\n";
1639+
OS << "__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_in_streaming_mode)))";
1640+
OS << " bool __arm_in_streaming_mode(void) __arm_streaming_compatible;\n\n";
16451641

16461642
OS << "void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n";
16471643
OS << "void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n";

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2974,6 +2974,7 @@ let TargetPrefix = "aarch64" in {
29742974

29752975

29762976
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
2977+
def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">;
29772978

29782979
class SME_OuterProduct_Intrinsic
29792980
: DefaultAttrsIntrinsic<[],

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11831183
setMaxDivRemBitWidthSupported(128);
11841184

11851185
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1186+
if (Subtarget->hasSME())
1187+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
11861188

11871189
if (Subtarget->isNeonAvailable()) {
11881190
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
@@ -27292,6 +27294,13 @@ void AArch64TargetLowering::ReplaceNodeResults(
2729227294
N->getOperand(1), N->getOperand(2));
2729327295
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
2729427296
return;
27297+
}
27298+
case Intrinsic::aarch64_sme_in_streaming_mode: {
27299+
auto DL = SDLoc(N);
27300+
SDValue Chain = DAG.getEntryNode();
27301+
auto RuntimePStateSM = getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
27302+
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM));
27303+
return;
2729527304
}
2729627305
case Intrinsic::experimental_vector_match:
2729727306
case Intrinsic::get_active_lane_mask: {
(new llc test file for @llvm.aarch64.sme.in.streaming.mode — file path missing from this capture)

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
3+
4+
5+
define i1 @streaming_mode_st_compatible() #0 {
6+
; CHECK-LABEL: streaming_mode_st_compatible:
7+
; CHECK: // %bb.0:
8+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
9+
; CHECK-NEXT: bl __arm_sme_state
10+
; CHECK-NEXT: and w0, w0, #0x1
11+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
12+
; CHECK-NEXT: ret
13+
%mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
14+
ret i1 %mode
15+
}
16+
17+
define i1 @streaming_mode_st_enabled() #1 {
18+
; CHECK-LABEL: streaming_mode_st_enabled:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
21+
; CHECK-NEXT: bl __arm_sme_state
22+
; CHECK-NEXT: and w0, w0, #0x1
23+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
24+
; CHECK-NEXT: ret
25+
%mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
26+
ret i1 %mode
27+
}
28+
29+
define i1 @streaming_mode_st_disabled() #2 {
30+
; CHECK-LABEL: streaming_mode_st_disabled:
31+
; CHECK: // %bb.0:
32+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
33+
; CHECK-NEXT: bl __arm_sme_state
34+
; CHECK-NEXT: and w0, w0, #0x1
35+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
36+
; CHECK-NEXT: ret
37+
%mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
38+
ret i1 %mode
39+
}
40+
41+
42+
attributes #0 = {nounwind memory(none) "aarch64_pstate_sm_compatible"}
43+
attributes #1 = {nounwind memory(none) "aarch64_pstate_sm_enabled"}
44+
attributes #2 = {nounwind memory(none)}

0 commit comments

Comments
 (0)