Skip to content

Commit 04e5bc7

Browse files
[AArch64] Add support for range prefetch intrinsic (#170490)
This patch adds support in Clang for the RPRFM instruction by adding the following intrinsics: ``` void __pldx_range(unsigned int access_kind, unsigned int retention_policy, signed int length, unsigned int count, signed int stride, size_t reuse_distance, void const *addr); void __pld_range(unsigned int access_kind, unsigned int retention_policy, uint64_t metadata, void const *addr); ``` The `__ARM_PREFETCH_RANGE` macro can be used to test whether these intrinsics are implemented. If the RPRFM instruction is not available, the emitted instruction is a NOP. This implements the following ACLE proposal: ARM-software/acle#423
1 parent 3c45c54 commit 04e5bc7

File tree

18 files changed

+254
-1
lines changed

18 files changed

+254
-1
lines changed

clang/include/clang/Basic/BuiltinsAArch64.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
9696
// Prefetch
9797
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
9898

99+
// Range Prefetch
100+
BUILTIN(__builtin_arm_range_prefetch_x, "vvC*UiUiiUiiz", "n")
101+
BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiWUi", "n")
102+
99103
// System Registers
100104
BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
101105
BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc")

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
477477

478478
Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4");
479479

480+
// Clang supports range prefetch intrinsics
481+
Builder.defineMacro("__ARM_PREFETCH_RANGE", "1");
482+
480483
if (FPU & NeonMode) {
481484
Builder.defineMacro("__ARM_NEON", "1");
482485
// 64-bit NEON supports half, single and double precision operations.

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2660,6 +2660,56 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
26602660
return Builder.CreateCall(F, { Metadata, ArgValue });
26612661
}
26622662

2663+
/// Emit a call to the llvm.aarch64.range.prefetch intrinsic for the
/// __builtin_arm_range_prefetch and __builtin_arm_range_prefetch_x builtins.
///
/// Both builtins forward the address, access kind and retention policy
/// unchanged. The plain builtin passes its fourth argument through as the
/// metadata operand. The "_x" builtin instead folds its constant length,
/// count, stride and reuse-distance arguments into a single 64-bit constant.
///
/// \param CGF       Code generation state used to emit the operands and call.
/// \param BuiltinID Which of the two range prefetch builtins is being emitted.
/// \param E         The builtin call expression.
/// \returns The emitted intrinsic call.
static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
                                       const CallExpr *E) {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;
  SmallVector<llvm::Value *, 4> Ops;

  // Evaluate argument ArgNo as an integer constant; a non-constant argument
  // is treated as unreachable here.
  auto getIntArg = [&](unsigned ArgNo) {
    Expr::EvalResult Result;
    if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Expected constant argument to range prefetch.");
    return Result.Val.getInt().getExtValue();
  };

  Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
  Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
  Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/

  if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
    auto Length = getIntArg(3);
    // The count is stored biased by one before being masked to 16 bits.
    auto Count = getIntArg(4) - 1;
    auto Stride = getIntArg(5);
    auto Distance = getIntArg(6);

    // Map ReuseDistance given in bytes to four bits representing decreasing
    // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
    // are rounded up to the nearest power of 2, starting at 32KiB. Any value
    // over the maximum is represented by 0 (distance not known).
    if (Distance > 0) {
      // The distance is a 64-bit quantity, so take the 64-bit logarithm.
      // Narrowing to 32 bits first would make distances above 4GiB wrap
      // around and be encoded as if they were small.
      Distance = llvm::Log2_64_Ceil(Distance);
      if (Distance < 15)
        Distance = 15;
      else if (Distance > 29)
        Distance = 0;
      else
        Distance = 30 - Distance;
    }

    // Metadata layout: ReuseDistance in bits [63:60], Stride in [59:38],
    // Count - 1 in [37:22] and Length in [21:0]. Signed fields are truncated
    // to their two's complement field width by the masks.
    uint64_t Mask22 = (1ULL << 22) - 1;
    uint64_t Mask16 = (1ULL << 16) - 1;
    uint64_t Metadata = (static_cast<uint64_t>(Distance) << 60) |
                        ((Stride & Mask22) << 38) |
                        ((Count & Mask16) << 22) | (Length & Mask22);

    Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
  } else
    Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));

  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
                            Ops);
}
2712+
26632713
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
26642714
/// argument that specifies the vector type.
26652715
static bool HasExtraNeonArgument(unsigned BuiltinID) {
@@ -5447,6 +5497,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
54475497
CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
54485498
}
54495499

5500+
if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
5501+
BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
5502+
return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
5503+
54505504
// Memory Tagging Extensions (MTE) Intrinsics
54515505
Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
54525506
switch (BuiltinID) {

clang/lib/Headers/arm_acle.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,12 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
9898
#else
9999
#define __pldx(access_kind, cache_level, retention_policy, addr) \
100100
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
101+
#define __pldx_range(access_kind, retention_policy, length, count, stride, \
102+
reuse_distance, addr) \
103+
__builtin_arm_range_prefetch_x(addr, access_kind, retention_policy, length, \
104+
count, stride, reuse_distance)
105+
#define __pld_range(access_kind, retention_policy, metadata, addr) \
106+
__builtin_arm_range_prefetch(addr, access_kind, retention_policy, metadata)
101107
#endif
102108

103109
/* 7.6.2 Instruction prefetch */

clang/lib/Sema/SemaARM.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,19 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
11221122
SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1);
11231123
}
11241124

1125+
if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x) {
1126+
return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
1127+
SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) ||
1128+
SemaRef.BuiltinConstantArgRange(TheCall, 3, -2097152, 2097151) ||
1129+
SemaRef.BuiltinConstantArgRange(TheCall, 4, 1, 65536) ||
1130+
SemaRef.BuiltinConstantArgRange(TheCall, 5, -2097152, 2097151);
1131+
}
1132+
1133+
if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) {
1134+
return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
1135+
SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1);
1136+
}
1137+
11251138
if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
11261139
BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
11271140
BuiltinID == AArch64::BI__builtin_arm_rsr128 ||

clang/test/CodeGen/arm_acle.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,28 @@ void test_pld() {
164164
__pld(0);
165165
}
166166

167+
#if defined(__ARM_64BIT_STATE) && defined(__ARM_PREFETCH_RANGE)
168+
169+
// AArch64-LABEL: @test_pld_range(
170+
// AArch64-NEXT: entry:
171+
// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 [[MD:%.*]])
172+
// AArch64-NEXT: ret void
173+
//
174+
void test_pld_range(uint64_t md) {
175+
__pld_range(0, 1, md, 0);
176+
}
177+
178+
// AArch64-LABEL: @test_pldx_range(
179+
// AArch64-NEXT: entry:
180+
// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 -576460477427613697)
181+
// AArch64-NEXT: ret void
182+
//
183+
void test_pldx_range() {
184+
__pldx_range(0, 1, 2097151, 65536, -2097152, 15, 0);
185+
}
186+
187+
#endif
188+
167189
// AArch32-LABEL: @test_pldx(
168190
// AArch32-NEXT: entry:
169191
// AArch32-NEXT: call void @llvm.prefetch.p0(ptr null, i32 1, i32 3, i32 1)

clang/test/CodeGen/builtins-arm64.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,55 @@ void prefetch(void) {
6262
// CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
6363
}
6464

65+
void range_prefetch(void) {
66+
__builtin_arm_range_prefetch(0, 0, 0, 0); // pldkeep
67+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
68+
69+
__builtin_arm_range_prefetch(0, 0, 1, 0); // pldstrm
70+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 0)
71+
72+
__builtin_arm_range_prefetch(0, 1, 0, 0); // pstkeep
73+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i64 0)
74+
75+
__builtin_arm_range_prefetch(0, 1, 1, 0); // pststrm
76+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i64 0)
77+
}
78+
79+
void range_prefetch_x(void) {
80+
__builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 0); // pldkeep
81+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
82+
__builtin_arm_range_prefetch_x(0, 0, 1, 0, 1, 0, 0); // pldstrm
83+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 0)
84+
__builtin_arm_range_prefetch_x(0, 1, 0, 0, 1, 0, 0); // pstkeep
85+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i64 0)
86+
__builtin_arm_range_prefetch_x(0, 1, 1, 0, 1, 0, 0); // pststrm
87+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i64 0)
88+
89+
// Lower limits (length, count & stride)
90+
__builtin_arm_range_prefetch_x(0, 0, 0, -2097152, 1, -2097152, 0);
91+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 576460752305520640)
92+
93+
// Upper limits (length, count & stride)
94+
__builtin_arm_range_prefetch_x(0, 0, 0, 2097151, 65536, 2097151, 0);
95+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 576460752301326335)
96+
97+
// Distance less than minimum, round up to first power of two (1111)
98+
__builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 1);
99+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -1152921504606846976)
100+
101+
// Distance 1 over minimum, round up to next power of 2 (1110)
102+
__builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 32769);
103+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -2305843009213693952)
104+
105+
// Distance is a power of two in range (1010)
106+
__builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 1048576);
107+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -6917529027641081856)
108+
109+
// Distance is out of range, set to 0 (0000)
110+
__builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 536870913);
111+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
112+
}
113+
65114
__attribute__((target("v8.5a")))
66115
int32_t jcvt(double v) {
67116
//CHECK-LABEL: @jcvt(

clang/test/Preprocessor/aarch64-target-features.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
// CHECK: __ARM_NEON_FP 0xE
4242
// CHECK: __ARM_NEON_SVE_BRIDGE 1
4343
// CHECK: __ARM_PCS_AAPCS64 1
44+
// CHECK: __ARM_PREFETCH_RANGE 1
4445
// CHECK-NOT: __ARM_PCS 1
4546
// CHECK-NOT: __ARM_PCS_VFP 1
4647
// CHECK-NOT: __ARM_SIZEOF_MINIMAL_ENUM 1

clang/test/Preprocessor/init-aarch64.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
// AARCH64-NEXT: #define __ARM_FP16_FORMAT_IEEE 1
3333
// AARCH64-NEXT: #define __ARM_NEON_SVE_BRIDGE 1
3434
// AARCH64-NEXT: #define __ARM_PCS_AAPCS64 1
35+
// AARCH64-NEXT: #define __ARM_PREFETCH_RANGE 1
3536
// AARCH64-NEXT: #define __ARM_SIZEOF_MINIMAL_ENUM 4
3637
// AARCH64-NEXT: #define __ARM_SIZEOF_WCHAR_T 4
3738
// AARCH64-NEXT: #define __ARM_STATE_ZA 1

clang/test/Sema/builtins-arm64.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,24 @@ void test_prefetch(void) {
3030
__builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
3131
}
3232

33+
void test_range_prefetch(void) {
34+
__builtin_arm_range_prefetch(0, 2, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
35+
__builtin_arm_range_prefetch(0, 0, 2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
36+
37+
__builtin_arm_range_prefetch_x(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
38+
__builtin_arm_range_prefetch_x(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
39+
__builtin_arm_range_prefetch_x(0, 0, 0, -2097153, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
40+
__builtin_arm_range_prefetch_x(0, 0, 0, 2097152, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
41+
__builtin_arm_range_prefetch_x(0, 0, 0, 0, 65537, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
42+
__builtin_arm_range_prefetch_x(0, 0, 0, 0, 0, -2097153, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
43+
__builtin_arm_range_prefetch_x(0, 0, 0, 0, 0, 2097152, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
44+
}
45+
3346
void test_trap(short s, unsigned short us) {
3447
__builtin_arm_trap(42);
3548
__builtin_arm_trap(65535);
3649
__builtin_arm_trap(-1);
3750
__builtin_arm_trap(65536); // expected-warning {{implicit conversion from 'int' to 'unsigned short' changes value from 65536 to 0}}
3851
__builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
3952
__builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
40-
}
53+
}

0 commit comments

Comments
 (0)