Skip to content

Commit 2b94903

Browse files
committed
Implement operand bundles for floating-point operations
Currently, floating-point operations in general form (beyond the default mode) are always represented by calls to constrained intrinsics. In addition to the side effect, they carry additional information in the form of metadata arguments. This scheme is not efficient in the case of intrinsic function calls, as was noted in https://discourse.llvm.org/t/thought-on-strictfp-support/71453, because it requires defining a separate intrinsic for the same operation when it is used in a non-default FP environment. The solution proposed in the discussion was "to move the complexity about the environment tracking from the intrinsics themselves to the call instruction". The approach implemented in this change is to use operand bundles (https://llvm.org/docs/LangRef.html#operand-bundles). This approach was tried previously (https://reviews.llvm.org/D93455), but was not finished. This change does not add any new functionality; it only adds a new way of keeping FP-related information in LLVM IR. Metadata arguments of constrained functions are preserved, but they are not used in queries like `getRoundingMode` or `getExceptionBehavior`.
1 parent a177be5 commit 2b94903

22 files changed

+413
-72
lines changed

clang/test/CodeGen/X86/strictfp_builtins.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void p(char *str, int x) {
2727
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
2828
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
2929
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
30-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR3]]
30+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR4:[0-9]+]]
3131
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
3232
// CHECK-NEXT: call void @p(ptr noundef @.str.1, i32 noundef [[TMP2]]) #[[ATTR3]]
3333
// CHECK-NEXT: ret void
@@ -43,7 +43,7 @@ void test_long_double_isinf(long double ld) {
4343
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
4444
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
4545
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
46-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR3]]
46+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR4]]
4747
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
4848
// CHECK-NEXT: call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR3]]
4949
// CHECK-NEXT: ret void
@@ -59,7 +59,7 @@ void test_long_double_isfinite(long double ld) {
5959
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
6060
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
6161
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
62-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR3]]
62+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR4]]
6363
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
6464
// CHECK-NEXT: call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR3]]
6565
// CHECK-NEXT: ret void

clang/test/CodeGen/strictfp_builtins.c

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,21 @@ void p(char *str, int x) {
3131
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
3232
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
3333
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
34-
// CHECK-NEXT: [[ISZERO:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
34+
// CHECK-NEXT: [[ISZERO:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5:[0-9]+]] [ "fpe.except"(i32 2) ]
3535
// CHECK-NEXT: br i1 [[ISZERO]], label [[FPCLASSIFY_END:%.*]], label [[FPCLASSIFY_NOT_ZERO:%.*]]
3636
// CHECK: fpclassify_end:
3737
// CHECK-NEXT: [[FPCLASSIFY_RESULT:%.*]] = phi i32 [ 4, [[ENTRY:%.*]] ], [ 0, [[FPCLASSIFY_NOT_ZERO]] ], [ 1, [[FPCLASSIFY_NOT_NAN:%.*]] ], [ [[TMP2:%.*]], [[FPCLASSIFY_NOT_INF:%.*]] ]
3838
// CHECK-NEXT: call void @p(ptr noundef @.str.1, i32 noundef [[FPCLASSIFY_RESULT]]) #[[ATTR4]]
3939
// CHECK-NEXT: ret void
4040
// CHECK: fpclassify_not_zero:
41-
// CHECK-NEXT: [[CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP0]], metadata !"uno", metadata !"fpexcept.strict") #[[ATTR4]]
41+
// CHECK-NEXT: [[CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP0]], metadata !"uno", metadata !"fpexcept.strict") #[[ATTR5]] [ "fpe.except"(i32 2) ]
4242
// CHECK-NEXT: br i1 [[CMP]], label [[FPCLASSIFY_END]], label [[FPCLASSIFY_NOT_NAN]]
4343
// CHECK: fpclassify_not_nan:
44-
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR5:[0-9]+]]
45-
// CHECK-NEXT: [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
44+
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR6:[0-9]+]]
45+
// CHECK-NEXT: [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5]] [ "fpe.except"(i32 2) ]
4646
// CHECK-NEXT: br i1 [[ISINF]], label [[FPCLASSIFY_END]], label [[FPCLASSIFY_NOT_INF]]
4747
// CHECK: fpclassify_not_inf:
48-
// CHECK-NEXT: [[ISNORMAL:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x10000000000000, metadata !"uge", metadata !"fpexcept.strict") #[[ATTR4]]
48+
// CHECK-NEXT: [[ISNORMAL:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x10000000000000, metadata !"uge", metadata !"fpexcept.strict") #[[ATTR5]] [ "fpe.except"(i32 2) ]
4949
// CHECK-NEXT: [[TMP2]] = select i1 [[ISNORMAL]], i32 2, i32 3
5050
// CHECK-NEXT: br label [[FPCLASSIFY_END]]
5151
//
@@ -60,7 +60,7 @@ void test_fpclassify(double d) {
6060
// CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2
6161
// CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2
6262
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2
63-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 516) #[[ATTR4]]
63+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 516) #[[ATTR5]]
6464
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
6565
// CHECK-NEXT: call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR4]]
6666
// CHECK-NEXT: ret void
@@ -76,7 +76,7 @@ void test_fp16_isinf(_Float16 h) {
7676
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
7777
// CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4
7878
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
79-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 516) #[[ATTR4]]
79+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 516) #[[ATTR5]]
8080
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
8181
// CHECK-NEXT: call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR4]]
8282
// CHECK-NEXT: ret void
@@ -92,7 +92,7 @@ void test_float_isinf(float f) {
9292
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
9393
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
9494
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
95-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 516) #[[ATTR4]]
95+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 516) #[[ATTR5]]
9696
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
9797
// CHECK-NEXT: call void @p(ptr noundef @.str.4, i32 noundef [[TMP2]]) #[[ATTR4]]
9898
// CHECK-NEXT: ret void
@@ -108,7 +108,7 @@ void test_double_isinf(double d) {
108108
// CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2
109109
// CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2
110110
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2
111-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 504) #[[ATTR4]]
111+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 504) #[[ATTR5]]
112112
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
113113
// CHECK-NEXT: call void @p(ptr noundef @.str.5, i32 noundef [[TMP2]]) #[[ATTR4]]
114114
// CHECK-NEXT: ret void
@@ -124,7 +124,7 @@ void test_fp16_isfinite(_Float16 h) {
124124
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
125125
// CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4
126126
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
127-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 504) #[[ATTR4]]
127+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 504) #[[ATTR5]]
128128
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
129129
// CHECK-NEXT: call void @p(ptr noundef @.str.6, i32 noundef [[TMP2]]) #[[ATTR4]]
130130
// CHECK-NEXT: ret void
@@ -140,7 +140,7 @@ void test_float_isfinite(float f) {
140140
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
141141
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
142142
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
143-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 504) #[[ATTR4]]
143+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 504) #[[ATTR5]]
144144
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
145145
// CHECK-NEXT: call void @p(ptr noundef @.str.7, i32 noundef [[TMP2]]) #[[ATTR4]]
146146
// CHECK-NEXT: ret void
@@ -156,8 +156,8 @@ void test_double_isfinite(double d) {
156156
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
157157
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
158158
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
159-
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR5]]
160-
// CHECK-NEXT: [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
159+
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR6]]
160+
// CHECK-NEXT: [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5]] [ "fpe.except"(i32 2) ]
161161
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP0]] to i64
162162
// CHECK-NEXT: [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 0
163163
// CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 -1, i32 1
@@ -176,7 +176,7 @@ void test_isinf_sign(double d) {
176176
// CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2
177177
// CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2
178178
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2
179-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 3) #[[ATTR4]]
179+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 3) #[[ATTR5]]
180180
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
181181
// CHECK-NEXT: call void @p(ptr noundef @.str.9, i32 noundef [[TMP2]]) #[[ATTR4]]
182182
// CHECK-NEXT: ret void
@@ -192,7 +192,7 @@ void test_fp16_isnan(_Float16 h) {
192192
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
193193
// CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4
194194
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
195-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 3) #[[ATTR4]]
195+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 3) #[[ATTR5]]
196196
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
197197
// CHECK-NEXT: call void @p(ptr noundef @.str.10, i32 noundef [[TMP2]]) #[[ATTR4]]
198198
// CHECK-NEXT: ret void
@@ -208,7 +208,7 @@ void test_float_isnan(float f) {
208208
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
209209
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
210210
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
211-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 3) #[[ATTR4]]
211+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 3) #[[ATTR5]]
212212
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
213213
// CHECK-NEXT: call void @p(ptr noundef @.str.11, i32 noundef [[TMP2]]) #[[ATTR4]]
214214
// CHECK-NEXT: ret void
@@ -224,7 +224,7 @@ void test_double_isnan(double d) {
224224
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
225225
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
226226
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
227-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 264) #[[ATTR4]]
227+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 264) #[[ATTR5]]
228228
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
229229
// CHECK-NEXT: call void @p(ptr noundef @.str.12, i32 noundef [[TMP2]]) #[[ATTR4]]
230230
// CHECK-NEXT: ret void

clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
144144
// STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR1]], align 4
145145
// STRICTFP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP0]], i32 [[TMP1]]
146146
// STRICTFP-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX]], align 4
147-
// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5]]
147+
// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5]] [ "fpe.round"(i32 1), "fpe.except"(i32 2) ]
148148
// STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
149149
// STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR2]], align 4
150150
// STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
@@ -173,7 +173,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
173173
// STRICTFP: attributes #[[ATTR2]] = { convergent noinline nounwind optnone strictfp "stack-protector-buffer-size"="8" }
174174
// STRICTFP: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) }
175175
// STRICTFP: attributes #[[ATTR4]] = { convergent nounwind "stack-protector-buffer-size"="8" }
176-
// STRICTFP: attributes #[[ATTR5]] = { strictfp }
176+
// STRICTFP: attributes #[[ATTR5]] = { strictfp memory(inaccessiblemem: readwrite) }
177177
//.
178178
// SPIR32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
179179
// SPIR32: [[META1:![0-9]+]] = !{i32 2, i32 0}

llvm/docs/LangRef.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3057,6 +3057,29 @@ A "convergencectrl" operand bundle is only valid on a ``convergent`` operation.
30573057
When present, the operand bundle must contain exactly one value of token type.
30583058
See the :doc:`ConvergentOperations` document for details.
30593059

3060+
.. _ob_fpe:
3061+
3062+
Floating-point Environment Operand Bundles
3063+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3064+
3065+
These operand bundles provide details on how the operation interacts with the
3066+
:ref:`floating-point environment <floatenv>`. There are two kinds of such
3067+
operand bundles, which characterize interaction with floating-point control
3068+
modes and status bits.
3069+
3070+
An operand bundle tagged with "fpe.round" may be associated with the operations
3071+
that may depend on rounding mode. It has an integer value, which represents
3072+
the rounding mode with the same encoding as ``llvm::RoundingMode`` uses. If it
3073+
is present and is not equal to ``llvm::RoundingMode::Dynamic``, it specifies the rounding
3074+
mode, which will be used for the operation evaluation. The value
3075+
``llvm::RoundingMode::Dynamic`` indicates that the rounding mode used by the operation is
3076+
specified in a floating-point control register.
3077+
3078+
An operand bundle tagged with "fpe.except" may be associated with the operations
3079+
that may read or write floating-point exception flags. It has the same meaning
3080+
and encoding as the corresponding argument in
3081+
:ref:`constrained intrinsics <constrainedfp>`.
3082+
30603083
.. _moduleasm:
30613084

30623085
Module-Level Inline Assembly

llvm/include/llvm/ADT/FloatingPointMode.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,15 @@ enum class RoundingMode : int8_t {
4747
Invalid = -1 ///< Denotes invalid value.
4848
};
4949

50+
inline bool isValidRoundingMode(int X) {
51+
return X >= 0 && X <= static_cast<int>(RoundingMode::Dynamic);
52+
}
53+
54+
inline RoundingMode castToRoundingMode(int X) {
55+
assert(isValidRoundingMode(X));
56+
return static_cast<RoundingMode>(X);
57+
}
58+
5059
/// Returns text representation of the given rounding mode.
5160
inline StringRef spell(RoundingMode RM) {
5261
switch (RM) {

llvm/include/llvm/IR/AutoUpgrade.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ namespace llvm {
111111
/// Upgrade operand bundles (without knowing about their user instruction).
112112
void UpgradeOperandBundles(std::vector<OperandBundleDef> &OperandBundles);
113113

114+
CallBase *upgradeConstrainedFunctionCall(CallBase *CB);
115+
114116
} // End llvm namespace
115117

116118
#endif

llvm/include/llvm/IR/FPEnv.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ enum ExceptionBehavior : uint8_t {
4343

4444
}
4545

46+
inline bool isValidExceptionBehavior(unsigned X) {
47+
return X <= fp::ExceptionBehavior::ebStrict;
48+
}
49+
50+
inline fp::ExceptionBehavior castToExceptionBehavior(unsigned X) {
51+
assert(isValidExceptionBehavior(X));
52+
return static_cast<fp::ExceptionBehavior>(X);
53+
}
54+
4655
/// Returns a valid RoundingMode enumerator when given a string
4756
/// that is valid as input in constrained intrinsic rounding mode
4857
/// metadata.

llvm/include/llvm/IR/IRBuilder.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,9 @@ class IRBuilderBase {
379379

380380
void setConstrainedFPCallAttr(CallBase *I) {
381381
I->addFnAttr(Attribute::StrictFP);
382+
MemoryEffects ME = MemoryEffects::inaccessibleMemOnly();
383+
auto A = Attribute::getWithMemoryEffects(getContext(), ME);
384+
I->addFnAttr(A);
382385
}
383386

384387
void setDefaultOperandBundles(ArrayRef<OperandBundleDef> OpBundles) {
@@ -997,6 +1000,16 @@ class IRBuilderBase {
9971000
ArrayRef<Value *> Args, FMFSource FMFSource = {},
9981001
const Twine &Name = "");
9991002

1003+
/// Create a call to intrinsic \p ID with \p Args, mangled using \p Types and
1004+
/// with operand bundles.
1005+
/// If \p FMFSource is provided, copy fast-math-flags from that instruction to
1006+
/// the intrinsic.
1007+
CallInst *CreateIntrinsic(Intrinsic::ID ID, ArrayRef<Type *> Types,
1008+
ArrayRef<Value *> Args,
1009+
ArrayRef<OperandBundleDef> OpBundles,
1010+
Instruction *FMFSource = nullptr,
1011+
const Twine &Name = "");
1012+
10001013
/// Create a call to intrinsic \p ID with \p RetTy and \p Args. If
10011014
/// \p FMFSource is provided, copy fast-math-flags from that instruction to
10021015
/// the intrinsic.
@@ -1331,6 +1344,15 @@ class IRBuilderBase {
13311344
return I;
13321345
}
13331346

1347+
RoundingMode
1348+
getEffectiveRounding(std::optional<RoundingMode> Rounding = std::nullopt) {
1349+
RoundingMode RM = DefaultConstrainedRounding;
1350+
1351+
if (Rounding)
1352+
RM = *Rounding;
1353+
return RM;
1354+
}
1355+
13341356
Value *getConstrainedFPRounding(std::optional<RoundingMode> Rounding) {
13351357
RoundingMode UseRounding = DefaultConstrainedRounding;
13361358

@@ -1345,6 +1367,14 @@ class IRBuilderBase {
13451367
return MetadataAsValue::get(Context, RoundingMDS);
13461368
}
13471369

1370+
fp::ExceptionBehavior getEffectiveExceptionBehavior(
1371+
std::optional<fp::ExceptionBehavior> Except = std::nullopt) {
1372+
fp::ExceptionBehavior EB = DefaultConstrainedExcept;
1373+
if (Except)
1374+
EB = *Except;
1375+
return EB;
1376+
}
1377+
13481378
Value *getConstrainedFPExcept(std::optional<fp::ExceptionBehavior> Except) {
13491379
std::optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(
13501380
Except.value_or(DefaultConstrainedExcept));
@@ -2485,6 +2515,10 @@ class IRBuilderBase {
24852515
Function *Callee, ArrayRef<Value *> Args, const Twine &Name = "",
24862516
std::optional<RoundingMode> Rounding = std::nullopt,
24872517
std::optional<fp::ExceptionBehavior> Except = std::nullopt);
2518+
CallInst *CreateConstrainedFPCall(
2519+
Intrinsic::ID ID, ArrayRef<Value *> Args, const Twine &Name = "",
2520+
std::optional<RoundingMode> Rounding = std::nullopt,
2521+
std::optional<fp::ExceptionBehavior> Except = std::nullopt);
24882522

24892523
Value *CreateSelect(Value *C, Value *True, Value *False,
24902524
const Twine &Name = "", Instruction *MDFrom = nullptr);
@@ -2688,6 +2722,20 @@ class IRBuilderBase {
26882722
/// Create an assume intrinsic call that represents a dereferenceable
26892723
/// assumption on the provided pointer.
26902724
CallInst *CreateDereferenceableAssumption(Value *PtrValue, Value *SizeValue);
2725+
2726+
void
2727+
createFPRoundingBundle(SmallVectorImpl<OperandBundleDef> &Bundles,
2728+
std::optional<RoundingMode> Rounding = std::nullopt) {
2729+
int RM = static_cast<int32_t>(getEffectiveRounding(Rounding));
2730+
Bundles.emplace_back("fpe.round", getInt32(RM));
2731+
}
2732+
2733+
void createFPExceptionBundle(
2734+
SmallVectorImpl<OperandBundleDef> &Bundles,
2735+
std::optional<fp::ExceptionBehavior> Except = std::nullopt) {
2736+
int EB = getEffectiveExceptionBehavior(Except);
2737+
Bundles.emplace_back("fpe.except", getInt32(EB));
2738+
}
26912739
};
26922740

26932741
/// This provides a uniform API for creating instructions and inserting

0 commit comments

Comments
 (0)