-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[clang][bytecode] Implement ia32_select* builtins #154758
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Timm Baeder (tbaederr) ChangesFull diff: https://github.com/llvm/llvm-project/pull/154758.diff 2 Files Affected:
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2cbebaf7b630e..c3e765cf99aa3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2778,6 +2778,40 @@ static bool interp__builtin_elementwise_fma(InterpState &S, CodePtr OpPC,
return true;
}
+/// AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
+static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, unsigned BuiltinID) {
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ PrimType MaskT = *S.getContext().classify(Call->getArg(0));
+ APSInt Mask = popToAPSInt(S.Stk, MaskT);
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ assert(LHS.getNumElems() == RHS.getNumElems());
+ assert(LHS.getNumElems() == Dst.getNumElems());
+ unsigned NumElems = LHS.getNumElems();
+ PrimType ElemT = LHS.getFieldDesc()->getPrimType();
+ PrimType DstElemT = Dst.getFieldDesc()->getPrimType();
+
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (ElemT == PT_Float) {
+ assert(DstElemT == PT_Float);
+ Dst.elem<Floating>(I) =
+ Mask[I] ? LHS.elem<Floating>(I) : RHS.elem<Floating>(I);
+ } else {
+ APSInt Elem;
+ INT_TYPE_SWITCH(ElemT, {
+ Elem = Mask[I] ? LHS.elem<T>(I).toAPSInt() : RHS.elem<T>(I).toAPSInt();
+ });
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT,
+ { Dst.elem<T>(I) = static_cast<T>(Elem); });
+ }
+ }
+ Dst.initializeAllElements();
+
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3210,9 +3244,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_pmuludq256:
case clang::X86::BI__builtin_ia32_pmuludq512:
return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID);
+
case Builtin::BI__builtin_elementwise_fma:
return interp__builtin_elementwise_fma(S, OpPC, Call);
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectph_128:
+ case X86::BI__builtin_ia32_selectph_256:
+ case X86::BI__builtin_ia32_selectph_512:
+ case X86::BI__builtin_ia32_selectpbf_128:
+ case X86::BI__builtin_ia32_selectpbf_256:
+ case X86::BI__builtin_ia32_selectpbf_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512:
+ return interp__builtin_select(S, OpPC, Call, BuiltinID);
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 14a4bbf712f8c..29916be23cbae 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3,6 +3,11 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
#include "builtin_test_helpers.h"
|
|
@tbaederr for the new interpreter is the plan to always have target builtins in the same file and switch statement - or should we split them off into their own files like CodeGen did with EmitTargetArchBuiltinExpr? |
|
I don't see a good reason for different implementation files right now, |
|
There are a decent number of x86 intrinsics I'm hoping to support in constexpr - we'll have to see if we step over any kind of tolerance limit :/ |
| /// AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]". | ||
| static bool interp__builtin_select(InterpState &S, CodePtr OpPC, | ||
| const CallExpr *Call) { | ||
| const Pointer &RHS = S.Stk.pop<Pointer>(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
assert(Call->getNumArgs() == 1);?
| Mask[I] ? LHS.elem<Floating>(I) : RHS.elem<Floating>(I); | ||
| } else { | ||
| APSInt Elem; | ||
| INT_TYPE_SWITCH(ElemT, { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hello, I am new to LLVM/Clang. I want to ask why there is an INT_TYPE_SWITCH and then another INT_TYPE_SWITCH_NO_BOOL. Is it possible that ElemT is PT_Bool? I checked BuiltinsX86.td and all these select buitins require vectors of integers as their parameters.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't remember anymore, but looking at this now, I'd probably use one only one of those switches. If it can't be bool, there should be an assert somewhere.
No description provided.