Skip to content

Commit e1ad3ca

Browse files
author
z1_cciauto
authored
merge main into amd-staging (llvm#3548)
2 parents efdc65b + 429f6e7 commit e1ad3ca

File tree

209 files changed

+4761
-1158
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

209 files changed

+4761
-1158
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,15 @@ of different sizes and signs is forbidden in binary and ternary builtins.
857857
semantics, see `LangRef
858858
<http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
859859
for the comparison.
860+
T __builtin_elementwise_fshl(T x, T y, T z)    perform a funnel shift left. Concatenate x and y (x is the most        integer types
861+
significant bits of the wide value), the combined value is shifted
862+
left by z, and the most significant bits are extracted to produce
863+
a result that is the same size as the original arguments.
864+
865+
T __builtin_elementwise_fshr(T x, T y, T z)    perform a funnel shift right. Concatenate x and y (x is the most       integer types
866+
significant bits of the wide value), the combined value is shifted
867+
right by z, and the least significant bits are extracted to produce
868+
a result that is the same size as the original arguments.
860869
============================================== ====================================================================== =========================================
861870

862871

clang/docs/OpenMPSupport.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ implementation.
191191
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
192192
| device | teams construct on the host device | :good:`done` | r371553 |
193193
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
194-
| device | support non-contiguous array sections for target update | :good:`done` | |
194+
| device | support non-contiguous array sections for target update | :good:`done` | https://github.com/llvm/llvm-project/pull/144635 |
195195
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
196196
| device | pointer attachment | :good:`done` | |
197197
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ C23 Feature Support
107107

108108
Non-comprehensive list of changes in this release
109109
-------------------------------------------------
110+
- Added ``__builtin_elementwise_fshl`` and ``__builtin_elementwise_fshr``.
111+
110112
- Added ``__builtin_elementwise_minnumnum`` and ``__builtin_elementwise_maxnumnum``.
111113

112114
- Trapping UBSan (e.g. ``-fsanitize-trap=undefined``) now emits a string describing the reason for
@@ -563,6 +565,7 @@ OpenMP Support
563565
- Added parsing and semantic analysis support for the ``need_device_addr``
564566
modifier in the ``adjust_args`` clause.
565567
- Allow array length to be omitted in array section subscript expression.
568+
- Fixed non-contiguous strided update in the ``omp target update`` directive with the ``from`` clause.
566569

567570
Improvements
568571
^^^^^^^^^^^^

clang/include/clang/Basic/Builtins.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1514,6 +1514,18 @@ def ElementwiseSubSat : Builtin {
15141514
let Prototype = "void(...)";
15151515
}
15161516

1517+
def ElementwiseFshl : Builtin {
1518+
let Spellings = ["__builtin_elementwise_fshl"];
1519+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1520+
let Prototype = "void(...)";
1521+
}
1522+
1523+
def ElementwiseFshr : Builtin {
1524+
let Spellings = ["__builtin_elementwise_fshr"];
1525+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1526+
let Prototype = "void(...)";
1527+
}
1528+
15171529
def ReduceMax : Builtin {
15181530
let Spellings = ["__builtin_reduce_max"];
15191531
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -757,14 +757,6 @@ let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
757757
def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
758758
}
759759

760-
let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
761-
def vcvtph2ps : X86Builtin<"_Vector<4, float>(_Vector<8, short>)">;
762-
}
763-
764-
let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
765-
def vcvtph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, short>)">;
766-
}
767-
768760
let Features = "rdrnd", Attributes = [NoThrow] in {
769761
def rdrand16_step : X86Builtin<"unsigned int(unsigned short *)">;
770762
def rdrand32_step : X86Builtin<"unsigned int(unsigned int *)">;

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4030,6 +4030,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
40304030
case Builtin::BI__builtin_elementwise_fma:
40314031
return RValue::get(
40324032
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fma));
4033+
case Builtin::BI__builtin_elementwise_fshl:
4034+
return RValue::get(
4035+
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshl));
4036+
case Builtin::BI__builtin_elementwise_fshr:
4037+
return RValue::get(
4038+
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshr));
4039+
40334040
case Builtin::BI__builtin_elementwise_add_sat:
40344041
case Builtin::BI__builtin_elementwise_sub_sat: {
40354042
Value *Op0 = EmitScalarExpr(E->getArg(0));

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7535,7 +7535,32 @@ class MappableExprsHandler {
75357535
// dimension.
75367536
uint64_t DimSize = 1;
75377537

7538-
bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7538+
// Detects non-contiguous updates due to strided accesses.
7539+
// Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7540+
// correctly when generating information to be passed to the runtime. The
7541+
// flag is set to true if any array section has a constant stride not equal
7542+
// to 1. NOTE(review): a stride that is not an integer constant expression
7543+
// is NOT flagged here (the lambda returns false) - confirm this is intended.
7544+
bool IsNonContiguous =
7545+
CombinedInfo.NonContigInfo.IsNonContiguous ||
7546+
any_of(Components, [&](const auto &Component) {
7547+
const auto *OASE =
7548+
dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7549+
if (!OASE)
7550+
return false;
7551+
7552+
const Expr *StrideExpr = OASE->getStride();
7553+
if (!StrideExpr)
7554+
return false;
7555+
7556+
const auto Constant =
7557+
StrideExpr->getIntegerConstantExpr(CGF.getContext());
7558+
if (!Constant)
7559+
return false;
7560+
7561+
return !Constant->isOne();
7562+
});
7563+
75397564
bool IsPrevMemberReference = false;
75407565

75417566
bool IsPartialMapped =

clang/lib/CodeGen/TargetBuiltins/X86.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2841,8 +2841,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
28412841
return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
28422842

28432843
// f16c half2float intrinsics
2844-
case X86::BI__builtin_ia32_vcvtph2ps:
2845-
case X86::BI__builtin_ia32_vcvtph2ps256:
28462844
case X86::BI__builtin_ia32_vcvtph2ps_mask:
28472845
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
28482846
case X86::BI__builtin_ia32_vcvtph2ps512_mask: {

clang/lib/Headers/emmintrin.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3381,7 +3381,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) {
33813381
/// \param __a
33823382
/// A 32-bit signed integer operand.
33833383
/// \returns A 128-bit vector of [4 x i32].
3384-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) {
3384+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3385+
_mm_cvtsi32_si128(int __a) {
33853386
return __extension__(__m128i)(__v4si){__a, 0, 0, 0};
33863387
}
33873388

@@ -3396,7 +3397,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) {
33963397
/// \param __a
33973398
/// A 64-bit signed integer operand containing the value to be converted.
33983399
/// \returns A 128-bit vector of [2 x i64] containing the converted value.
3399-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) {
3400+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3401+
_mm_cvtsi64_si128(long long __a) {
34003402
return __extension__(__m128i)(__v2di){__a, 0};
34013403
}
34023404

@@ -3411,7 +3413,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) {
34113413
/// A vector of [4 x i32]. The least significant 32 bits are moved to the
34123414
/// destination.
34133415
/// \returns A 32-bit signed integer containing the moved value.
3414-
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) {
3416+
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
3417+
_mm_cvtsi128_si32(__m128i __a) {
34153418
__v4si __b = (__v4si)__a;
34163419
return __b[0];
34173420
}
@@ -3427,7 +3430,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) {
34273430
/// A vector of [2 x i64]. The least significant 64 bits are moved to the
34283431
/// destination.
34293432
/// \returns A 64-bit signed integer containing the moved value.
3430-
static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) {
3433+
static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR
3434+
_mm_cvtsi128_si64(__m128i __a) {
34313435
return __a[0];
34323436
}
34333437

clang/lib/Headers/f16cintrin.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,7 @@
3838
static __inline float __DEFAULT_FN_ATTRS128
3939
_cvtsh_ss(unsigned short __a)
4040
{
41-
__v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
42-
__v4sf __r = __builtin_ia32_vcvtph2ps(__v);
43-
return __r[0];
41+
return (float)__builtin_bit_cast(__fp16, __a);
4442
}
4543

4644
/// Converts a 32-bit single-precision float value to a 16-bit
@@ -109,7 +107,10 @@ _cvtsh_ss(unsigned short __a)
109107
static __inline __m128 __DEFAULT_FN_ATTRS128
110108
_mm_cvtph_ps(__m128i __a)
111109
{
112-
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
110+
typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));
111+
112+
__v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3);
113+
return (__m128) __builtin_convertvector((__v4fp16)__v, __v4sf);
113114
}
114115

115116
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
@@ -153,7 +154,9 @@ _mm_cvtph_ps(__m128i __a)
153154
static __inline __m256 __DEFAULT_FN_ATTRS256
154155
_mm256_cvtph_ps(__m128i __a)
155156
{
156-
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
157+
typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));
158+
159+
return (__m256) __builtin_convertvector((__v8fp16)__a, __v8sf);
157160
}
158161

159162
#undef __DEFAULT_FN_ATTRS128

0 commit comments

Comments
 (0)