Skip to content

Commit c3bf73b

Browse files
authored
[clang] Add elementwise fshl/fshr builtins (#153113)
This patch implements `__builtin_elementwise_fshl` and `__builtin_elementwise_fshr` builtins. These map to the fshl/fshr intrinsics described here: - https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic - https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic Fixes #152555.
1 parent 1840106 commit c3bf73b

File tree

7 files changed

+158
-0
lines changed

7 files changed

+158
-0
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,15 @@ of different sizes and signs is forbidden in binary and ternary builtins.
860860
semantics, see `LangRef
861861
<http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
862862
for the comparison.
863+
T __builtin_elementwise_fshl(T x, T y, T z)    perform a funnel shift left: concatenate x and y (x is the most        integer types
864+
                                               significant bits of the wide value), shift the combined value left
865+
                                               by z (modulo the element bit width), and extract the most significant
866+
                                               bits to produce a result the same size as the original arguments.
867+
868+
T __builtin_elementwise_fshr(T x, T y, T z)    perform a funnel shift right: concatenate x and y (x is the most       integer types
869+
                                               significant bits of the wide value), shift the combined value right
870+
                                               by z (modulo the element bit width), and extract the least significant
871+
                                               bits to produce a result the same size as the original arguments.
863872
============================================== ====================================================================== =========================================
864873

865874

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ C23 Feature Support
113113

114114
Non-comprehensive list of changes in this release
115115
-------------------------------------------------
116+
- Added ``__builtin_elementwise_fshl`` and ``__builtin_elementwise_fshr``.
117+
116118
- Added ``__builtin_elementwise_minimumnum`` and ``__builtin_elementwise_maximumnum``.
117119

118120
- Trapping UBSan (e.g. ``-fsanitize-trap=undefined``) now emits a string describing the reason for

clang/include/clang/Basic/Builtins.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1514,6 +1514,18 @@ def ElementwiseSubSat : Builtin {
15141514
let Prototype = "void(...)";
15151515
}
15161516

1517+
// Declares the `__builtin_elementwise_fshl` builtin (elementwise funnel shift
// left). The prototype is the variadic placeholder "void(...)" and the builtin
// is tagged CustomTypeChecking; the actual argument validation for this
// builtin is the BuiltinElementwiseTernaryMath call (integer-type restriction)
// in Sema::CheckBuiltinFunctionCall.
def ElementwiseFshl : Builtin {
1518+
let Spellings = ["__builtin_elementwise_fshl"];
1519+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1520+
let Prototype = "void(...)";
1521+
}
1522+
1523+
// Declares the `__builtin_elementwise_fshr` builtin (elementwise funnel shift
// right). The prototype is the variadic placeholder "void(...)" and the builtin
// is tagged CustomTypeChecking; the actual argument validation for this
// builtin is the BuiltinElementwiseTernaryMath call (integer-type restriction)
// in Sema::CheckBuiltinFunctionCall.
def ElementwiseFshr : Builtin {
1524+
let Spellings = ["__builtin_elementwise_fshr"];
1525+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1526+
let Prototype = "void(...)";
1527+
}
1528+
15171529
def ReduceMax : Builtin {
15181530
let Spellings = ["__builtin_reduce_max"];
15191531
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4030,6 +4030,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
40304030
case Builtin::BI__builtin_elementwise_fma:
40314031
return RValue::get(
40324032
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fma));
4033+
case Builtin::BI__builtin_elementwise_fshl:
4034+
return RValue::get(
4035+
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshl));
4036+
case Builtin::BI__builtin_elementwise_fshr:
4037+
return RValue::get(
4038+
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshr));
4039+
40334040
case Builtin::BI__builtin_elementwise_add_sat:
40344041
case Builtin::BI__builtin_elementwise_sub_sat: {
40354042
Value *Op0 = EmitScalarExpr(E->getArg(0));

clang/lib/Sema/SemaChecking.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3031,6 +3031,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
30313031
EltwiseBuiltinArgTyRestriction::IntegerTy))
30323032
return ExprError();
30333033
break;
3034+
case Builtin::BI__builtin_elementwise_fshl:
3035+
case Builtin::BI__builtin_elementwise_fshr:
3036+
if (BuiltinElementwiseTernaryMath(
3037+
TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
3038+
return ExprError();
3039+
break;
30343040
case Builtin::BI__builtin_elementwise_min:
30353041
case Builtin::BI__builtin_elementwise_max:
30363042
if (BuiltinElementwiseMath(TheCall))

clang/test/CodeGen/builtins-elementwise-math.c

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,3 +1179,89 @@ void test_builtin_elementwise_fma(float f32, double f64,
11791179
half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);
11801180

11811181
}
1182+
1183+
// Verifies that __builtin_elementwise_fshl and __builtin_elementwise_fshr are
// lowered to calls of the llvm.fshl / llvm.fshr intrinsics, using the overload
// that matches the operand type: scalar i64, i16 and i8, and the vector types
// <8 x i16> and <4 x i32>. Each group loads the three operands, calls the
// intrinsic, and stores the result; the left shift is checked first, then the
// right shift on the same operands.
void test_builtin_elementwise_fshl(long long int i1, long long int i2,
1184+
long long int i3, unsigned short us1,
1185+
unsigned short us2, unsigned short us3,
1186+
char c1, char c2, char c3,
1187+
unsigned char uc1, unsigned char uc2,
1188+
unsigned char uc3, si8 vi1, si8 vi2,
1189+
si8 vi3, u4 vu1, u4 vu2, u4 vu3) {
1190+
// long long int operands: i64 overload.
// CHECK: [[I1:%.+]] = load i64, ptr %i1.addr
1191+
// CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr
1192+
// CHECK-NEXT: [[I3:%.+]] = load i64, ptr %i3.addr
1193+
// CHECK-NEXT: [[I4:%.+]] = call i64 @llvm.fshl.i64(i64 [[I1]], i64 [[I2]], i64 [[I3]])
1194+
// CHECK-NEXT: store i64 [[I4]], ptr %tmp_lli_l
1195+
// CHECK-NEXT: [[I5:%.+]] = load i64, ptr %i1.addr
1196+
// CHECK-NEXT: [[I6:%.+]] = load i64, ptr %i2.addr
1197+
// CHECK-NEXT: [[I7:%.+]] = load i64, ptr %i3.addr
1198+
// CHECK-NEXT: [[I8:%.+]] = call i64 @llvm.fshr.i64(i64 [[I5]], i64 [[I6]], i64 [[I7]])
1199+
// CHECK-NEXT: store i64 [[I8]], ptr %tmp_lli_r
1200+
long long int tmp_lli_l = __builtin_elementwise_fshl(i1, i2, i3);
1201+
long long int tmp_lli_r = __builtin_elementwise_fshr(i1, i2, i3);
1202+
1203+
// unsigned short operands: i16 overload.
// CHECK: [[US1:%.+]] = load i16, ptr %us1.addr
1204+
// CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr
1205+
// CHECK-NEXT: [[US3:%.+]] = load i16, ptr %us3.addr
1206+
// CHECK-NEXT: [[US4:%.+]] = call i16 @llvm.fshl.i16(i16 [[US1]], i16 [[US2]], i16 [[US3]])
1207+
// CHECK-NEXT: store i16 [[US4]], ptr %tmp_usi_l
1208+
// CHECK-NEXT: [[US5:%.+]] = load i16, ptr %us1.addr
1209+
// CHECK-NEXT: [[US6:%.+]] = load i16, ptr %us2.addr
1210+
// CHECK-NEXT: [[US7:%.+]] = load i16, ptr %us3.addr
1211+
// CHECK-NEXT: [[US8:%.+]] = call i16 @llvm.fshr.i16(i16 [[US5]], i16 [[US6]], i16 [[US7]])
1212+
// CHECK-NEXT: store i16 [[US8]], ptr %tmp_usi_r
1213+
unsigned short tmp_usi_l = __builtin_elementwise_fshl(us1, us2, us3);
1214+
unsigned short tmp_usi_r = __builtin_elementwise_fshr(us1, us2, us3);
1215+
1216+
// char operands: i8 overload.
// CHECK: [[C1:%.+]] = load i8, ptr %c1.addr
1217+
// CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr
1218+
// CHECK-NEXT: [[C3:%.+]] = load i8, ptr %c3.addr
1219+
// CHECK-NEXT: [[C4:%.+]] = call i8 @llvm.fshl.i8(i8 [[C1]], i8 [[C2]], i8 [[C3]])
1220+
// CHECK-NEXT: store i8 [[C4]], ptr %tmp_c_l
1221+
// CHECK-NEXT: [[C5:%.+]] = load i8, ptr %c1.addr
1222+
// CHECK-NEXT: [[C6:%.+]] = load i8, ptr %c2.addr
1223+
// CHECK-NEXT: [[C7:%.+]] = load i8, ptr %c3.addr
1224+
// CHECK-NEXT: [[C8:%.+]] = call i8 @llvm.fshr.i8(i8 [[C5]], i8 [[C6]], i8 [[C7]])
1225+
// CHECK-NEXT: store i8 [[C8]], ptr %tmp_c_r
1226+
char tmp_c_l = __builtin_elementwise_fshl(c1, c2, c3);
1227+
char tmp_c_r = __builtin_elementwise_fshr(c1, c2, c3);
1228+
1229+
// unsigned char operands: also the i8 overload.
// CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr
1230+
// CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr
1231+
// CHECK-NEXT: [[UC3:%.+]] = load i8, ptr %uc3.addr
1232+
// CHECK-NEXT: [[UC4:%.+]] = call i8 @llvm.fshl.i8(i8 [[UC1]], i8 [[UC2]], i8 [[UC3]])
1233+
// CHECK-NEXT: store i8 [[UC4]], ptr %tmp_uc_l
1234+
// CHECK-NEXT: [[UC5:%.+]] = load i8, ptr %uc1.addr
1235+
// CHECK-NEXT: [[UC6:%.+]] = load i8, ptr %uc2.addr
1236+
// CHECK-NEXT: [[UC7:%.+]] = load i8, ptr %uc3.addr
1237+
// CHECK-NEXT: [[UC8:%.+]] = call i8 @llvm.fshr.i8(i8 [[UC5]], i8 [[UC6]], i8 [[UC7]])
1238+
// CHECK-NEXT: store i8 [[UC8]], ptr %tmp_uc_r
1239+
unsigned char tmp_uc_l = __builtin_elementwise_fshl(uc1, uc2, uc3);
1240+
unsigned char tmp_uc_r = __builtin_elementwise_fshr(uc1, uc2, uc3);
1241+
1242+
// si8 vector operands: <8 x i16> overload.
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr
1243+
// CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr
1244+
// CHECK-NEXT: [[VI3:%.+]] = load <8 x i16>, ptr %vi3.addr
1245+
// CHECK-NEXT: [[VI4:%.+]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]], <8 x i16> [[VI3]])
1246+
// CHECK-NEXT: store <8 x i16> [[VI4]], ptr %tmp_vi_l
1247+
// CHECK-NEXT: [[VI5:%.+]] = load <8 x i16>, ptr %vi1.addr
1248+
// CHECK-NEXT: [[VI6:%.+]] = load <8 x i16>, ptr %vi2.addr
1249+
// CHECK-NEXT: [[VI7:%.+]] = load <8 x i16>, ptr %vi3.addr
1250+
// CHECK-NEXT: [[VI8:%.+]] = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> [[VI5]], <8 x i16> [[VI6]], <8 x i16> [[VI7]])
1251+
// CHECK-NEXT: store <8 x i16> [[VI8]], ptr %tmp_vi_r
1252+
si8 tmp_vi_l = __builtin_elementwise_fshl(vi1, vi2, vi3);
1253+
si8 tmp_vi_r = __builtin_elementwise_fshr(vi1, vi2, vi3);
1254+
1255+
// u4 vector operands: <4 x i32> overload.
// CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr
1256+
// CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr
1257+
// CHECK-NEXT: [[VU3:%.+]] = load <4 x i32>, ptr %vu3.addr
1258+
// CHECK-NEXT: [[VU4:%.+]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]], <4 x i32> [[VU3]])
1259+
// CHECK-NEXT: store <4 x i32> [[VU4]], ptr %tmp_vu_l
1260+
// CHECK-NEXT: [[VU5:%.+]] = load <4 x i32>, ptr %vu1.addr
1261+
// CHECK-NEXT: [[VU6:%.+]] = load <4 x i32>, ptr %vu2.addr
1262+
// CHECK-NEXT: [[VU7:%.+]] = load <4 x i32>, ptr %vu3.addr
1263+
// CHECK-NEXT: [[VU8:%.+]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[VU5]], <4 x i32> [[VU6]], <4 x i32> [[VU7]])
1264+
// CHECK-NEXT: store <4 x i32> [[VU8]], ptr %tmp_vu_r
1265+
u4 tmp_vu_l = __builtin_elementwise_fshl(vu1, vu2, vu3);
1266+
u4 tmp_vu_r = __builtin_elementwise_fshr(vu1, vu2, vu3);
1267+
}

clang/test/Sema/builtins-elementwise-math.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,42 @@ void test_builtin_elementwise_fma(int i32, int2 v2i32, short i16,
12941294
// expected-error@-1 {{3rd argument must be a scalar or vector of floating-point types (was '_Complex float')}}
12951295
}
12961296

1297+
// Negative tests for __builtin_elementwise_fshl and __builtin_elementwise_fshr.
// Every call in this function is ill-formed and is immediately followed by the
// diagnostic it must produce. The cases cover: too few arguments (0 and 2),
// scalar arguments of different integer types, floating-point scalar
// arguments, a floating-point vector argument, and vector arguments with
// different element counts.
void test_builtin_elementwise_fsh(int i32, int2 v2i32, short i16, int3 v3i32,
1298+
double f64, float f32, float2 v2f32) {
1299+
i32 = __builtin_elementwise_fshl();
1300+
// expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
1301+
1302+
i32 = __builtin_elementwise_fshr();
1303+
// expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
1304+
1305+
i32 = __builtin_elementwise_fshl(i32, i32);
1306+
// expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
1307+
1308+
i32 = __builtin_elementwise_fshr(i32, i32);
1309+
// expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
1310+
1311+
i32 = __builtin_elementwise_fshl(i32, i32, i16);
1312+
// expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
1313+
1314+
i16 = __builtin_elementwise_fshr(i16, i32, i16);
1315+
// expected-error@-1 {{arguments are of different types ('short' vs 'int')}}
1316+
1317+
f32 = __builtin_elementwise_fshl(f32, f32, f32);
1318+
// expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float')}}
1319+
1320+
f64 = __builtin_elementwise_fshr(f64, f64, f64);
1321+
// expected-error@-1 {{argument must be a scalar or vector of integer types (was 'double')}}
1322+
1323+
v2i32 = __builtin_elementwise_fshl(v2i32, v2i32, v2f32);
1324+
// expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float2' (vector of 2 'float' values))}}
1325+
1326+
v2i32 = __builtin_elementwise_fshr(v2i32, v2i32, v3i32);
1327+
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int3' (vector of 3 'int' values))}}
1328+
1329+
v3i32 = __builtin_elementwise_fshl(v3i32, v3i32, v2i32);
1330+
// expected-error@-1 {{arguments are of different types ('int3' (vector of 3 'int' values) vs 'int2' (vector of 2 'int' values))}}
1331+
}
1332+
12971333
typedef struct {
12981334
float3 b;
12991335
} struct_float3;

0 commit comments

Comments
 (0)