@@ -1179,3 +1179,89 @@ void test_builtin_elementwise_fma(float f32, double f64,
11791179 half2 tmp2_v2f16 = __builtin_elementwise_fma (v2f16 , v2f16 , (half2 )4.0 );
11801180
11811181}
1182+
// Codegen test for the three-operand funnel-shift builtins.
// Each section below calls __builtin_elementwise_fshl and then
// __builtin_elementwise_fshr with three operands of one type. The FileCheck
// directives placed ahead of each pair of calls expect, per call: three loads
// of the operands, a call to the matching llvm.fshl.* / llvm.fshr.* intrinsic,
// and a store of the result into the named temporary.
// NOTE(review): the function name mentions only fshl, but fshr is exercised
// here as well.
1183+ void test_builtin_elementwise_fshl (long long int i1 , long long int i2 ,
1184+ long long int i3 , unsigned short us1 ,
1185+ unsigned short us2 , unsigned short us3 ,
1186+ char c1 , char c2 , char c3 ,
1187+ unsigned char uc1 , unsigned char uc2 ,
1188+ unsigned char uc3 , si8 vi1 , si8 vi2 ,
1189+ si8 vi3 , u4 vu1 , u4 vu2 , u4 vu3 ) {
// long long int operands: expected to lower to the i64 intrinsics.
1190+ // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr
1191+ // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr
1192+ // CHECK-NEXT: [[I3:%.+]] = load i64, ptr %i3.addr
1193+ // CHECK-NEXT: [[I4:%.+]] = call i64 @llvm.fshl.i64(i64 [[I1]], i64 [[I2]], i64 [[I3]])
1194+ // CHECK-NEXT: store i64 [[I4]], ptr %tmp_lli_l
1195+ // CHECK-NEXT: [[I5:%.+]] = load i64, ptr %i1.addr
1196+ // CHECK-NEXT: [[I6:%.+]] = load i64, ptr %i2.addr
1197+ // CHECK-NEXT: [[I7:%.+]] = load i64, ptr %i3.addr
1198+ // CHECK-NEXT: [[I8:%.+]] = call i64 @llvm.fshr.i64(i64 [[I5]], i64 [[I6]], i64 [[I7]])
1199+ // CHECK-NEXT: store i64 [[I8]], ptr %tmp_lli_r
1200+ long long int tmp_lli_l = __builtin_elementwise_fshl (i1 , i2 , i3 );
1201+ long long int tmp_lli_r = __builtin_elementwise_fshr (i1 , i2 , i3 );
1202+
// unsigned short operands: expected to lower to the i16 intrinsics.
1203+ // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr
1204+ // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr
1205+ // CHECK-NEXT: [[US3:%.+]] = load i16, ptr %us3.addr
1206+ // CHECK-NEXT: [[US4:%.+]] = call i16 @llvm.fshl.i16(i16 [[US1]], i16 [[US2]], i16 [[US3]])
1207+ // CHECK-NEXT: store i16 [[US4]], ptr %tmp_usi_l
1208+ // CHECK-NEXT: [[US5:%.+]] = load i16, ptr %us1.addr
1209+ // CHECK-NEXT: [[US6:%.+]] = load i16, ptr %us2.addr
1210+ // CHECK-NEXT: [[US7:%.+]] = load i16, ptr %us3.addr
1211+ // CHECK-NEXT: [[US8:%.+]] = call i16 @llvm.fshr.i16(i16 [[US5]], i16 [[US6]], i16 [[US7]])
1212+ // CHECK-NEXT: store i16 [[US8]], ptr %tmp_usi_r
1213+ unsigned short tmp_usi_l = __builtin_elementwise_fshl (us1 , us2 , us3 );
1214+ unsigned short tmp_usi_r = __builtin_elementwise_fshr (us1 , us2 , us3 );
1215+
// char operands: expected to lower to the i8 intrinsics.
1216+ // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr
1217+ // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr
1218+ // CHECK-NEXT: [[C3:%.+]] = load i8, ptr %c3.addr
1219+ // CHECK-NEXT: [[C4:%.+]] = call i8 @llvm.fshl.i8(i8 [[C1]], i8 [[C2]], i8 [[C3]])
1220+ // CHECK-NEXT: store i8 [[C4]], ptr %tmp_c_l
1221+ // CHECK-NEXT: [[C5:%.+]] = load i8, ptr %c1.addr
1222+ // CHECK-NEXT: [[C6:%.+]] = load i8, ptr %c2.addr
1223+ // CHECK-NEXT: [[C7:%.+]] = load i8, ptr %c3.addr
1224+ // CHECK-NEXT: [[C8:%.+]] = call i8 @llvm.fshr.i8(i8 [[C5]], i8 [[C6]], i8 [[C7]])
1225+ // CHECK-NEXT: store i8 [[C8]], ptr %tmp_c_r
1226+ char tmp_c_l = __builtin_elementwise_fshl (c1 , c2 , c3 );
1227+ char tmp_c_r = __builtin_elementwise_fshr (c1 , c2 , c3 );
1228+
// unsigned char operands: same i8 intrinsics as the plain char case above.
1229+ // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr
1230+ // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr
1231+ // CHECK-NEXT: [[UC3:%.+]] = load i8, ptr %uc3.addr
1232+ // CHECK-NEXT: [[UC4:%.+]] = call i8 @llvm.fshl.i8(i8 [[UC1]], i8 [[UC2]], i8 [[UC3]])
1233+ // CHECK-NEXT: store i8 [[UC4]], ptr %tmp_uc_l
1234+ // CHECK-NEXT: [[UC5:%.+]] = load i8, ptr %uc1.addr
1235+ // CHECK-NEXT: [[UC6:%.+]] = load i8, ptr %uc2.addr
1236+ // CHECK-NEXT: [[UC7:%.+]] = load i8, ptr %uc3.addr
1237+ // CHECK-NEXT: [[UC8:%.+]] = call i8 @llvm.fshr.i8(i8 [[UC5]], i8 [[UC6]], i8 [[UC7]])
1238+ // CHECK-NEXT: store i8 [[UC8]], ptr %tmp_uc_r
1239+ unsigned char tmp_uc_l = __builtin_elementwise_fshl (uc1 , uc2 , uc3 );
1240+ unsigned char tmp_uc_r = __builtin_elementwise_fshr (uc1 , uc2 , uc3 );
1241+
// si8 vector operands: expected to load as <8 x i16> and lower to the v8i16 intrinsics.
1242+ // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr
1243+ // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr
1244+ // CHECK-NEXT: [[VI3:%.+]] = load <8 x i16>, ptr %vi3.addr
1245+ // CHECK-NEXT: [[VI4:%.+]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]], <8 x i16> [[VI3]])
1246+ // CHECK-NEXT: store <8 x i16> [[VI4]], ptr %tmp_vi_l
1247+ // CHECK-NEXT: [[VI5:%.+]] = load <8 x i16>, ptr %vi1.addr
1248+ // CHECK-NEXT: [[VI6:%.+]] = load <8 x i16>, ptr %vi2.addr
1249+ // CHECK-NEXT: [[VI7:%.+]] = load <8 x i16>, ptr %vi3.addr
1250+ // CHECK-NEXT: [[VI8:%.+]] = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> [[VI5]], <8 x i16> [[VI6]], <8 x i16> [[VI7]])
1251+ // CHECK-NEXT: store <8 x i16> [[VI8]], ptr %tmp_vi_r
1252+ si8 tmp_vi_l = __builtin_elementwise_fshl (vi1 , vi2 , vi3 );
1253+ si8 tmp_vi_r = __builtin_elementwise_fshr (vi1 , vi2 , vi3 );
1254+
// u4 vector operands: expected to load as <4 x i32> and lower to the v4i32 intrinsics.
1255+ // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr
1256+ // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr
1257+ // CHECK-NEXT: [[VU3:%.+]] = load <4 x i32>, ptr %vu3.addr
1258+ // CHECK-NEXT: [[VU4:%.+]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]], <4 x i32> [[VU3]])
1259+ // CHECK-NEXT: store <4 x i32> [[VU4]], ptr %tmp_vu_l
1260+ // CHECK-NEXT: [[VU5:%.+]] = load <4 x i32>, ptr %vu1.addr
1261+ // CHECK-NEXT: [[VU6:%.+]] = load <4 x i32>, ptr %vu2.addr
1262+ // CHECK-NEXT: [[VU7:%.+]] = load <4 x i32>, ptr %vu3.addr
1263+ // CHECK-NEXT: [[VU8:%.+]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[VU5]], <4 x i32> [[VU6]], <4 x i32> [[VU7]])
1264+ // CHECK-NEXT: store <4 x i32> [[VU8]], ptr %tmp_vu_r
1265+ u4 tmp_vu_l = __builtin_elementwise_fshl (vu1 , vu2 , vu3 );
1266+ u4 tmp_vu_r = __builtin_elementwise_fshr (vu1 , vu2 , vu3 );
1267+ }
0 commit comments