-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Closed
Description
The llvm.fshl intrinsic is neat. Sadly, the s390x backend does not pick good instructions for it.
https://godbolt.org/z/j6Kcob8oa (or https://godbolt.org/z/eq7Gd4WKG for a pure LLVM IR example)
e.g. this LLVM IR
define noundef <16 x i8> @vec_sld_manual(<16 x i8> %a, <16 x i8> %b) unnamed_addr {
start:
%0 = bitcast <16 x i8> %a to i128
%1 = bitcast <16 x i8> %b to i128
%_3 = tail call noundef i128 @llvm.fshl.i128(i128 noundef %0, i128 noundef %1, i128 noundef 32) #4
%2 = bitcast i128 %_3 to <16 x i8>
ret <16 x i8> %2
}
define <16 x i8> @vec_sld_builtin(<16 x i8> %a, <16 x i8> %b) unnamed_addr {
start:
%_0 = tail call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 noundef 4) #4
ret <16 x i8> %_0
}
declare i128 @llvm.fshl.i128(i128, i128, i128) unnamed_addr #2
declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32 immarg) unnamed_addr #3

An llvm.fshl.i128 whose shift amount is a compile-time constant and a multiple of 8 can be lowered to a single vsldb, but that does not happen:
vec_sld_manual:
vrepib %v0, 96
vsrlb %v0, %v26, %v0
vrepib %v1, 32
vslb %v1, %v24, %v1
vo %v24, %v1, %v0
br %r14
vec_sld_builtin:
vsldb %v24, %v24, %v26, 4
br %r14

Analogous simplifications are possible for vec_sldw and vec_sldb.