-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Description
| Bugzilla Link | 50305 |
| Version | trunk |
| OS | Linux |
| CC | @RKSimon,@zygoloid,@tlively |
Extended Description
See https://godbolt.org/z/Ms9E4nPhM
Given the following code:
/* Element types; uint8_t and uint16_t are provided by <stdint.h>. */
typedef uint8_t u8;
typedef uint16_t u16;
/* 16-byte vector types (GCC/Clang vector_size extension): 16 x u8 and 8 x u16. */
typedef u8 u8x16 __attribute__((vector_size(16)));
typedef u16 u16x8 __attribute__((vector_size(16)));
/* Two 16-lane byte vectors that are updated lane-by-lane. */
typedef struct {
    u8x16 counter, shift;
} A;
/*
 * Per-lane update of *a: lanes whose counter is non-zero are decremented;
 * lanes whose counter is already zero have their shift value shifted left
 * by one bit instead. This is the reproducer: the constant shift plus mask
 * blend below is what ends up as a variable vector shift in the IR.
 */
void bad(A* a) {
/* Lane mask: all bits set where counter == 0 (taken before counter changes). */
u8x16 active = a->counter == 0;
/* Subtract 1 only in the lanes outside the mask, i.e. where counter != 0. */
a->counter -= 1 & ~active;
/* Blend: shift << 1 in the masked lanes, the unchanged shift everywhere else. */
a->shift = ((a->shift << 1) & active) | (a->shift & ~active);
}
Clang seems to prefer to generate a variable shift in the LLVM IR (see %11), which then cannot be lowered efficiently in x86 SSE3/Wasm:
; IR emitted for bad(A*): field 0 of %struct.A is counter, field 1 is shift.
define dso_local void @_Z3badP1A(%struct.A* nocapture %0) local_unnamed_addr #0 !dbg !267 {
call void @llvm.dbg.value(metadata %struct.A* %0, metadata !285, metadata !DIExpression()), !dbg !287
%2 = getelementptr inbounds %struct.A, %struct.A* %0, i64 0, i32 0, !dbg !288
%3 = load <16 x i8>, <16 x i8>* %2, align 16, !dbg !288, !tbaa !289
; %4: per-lane predicate, true where counter != 0.
%4 = icmp ne <16 x i8> %3, zeroinitializer, !dbg !292
call void @llvm.dbg.value(metadata <16 x i8> undef, metadata !286, metadata !DIExpression()), !dbg !287
; %5 is -1 in the non-zero lanes and 0 elsewhere, so the add decrements exactly those lanes.
%5 = sext <16 x i1> %4 to <16 x i8>, !dbg !293
%6 = add <16 x i8> %3, %5, !dbg !294
store <16 x i8> %6, <16 x i8>* %2, align 16, !dbg !294, !tbaa !289
%7 = getelementptr inbounds %struct.A, %struct.A* %0, i64 0, i32 1, !dbg !295
%8 = load <16 x i8>, <16 x i8>* %7, align 16, !dbg !295, !tbaa !289
; %9/%10: per-lane shift amount, 1 where counter == 0 and 0 otherwise.
%9 = xor <16 x i1> %4, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, !dbg !296
%10 = zext <16 x i1> %9 to <16 x i8>, !dbg !296
; Variable (per-lane) shift that stands in for the source's constant shift and mask blend.
%11 = shl <16 x i8> %8, %10, !dbg !296
store <16 x i8> %11, <16 x i8>* %7, align 16, !dbg !297, !tbaa !289
ret void, !dbg !298
}
Using the platform-specific vector intrinsics seems to avoid this issue.