Skip to content

Commit c5c3de3

Browse files
committed
[DirectX] replace byte splitting via vector bitcast with scalar
instructions - instead of bitcasting and extracting elements, let's use trunc (for the low half) or a logical shift right followed by trunc (for the high half) to split. - fixes #139020
1 parent 11713e8 commit c5c3de3

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

llvm/lib/Target/DirectX/DXILLegalizePass.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#include "DXILLegalizePass.h"
1010
#include "DirectX.h"
11+
#include "llvm/ADT/APInt.h"
12+
#include "llvm/IR/Constants.h"
1113
#include "llvm/IR/Function.h"
1214
#include "llvm/IR/IRBuilder.h"
1315
#include "llvm/IR/InstIterator.h"
@@ -419,6 +421,49 @@ static void updateFnegToFsub(Instruction &I,
419421
ToRemove.push_back(&I);
420422
}
421423

424+
static void
425+
legalizeGetHighLowi64Bytes(Instruction &I,
426+
SmallVectorImpl<Instruction *> &ToRemove,
427+
DenseMap<Value *, Value *> &ReplacedValues) {
428+
if (auto *BitCast = dyn_cast<BitCastInst>(&I)) {
429+
if (BitCast->getDestTy() ==
430+
FixedVectorType::get(Type::getInt32Ty(I.getContext()), 2) &&
431+
BitCast->getSrcTy()->isIntegerTy(64)) {
432+
ToRemove.push_back(BitCast);
433+
ReplacedValues[BitCast] = BitCast->getOperand(0);
434+
}
435+
}
436+
437+
if (auto *Extract = dyn_cast<ExtractElementInst>(&I)) {
438+
auto *VecTy = dyn_cast<FixedVectorType>(Extract->getVectorOperandType());
439+
if (VecTy && VecTy->getElementType()->isIntegerTy(32) &&
440+
VecTy->getNumElements() == 2) {
441+
if (auto *Index = dyn_cast<ConstantInt>(Extract->getIndexOperand())) {
442+
unsigned Idx = Index->getZExtValue();
443+
IRBuilder<> Builder(&I);
444+
assert(dyn_cast<BitCastInst>(Extract->getVectorOperand()));
445+
auto *Replacement = ReplacedValues[Extract->getVectorOperand()];
446+
if (Idx == 0) {
447+
Value *LowBytes = Builder.CreateTrunc(
448+
Replacement, Type::getInt32Ty(I.getContext()));
449+
ReplacedValues[Extract] = LowBytes;
450+
} else {
451+
assert(Idx == 1);
452+
Value *LogicalShiftRight = Builder.CreateLShr(
453+
Replacement,
454+
ConstantInt::get(
455+
Replacement->getType(),
456+
APInt(Replacement->getType()->getIntegerBitWidth(), 32)));
457+
Value *HighBytes = Builder.CreateTrunc(
458+
LogicalShiftRight, Type::getInt32Ty(I.getContext()));
459+
ReplacedValues[Extract] = HighBytes;
460+
}
461+
ToRemove.push_back(Extract);
462+
}
463+
}
464+
}
465+
}
466+
422467
namespace {
423468
class DXILLegalizationPipeline {
424469

@@ -453,6 +498,7 @@ class DXILLegalizationPipeline {
453498
LegalizationPipeline.push_back(legalizeMemCpy);
454499
LegalizationPipeline.push_back(removeMemSet);
455500
LegalizationPipeline.push_back(updateFnegToFsub);
501+
LegalizationPipeline.push_back(legalizeGetHighLowi64Bytes);
456502
}
457503
};
458504

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
3+
4+
; Splits an i64 argument into two i32 halves by bitcasting it to <2 x i32>
; and extracting elements 0 and 1. The assertions below expect the bitcast and
; both extracts to be gone, replaced by a trunc of the argument (element 0)
; and an lshr by 32 followed by a trunc (element 1).
define void @split_via_extract(i64 noundef %a) {
5+
; CHECK-LABEL: define void @split_via_extract(
6+
; CHECK-SAME: i64 noundef [[A:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[A]] to i32
9+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A]], 32
10+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
11+
; CHECK-NEXT: ret void
12+
;
13+
entry:
14+
%vecA = bitcast i64 %a to <2 x i32>
15+
%low = extractelement <2 x i32> %vecA, i32 0 ; low 32 bits
16+
%high = extractelement <2 x i32> %vecA, i32 1 ; high 32 bits
17+
ret void
18+
}

0 commit comments

Comments
 (0)