Skip to content

Commit 6780385

Browse files
Merge branch 'llvm:main' into r586519_bazel_fix
2 parents fc1db13 + defbbf0 commit 6780385

File tree

8 files changed

+144
-74
lines changed

8 files changed

+144
-74
lines changed

flang/module/cudadevice.f90

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -653,29 +653,29 @@ attributes(device) real(8) function sinpi(x) bind(c,name='__nv_sinpi')
653653
end function
654654
end interface
655655

656-
interface __double2ull_rn
657-
attributes(device) integer(8) function __double2ull_rn(r) bind(c)
656+
interface __double2ull_rd
657+
attributes(device) integer(8) function __double2ull_rd(r) bind(c, name='__nv_double2ull_rd')
658658
!dir$ ignore_tkr (d) r
659659
double precision, value :: r
660660
end function
661661
end interface
662662

663-
interface __double2ull_rz
664-
attributes(device) integer(8) function __double2ull_rz(r) bind(c)
663+
interface __double2ull_rn
664+
attributes(device) integer(8) function __double2ull_rn(r) bind(c, name='__nv_double2ull_rn')
665665
!dir$ ignore_tkr (d) r
666666
double precision, value :: r
667667
end function
668668
end interface
669669

670670
interface __double2ull_ru
671-
attributes(device) integer(8) function __double2ull_ru(r) bind(c)
671+
attributes(device) integer(8) function __double2ull_ru(r) bind(c, name='__nv_double2ull_ru')
672672
!dir$ ignore_tkr (d) r
673673
double precision, value :: r
674674
end function
675675
end interface
676676

677-
interface __double2ull_rd
678-
attributes(device) integer(8) function __double2ull_rd(r) bind(c)
677+
interface __double2ull_rz
678+
attributes(device) integer(8) function __double2ull_rz(r) bind(c, name='__nv_double2ull_rz')
679679
!dir$ ignore_tkr (d) r
680680
double precision, value :: r
681681
end function
@@ -779,6 +779,34 @@ attributes(device) real(8) function sinpi(x) bind(c,name='__nv_sinpi')
779779
end function
780780
end interface
781781

782+
interface __drcp_rd
783+
attributes(device) double precision function __drcp_rd(x) bind(c, name='__nv_drcp_rd')
784+
!dir$ ignore_tkr (d) x
785+
double precision, value :: x
786+
end function
787+
end interface
788+
789+
interface __drcp_rn
790+
attributes(device) double precision function __drcp_rn(x) bind(c, name='__nv_drcp_rn')
791+
!dir$ ignore_tkr (d) x
792+
double precision, value :: x
793+
end function
794+
end interface
795+
796+
interface __drcp_ru
797+
attributes(device) double precision function __drcp_ru(x) bind(c, name='__nv_drcp_ru')
798+
!dir$ ignore_tkr (d) x
799+
double precision, value :: x
800+
end function
801+
end interface
802+
803+
interface __drcp_rz
804+
attributes(device) double precision function __drcp_rz(x) bind(c, name='__nv_drcp_rz')
805+
!dir$ ignore_tkr (d) x
806+
double precision, value :: x
807+
end function
808+
end interface
809+
782810
interface __dsqrt_rd
783811
attributes(device) double precision function __dsqrt_rd(x) bind(c, name='__nv_dsqrt_rd')
784812
!dir$ ignore_tkr (d) x

flang/test/Lower/CUDA/cuda-libdevice.cuf

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,35 @@ end subroutine
115115
! CHECK: %{{.*}} = fir.call @__nv_double2ll_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
116116
! CHECK: %{{.*}} = fir.call @__nv_double2ll_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
117117
! CHECK: %{{.*}} = fir.call @__nv_double2ll_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
118+
119+
120+
attributes(global) subroutine test_drcp_rX()
121+
double precision :: res
122+
double precision :: r
123+
res = __drcp_rd(r)
124+
res = __drcp_rn(r)
125+
res = __drcp_ru(r)
126+
res = __drcp_rz(r)
127+
end subroutine
128+
129+
! CHECK-LABEL: _QPtest_drcp_rx
130+
! CHECK: %{{.*}} = fir.call @__nv_drcp_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
131+
! CHECK: %{{.*}} = fir.call @__nv_drcp_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
132+
! CHECK: %{{.*}} = fir.call @__nv_drcp_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
133+
! CHECK: %{{.*}} = fir.call @__nv_drcp_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
134+
135+
attributes(global) subroutine test_double2ull_rX()
136+
integer(8) :: res
137+
double precision :: r
138+
res = __double2ull_rd(r)
139+
res = __double2ull_rn(r)
140+
res = __double2ull_ru(r)
141+
res = __double2ull_rz(r)
142+
end subroutine
143+
144+
! CHECK-LABEL: _QPtest_double2ull_rx
145+
! CHECK: %{{.*}} = fir.call @__nv_double2ull_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
146+
! CHECK: %{{.*}} = fir.call @__nv_double2ull_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
147+
! CHECK: %{{.*}} = fir.call @__nv_double2ull_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
148+
! CHECK: %{{.*}} = fir.call @__nv_double2ull_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
149+

llvm/lib/Target/RISCV/RISCVMoveMerger.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,11 @@ RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I,
137137
NextI = next_nodbg(NextI, E);
138138
DebugLoc DL = I->getDebugLoc();
139139

140+
// Make a copy so we can update the kill flag in the MoveFromAToS case. The
141+
// copied operand needs to be scoped outside the if since we make a pointer
142+
// to it.
143+
MachineOperand PairedSource = *PairedRegs.Source;
144+
140145
// The order of S-reg depends on which instruction holds A0, instead of
141146
// the order of register pair.
142147
// e,g.
@@ -147,8 +152,15 @@ RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I,
147152
// mv a1, s1 => cm.mva01s s2,s1
148153
bool StartWithX10 = ARegInFirstPair == RISCV::X10;
149154
if (isMoveFromAToS(Opcode)) {
150-
Sreg1 = StartWithX10 ? FirstPair.Source : PairedRegs.Source;
151-
Sreg2 = StartWithX10 ? PairedRegs.Source : FirstPair.Source;
155+
// We are moving one of the copies earlier so its kill flag may become
156+
// invalid. Clear the copied kill flag if there are any reads of the
157+
// register between the new location and the old location.
158+
for (auto It = std::next(I); It != Paired && PairedSource.isKill(); ++It)
159+
if (It->readsRegister(PairedSource.getReg(), TRI))
160+
PairedSource.setIsKill(false);
161+
162+
Sreg1 = StartWithX10 ? FirstPair.Source : &PairedSource;
163+
Sreg2 = StartWithX10 ? &PairedSource : FirstPair.Source;
152164
} else {
153165
Sreg1 = StartWithX10 ? FirstPair.Destination : PairedRegs.Destination;
154166
Sreg2 = StartWithX10 ? PairedRegs.Destination : FirstPair.Destination;

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 27 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -10564,41 +10564,35 @@ class InstructionsCompatibilityAnalysis {
1056410564
unsigned MainOpcode = 0;
1056510565
Instruction *MainOp = nullptr;
1056610566

10567-
/// Checks if the opcode is supported as the main opcode for copyable
10568-
/// elements.
10569-
static bool isSupportedOpcode(const unsigned Opcode) {
10570-
return Opcode == Instruction::Add || Opcode == Instruction::LShr;
10571-
}
10572-
1057310567
/// Identifies the best candidate value, which represents main opcode
1057410568
/// operation.
1057510569
/// Currently the best candidate is the Add instruction with the parent
1057610570
/// block with the highest DFS incoming number (block, that dominates other).
1057710571
void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
1057810572
BasicBlock *Parent = nullptr;
1057910573
// Checks if the instruction has supported opcode.
10580-
auto IsSupportedInstruction = [&](Instruction *I) {
10581-
return I && isSupportedOpcode(I->getOpcode()) &&
10574+
auto IsSupportedOpcode = [&](Instruction *I) {
10575+
return I && I->getOpcode() == Instruction::Add &&
1058210576
(!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
1058310577
};
1058410578
// Exclude operands instructions immediately to improve compile time, it
1058510579
// will be unable to schedule anyway.
1058610580
SmallDenseSet<Value *, 8> Operands;
10587-
SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
1058810581
for (Value *V : VL) {
1058910582
auto *I = dyn_cast<Instruction>(V);
1059010583
if (!I)
1059110584
continue;
1059210585
if (!DT.isReachableFromEntry(I->getParent()))
1059310586
continue;
10594-
if (Candidates.empty()) {
10595-
Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10587+
if (!MainOp) {
10588+
MainOp = I;
1059610589
Parent = I->getParent();
1059710590
Operands.insert(I->op_begin(), I->op_end());
1059810591
continue;
1059910592
}
1060010593
if (Parent == I->getParent()) {
10601-
Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10594+
if (!IsSupportedOpcode(MainOp) && !Operands.contains(I))
10595+
MainOp = I;
1060210596
Operands.insert(I->op_begin(), I->op_end());
1060310597
continue;
1060410598
}
@@ -10610,35 +10604,24 @@ class InstructionsCompatibilityAnalysis {
1061010604
(NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
1061110605
"Different nodes should have different DFS numbers");
1061210606
if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
10613-
Candidates.clear();
10614-
Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10607+
MainOp = I;
1061510608
Parent = I->getParent();
1061610609
Operands.clear();
1061710610
Operands.insert(I->op_begin(), I->op_end());
1061810611
}
1061910612
}
10620-
unsigned BestOpcodeNum = 0;
10621-
MainOp = nullptr;
10622-
for (const auto &P : Candidates) {
10623-
if (P.second.size() < BestOpcodeNum)
10624-
continue;
10625-
for (Instruction *I : P.second) {
10626-
if (IsSupportedInstruction(I) && !Operands.contains(I)) {
10627-
MainOp = I;
10628-
BestOpcodeNum = P.second.size();
10629-
break;
10630-
}
10631-
}
10613+
if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
10614+
MainOp = nullptr;
10615+
return;
1063210616
}
10633-
if (MainOp)
10634-
MainOpcode = MainOp->getOpcode();
10617+
MainOpcode = MainOp->getOpcode();
1063510618
}
1063610619

1063710620
/// Returns the idempotent value for the \p MainOp with the detected \p
1063810621
/// MainOpcode. For Add, returns 0. For Or, it should choose between false and
1063910622
/// the operand itself, since V or V == V.
1064010623
Value *selectBestIdempotentValue() const {
10641-
assert(isSupportedOpcode(MainOpcode) && "Unsupported opcode");
10624+
assert(MainOpcode == Instruction::Add && "Unsupported opcode");
1064210625
return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
1064310626
!MainOp->isCommutative());
1064410627
}
@@ -10651,8 +10634,13 @@ class InstructionsCompatibilityAnalysis {
1065110634
return {V, V};
1065210635
if (!S.isCopyableElement(V))
1065310636
return convertTo(cast<Instruction>(V), S).second;
10654-
assert(isSupportedOpcode(MainOpcode) && "Unsupported opcode");
10655-
return {V, selectBestIdempotentValue()};
10637+
switch (MainOpcode) {
10638+
case Instruction::Add:
10639+
return {V, selectBestIdempotentValue()};
10640+
default:
10641+
break;
10642+
}
10643+
llvm_unreachable("Unsupported opcode");
1065610644
}
1065710645

1065810646
/// Builds operands for the original instructions.
@@ -10865,21 +10853,6 @@ class InstructionsCompatibilityAnalysis {
1086510853
}
1086610854
if (!Res)
1086710855
return InstructionsState::invalid();
10868-
constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
10869-
InstructionCost ScalarCost = TTI.getInstructionCost(S.getMainOp(), Kind);
10870-
InstructionCost VectorCost;
10871-
FixedVectorType *VecTy =
10872-
getWidenedType(S.getMainOp()->getType(), VL.size());
10873-
switch (MainOpcode) {
10874-
case Instruction::Add:
10875-
case Instruction::LShr:
10876-
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
10877-
break;
10878-
default:
10879-
llvm_unreachable("Unexpected instruction.");
10880-
}
10881-
if (VectorCost > ScalarCost)
10882-
return InstructionsState::invalid();
1088310856
return S;
1088410857
}
1088510858
assert(Operands.size() == 2 && "Unexpected number of operands!");
@@ -21117,7 +21090,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2111721090
ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
2111821091
const auto *It = find(Op, CD->getInst());
2111921092
assert(It != Op.end() && "Lane not set");
21120-
SmallPtrSet<Instruction *, 4> Visited;
2112121093
do {
2112221094
int Lane = std::distance(Op.begin(), It);
2112321095
assert(Lane >= 0 && "Lane not set");
@@ -21139,15 +21111,13 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2113921111
(InsertInReadyList && UseSD->isReady()))
2114021112
WorkList.push_back(UseSD);
2114121113
}
21142-
} else if (Visited.insert(In).second) {
21143-
if (ScheduleData *UseSD = getScheduleData(In)) {
21144-
CD->incDependencies();
21145-
if (!UseSD->isScheduled())
21146-
CD->incrementUnscheduledDeps(1);
21147-
if (!UseSD->hasValidDependencies() ||
21148-
(InsertInReadyList && UseSD->isReady()))
21149-
WorkList.push_back(UseSD);
21150-
}
21114+
} else if (ScheduleData *UseSD = getScheduleData(In)) {
21115+
CD->incDependencies();
21116+
if (!UseSD->isScheduled())
21117+
CD->incrementUnscheduledDeps(1);
21118+
if (!UseSD->hasValidDependencies() ||
21119+
(InsertInReadyList && UseSD->isReady()))
21120+
WorkList.push_back(UseSD);
2115121121
}
2115221122
It = find(make_range(std::next(It), Op.end()), CD->getInst());
2115321123
} while (It != Op.end());
@@ -21905,11 +21875,9 @@ bool BoUpSLP::collectValuesToDemote(
2190521875
return all_of(E.Scalars, [&](Value *V) {
2190621876
if (isa<PoisonValue>(V))
2190721877
return true;
21908-
APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
21909-
if (E.isCopyableElement(V))
21910-
return MaskedValueIsZero(V, ShiftedBits, SimplifyQuery(*DL));
2191121878
auto *I = cast<Instruction>(V);
2191221879
KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
21880+
APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
2191321881
return AmtKnownBits.getMaxValue().ult(BitWidth) &&
2191421882
MaskedValueIsZero(I->getOperand(0), ShiftedBits,
2191521883
SimplifyQuery(*DL));
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=riscv32 -mattr=+zcmp -run-pass=riscv-move-merge -verify-machineinstrs %s -o - | FileCheck %s
3+
---
4+
name: mov-merge
5+
tracksRegLiveness: true
6+
body: |
7+
bb.0.entry:
8+
liveins: $x8, $x9
9+
; CHECK-LABEL: name: mov-merge
10+
; CHECK: liveins: $x8, $x9
11+
; CHECK-NEXT: {{ $}}
12+
; CHECK-NEXT: $x12 = ADDI $x0, -3
13+
; CHECK-NEXT: SW renamable $x9, $x2, 56
14+
; CHECK-NEXT: CM_MVA01S killed renamable $x9, renamable $x8, implicit-def $x10, implicit-def $x11
15+
; CHECK-NEXT: SW renamable $x8, $x2, 60
16+
; CHECK-NEXT: PseudoRET
17+
$x12 = ADDI $x0, -3
18+
SW renamable $x9, $x2, 56
19+
$x10 = ADDI killed renamable $x9, 0
20+
SW renamable $x8, $x2, 60
21+
$x11 = ADDI killed renamable $x8, 0
22+
PseudoRET
23+
...

llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@ define i32 @test(ptr %c) {
88
; CHECK-NEXT: [[BITLEN:%.*]] = getelementptr i8, ptr [[C]], i64 136
99
; CHECK-NEXT: [[INCDEC_PTR_3_1:%.*]] = getelementptr i8, ptr [[C]], i64 115
1010
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[BITLEN]], align 8
11-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0>
12-
; CHECK-NEXT: [[TMP5:%.*]] = lshr <8 x i64> [[TMP1]], zeroinitializer
11+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
12+
; CHECK-NEXT: [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer
13+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 poison, i32 poison>
14+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
15+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 4, i32 5>
1316
; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8>
1417
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1
1518
; CHECK-NEXT: ret i32 0

llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,10 @@ define <4 x float> @PR16739_byref_alt(ptr nocapture readonly dereferenceable(16)
101101
define <4 x float> @PR16739_byval(ptr nocapture readonly dereferenceable(16) %x) {
102102
; CHECK-LABEL: @PR16739_byval(
103103
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[X:%.*]], align 16
104-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
105-
; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[TMP2]], <i64 0, i64 32, i64 0, i64 0>
104+
; CHECK-NEXT: [[T1:%.*]] = load i64, ptr [[X]], align 16
105+
; CHECK-NEXT: [[T8:%.*]] = lshr i64 [[T1]], 32
106+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 1>
107+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[T8]], i32 1
106108
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
107109
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
108110
; CHECK-NEXT: ret <4 x float> [[TMP5]]

llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,10 @@ define <4 x float> @PR16739_byref_alt(ptr nocapture readonly dereferenceable(16)
101101
define <4 x float> @PR16739_byval(ptr nocapture readonly dereferenceable(16) %x) {
102102
; CHECK-LABEL: @PR16739_byval(
103103
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[X:%.*]], align 16
104-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
105-
; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[TMP2]], <i64 0, i64 32, i64 0, i64 0>
104+
; CHECK-NEXT: [[T1:%.*]] = load i64, ptr [[X]], align 16
105+
; CHECK-NEXT: [[T8:%.*]] = lshr i64 [[T1]], 32
106+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 1>
107+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[T8]], i32 1
106108
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
107109
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
108110
; CHECK-NEXT: ret <4 x float> [[TMP5]]

0 commit comments

Comments
 (0)