Skip to content

Commit 10e8b97

Browse files
committed
Handle fdiv and other instructions where v2f32 is illegal
This requires us to lower EXTRACT_VECTOR_ELT as well.
1 parent 8a47d35 commit 10e8b97

File tree

2 files changed

+15
-0
lines changed

2 files changed

+15
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
929929
{ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) {
930930
setOperationAction(Op, MVT::f16, Promote);
931931
setOperationAction(Op, MVT::f32, Legal);
932+
setOperationAction(Op, MVT::v2f32, Expand);
932933
setOperationAction(Op, MVT::f64, Legal);
933934
setOperationAction(Op, MVT::v2f16, Expand);
934935
setOperationAction(Op, MVT::v2bf16, Expand);

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2875,6 +2875,14 @@ let hasSideEffects = false in {
28752875
(ins Int64Regs:$s),
28762876
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
28772877
[]>;
2878+
// I64toF32H: takes one 64-bit integer register ($s) and produces one 32-bit
// float register ($high). The emitted PTX opens a scoped block, declares a
// scratch .b32 register `tmp`, and issues a single mov.b64 that unpacks $s
// into the pair {tmp, $high}; only $high is an output, `tmp` is discarded.
// NOTE(review): the operand name implies $high is the upper 32 bits of $s --
// confirm against the PTX mov.b64 unpack ordering.
// The pattern list is empty, so this record is only selected through
// separately defined Pat records.
def I64toF32H : NVPTXInst<(outs Float32Regs:$high),
2879+
(ins Int64Regs:$s),
2880+
"{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
2881+
[]>;
2882+
// I64toF32L: takes one 64-bit integer register ($s) and produces one 32-bit
// float register ($low). The emitted PTX opens a scoped block, declares a
// scratch .b32 register `tmp`, and issues a single mov.b64 that unpacks $s
// into the pair {$low, tmp}; only $low is an output, `tmp` is discarded.
// NOTE(review): the operand name implies $low is the lower 32 bits of $s --
// confirm against the PTX mov.b64 unpack ordering.
// The pattern list is empty, so this record is only selected through
// separately defined Pat records.
def I64toF32L : NVPTXInst<(outs Float32Regs:$low),
2883+
(ins Int64Regs:$s),
2884+
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
2885+
[]>;
28782886

28792887
// PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
28802888
// unused high/low part.
@@ -2917,6 +2925,12 @@ foreach vt = [v2f16, v2bf16, v2i16] in {
29172925
def : Pat<(extractelt vt:$src, 0), (I32toI16L $src)>;
29182926
def : Pat<(extractelt vt:$src, 1), (I32toI16H $src)>;
29192927
}
2928+
2929+
// Select element extraction from a v2f32 value: a constant index of 0 maps to
// I64toF32L and a constant index of 1 maps to I64toF32H. Both instructions
// take an Int64Regs input, so $src is passed through unchanged.
// NOTE(review): this presumes v2f32 values are held in 64-bit integer
// registers -- confirm against the register class setup for v2f32.
def : Pat<(extractelt v2f32:$src, 0),
2930+
(I64toF32L $src)>;
2931+
def : Pat<(extractelt v2f32:$src, 1),
2932+
(I64toF32H $src)>;
2933+
29202934
def : Pat<(v2f16 (build_vector f16:$a, f16:$b)),
29212935
(V2I16toI32 $a, $b)>;
29222936
def : Pat<(v2bf16 (build_vector bf16:$a, bf16:$b)),

0 commit comments

Comments
 (0)