@@ -702,57 +702,66 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   // intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
 
-  // Turn FP extload into load/fpextend
-  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
-  // Turn FP truncstore into trunc + store.
-  // FIXME: vector types should also be expanded
-  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
-  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
-  setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
-  setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
-  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-  setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
-  setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
+  // FP extload/truncstore is not legal in PTX. We need to expand all these.
+  for (auto FloatVTs :
+       {MVT::fp_valuetypes(), MVT::fp_fixedlen_vector_valuetypes()}) {
+    for (MVT ValVT : FloatVTs) {
+      for (MVT MemVT : FloatVTs) {
+        setLoadExtAction(ISD::EXTLOAD, ValVT, MemVT, Expand);
+        setTruncStoreAction(ValVT, MemVT, Expand);
+      }
+    }
+  }
 
-  // PTX does not support load / store predicate registers
-  setOperationAction(ISD::LOAD, MVT::i1, Custom);
-  setOperationAction(ISD::STORE, MVT::i1, Custom);
+  // To improve CodeGen we'll legalize any-extend loads to zext loads. This is
+  // how they'll be lowered in ISel anyway, and by doing this a little earlier
+  // we allow for more DAG combine opportunities.
+  for (auto IntVTs :
+       {MVT::integer_valuetypes(), MVT::integer_fixedlen_vector_valuetypes()})
+    for (MVT ValVT : IntVTs)
+      for (MVT MemVT : IntVTs)
+        if (isTypeLegal(ValVT))
+          setLoadExtAction(ISD::EXTLOAD, ValVT, MemVT, Custom);
 
+  // PTX does not support load / store predicate registers
+  setOperationAction({ISD::LOAD, ISD::STORE}, MVT::i1, Custom);
   for (MVT VT : MVT::integer_valuetypes()) {
-    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
-    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
-    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MVT::i1,
+                     Promote);
     setTruncStoreAction(VT, MVT::i1, Expand);
   }
 
+  // Disable generations of extload/truncstore for v2i16/v2i8. The generic
+  // expansion for these nodes when they are unaligned is incorrect if the
+  // type is a vector.
+  //
+  // TODO: Fix the generic expansion for these nodes found in
+  // TargetLowering::expandUnalignedLoad/Store.
+  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
+                   MVT::v2i8, Expand);
+  setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
+
+  // Register custom handling for illegal type loads/stores. We'll try to custom
+  // lower almost all illegal types and logic in the lowering will discard cases
+  // we can't handle.
+  setOperationAction({ISD::LOAD, ISD::STORE}, {MVT::i128, MVT::f128}, Custom);
+  for (MVT VT : MVT::fixedlen_vector_valuetypes())
+    if (!isTypeLegal(VT) && VT.getStoreSizeInBits() <= 256)
+      setOperationAction({ISD::STORE, ISD::LOAD}, VT, Custom);
+
+  // Custom legalization for LDU intrinsics.
+  // TODO: The logic to lower these is not very robust and we should rewrite it.
+  // Perhaps LDU should not be represented as an intrinsic at all.
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+  for (MVT VT : MVT::fixedlen_vector_valuetypes())
+    if (IsPTXVectorType(VT))
+      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+
   setCondCodeAction({ISD::SETNE, ISD::SETEQ, ISD::SETUGE, ISD::SETULE,
                      ISD::SETUGT, ISD::SETULT, ISD::SETGT, ISD::SETLT,
                      ISD::SETGE, ISD::SETLE},
                     MVT::i1, Expand);
 
-  // expand extload of vector of integers.
-  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
-                   MVT::v2i8, Expand);
-  setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
-
   // This is legal in NVPTX
   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
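
Aside, not part of the patch: marking an FP truncstore as Expand asks the generic legalizer to rewrite it as an FP_ROUND of the value followed by an ordinary store, since PTX has no truncating FP store. A minimal sketch of that rewrite is below; the name lowerFPTruncStore is hypothetical, the real expansion lives in the generic legalizer rather than in NVPTX code, and the snippet assumes the SelectionDAG headers already included by NVPTXISelLowering.cpp.

// Illustrative sketch only: what "Expand" amounts to for an FP truncating
// store, e.g. storing an f32 value into an f16 memory location.
static SDValue lowerFPTruncStore(StoreSDNode *Store, SelectionDAG &DAG) {
  SDLoc DL(Store);
  // Round the value down to the narrower memory type (f32 -> f16); the
  // target-constant flag 0 says the rounding may lose precision.
  SDValue Rounded =
      DAG.getNode(ISD::FP_ROUND, DL, Store->getMemoryVT(), Store->getValue(),
                  DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
  // Then emit a plain, non-truncating store of the rounded value.
  return DAG.getStore(Store->getChain(), DL, Rounded, Store->getBasePtr(),
                      Store->getMemOperand());
}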
@@ -767,24 +776,12 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   // DEBUGTRAP can be lowered to PTX brkpt
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
 
-  // Register custom handling for vector loads/stores
-  for (MVT VT : MVT::fixedlen_vector_valuetypes())
-    if (IsPTXVectorType(VT))
-      setOperationAction({ISD::LOAD, ISD::STORE, ISD::INTRINSIC_W_CHAIN}, VT,
-                         Custom);
-
-  setOperationAction({ISD::LOAD, ISD::STORE, ISD::INTRINSIC_W_CHAIN},
-                     {MVT::i128, MVT::f128}, Custom);
-
   // Support varargs.
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction(ISD::VAARG, MVT::Other, Custom);
   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
 
-  // Custom handling for i8 intrinsics
-  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
-
   setOperationAction({ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX},
                      {MVT::i16, MVT::i32, MVT::i64}, Legal);
@@ -3092,39 +3089,14 @@ static void replaceLoadVector(SDNode *N, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &Results,
                               const NVPTXSubtarget &STI);
 
-SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
-  if (Op.getValueType() == MVT::i1)
-    return LowerLOADi1(Op, DAG);
-
-  EVT VT = Op.getValueType();
-
-  if (NVPTX::isPackedVectorTy(VT)) {
-    // v2f32/v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to
-    // handle unaligned loads and have to handle it here.
-    LoadSDNode *Load = cast<LoadSDNode>(Op);
-    EVT MemVT = Load->getMemoryVT();
-    if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
-                                        MemVT, *Load->getMemOperand())) {
-      SDValue Ops[2];
-      std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
-      return DAG.getMergeValues(Ops, SDLoc(Op));
-    }
-  }
-
-  return SDValue();
-}
-
 // v = ld i1* addr
 //   =>
 // v1 = ld i8* addr (-> i16)
 // v = trunc i16 to i1
-SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
-  SDNode *Node = Op.getNode();
-  LoadSDNode *LD = cast<LoadSDNode>(Node);
-  SDLoc dl(Node);
+static SDValue lowerLOADi1(LoadSDNode *LD, SelectionDAG &DAG) {
+  SDLoc dl(LD);
   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
-  assert(Node->getValueType(0) == MVT::i1 &&
-         "Custom lowering for i1 load only");
+  assert(LD->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only");
   SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(),
                                  LD->getBasePtr(), LD->getPointerInfo(),
                                  MVT::i8, LD->getAlign(),
@@ -3133,8 +3105,27 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
   // The legalizer (the caller) is expecting two values from the legalized
   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
   // in LegalizeDAG.cpp which also uses MergeValues.
-  SDValue Ops[] = { result, LD->getChain() };
-  return DAG.getMergeValues(Ops, dl);
+  return DAG.getMergeValues({result, LD->getChain()}, dl);
+}
+
+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+  if (Op.getValueType() == MVT::i1)
+    return lowerLOADi1(LD, DAG);
+
+  // To improve CodeGen we'll legalize any-extend loads to zext loads. This is
+  // how they'll be lowered in ISel anyway, and by doing this a little earlier
+  // we allow for more DAG combine opportunities.
+  if (LD->getExtensionType() == ISD::EXTLOAD) {
+    assert(LD->getValueType(0).isInteger() && LD->getMemoryVT().isInteger() &&
+           "Unexpected fpext-load");
+    return DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Op), Op.getValueType(),
+                          LD->getChain(), LD->getBasePtr(), LD->getMemoryVT(),
+                          LD->getMemOperand());
+  }
+
+  llvm_unreachable("Unexpected custom lowering for load");
 }
 
 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
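
For orientation, not part of this diff: the Custom actions registered in the constructor are what route these nodes into the hooks above. NVPTXTargetLowering::LowerOperation dispatches on the opcode roughly as sketched below; the in-tree switch covers many more opcodes, so treat this as an abridged outline rather than the actual function body.

// Abridged sketch of the dispatch that invokes the custom lowerings above.
SDValue NVPTXTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD:
    return LowerLOAD(Op, DAG);  // i1 loads and the EXTLOAD -> ZEXTLOAD rewrite
  case ISD::STORE:
    return LowerSTORE(Op, DAG); // i1 stores and vector-store splitting
  // ... many more cases elided ...
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}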
@@ -3144,17 +3135,6 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   if (VT == MVT::i1)
     return LowerSTOREi1(Op, DAG);
 
-  // v2f32/v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to
-  // handle unaligned stores and have to handle it here.
-  if (NVPTX::isPackedVectorTy(VT) &&
-      !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
-                                      VT, *Store->getMemOperand()))
-    return expandUnalignedStore(Store, DAG);
-
-  // v2f16/v2bf16/v2i16 don't need special handling.
-  if (NVPTX::isPackedVectorTy(VT) && VT.is32BitVector())
-    return SDValue();
-
   // Lower store of any other vector type, including v2f32 as we want to break
   // it apart since this is not a widely-supported type.
   return LowerSTOREVector(Op, DAG);
@@ -4010,14 +3990,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   case Intrinsic::nvvm_ldu_global_i:
   case Intrinsic::nvvm_ldu_global_f:
   case Intrinsic::nvvm_ldu_global_p: {
-    auto &DL = I.getDataLayout();
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
-      Info.memVT = getValueType(DL, I.getType());
-    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
-      Info.memVT = getPointerTy(DL);
-    else
-      Info.memVT = getValueType(DL, I.getType());
+    Info.memVT = getValueType(I.getDataLayout(), I.getType());
     Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
     Info.flags = MachineMemOperand::MOLoad;
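
Why the removed if/else chain was collapsible, stated as a sketch rather than as the patch itself: for every LDU variant the memory type is simply the MVT of the intrinsic's result, and for the pointer-returning variant getValueType(DL, I.getType()) already maps the IR pointer type to the target's pointer MVT, which is exactly what getPointerTy(DL) used to return. The helper below is hypothetical and exists only to spell out that equivalence.

// Hypothetical helper, for illustration only: the memVT of an LDU-style load
// intrinsic is just the MVT of its result type, pointers included.
static EVT lduMemVT(const TargetLowering &TLI, const CallInst &I) {
  const DataLayout &DL = I.getDataLayout();
  // i32/float results map to MVT::i32/MVT::f32; a ptr result maps to the
  // target's pointer MVT, i.e. the value getPointerTy(DL) would produce.
  return TLI.getValueType(DL, I.getType());
}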