@@ -725,62 +725,40 @@ def : Pat<(v2f16 (build_vector (f16 (fpround_oneuse f32:$lo)),
725725// selp instructions. Every variant defined below carries a select pattern;
726726// the immediate forms are also referenced explicitly by patterns in this file.
727727let hasSideEffects = false in {
728- multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
729- def rr : NVPTXInst<(outs RC:$dst),
730- (ins RC:$a, RC:$b, Int1Regs:$p),
731- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
732- def ri : NVPTXInst<(outs RC:$dst),
733- (ins RC:$a, ImmCls:$b, Int1Regs:$p),
734- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
735- def ir : NVPTXInst<(outs RC:$dst),
736- (ins ImmCls:$a, RC:$b, Int1Regs:$p),
737- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
738- def ii : NVPTXInst<(outs RC:$dst),
739- (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
740- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
741- }
742-
743- multiclass SELP_PATTERN<string TypeStr, ValueType T, RegisterClass RC,
744- Operand ImmCls, SDNode ImmNode> {
// SELP_PATTERN emits the four operand forms of selp for one register type:
//   rr - $a and $b are both registers
//   ri - $a is a register, $b is an immediate
//   ir - $a is an immediate, $b is a register
//   ii - $a and $b are both immediates
// Every form takes the predicate $p from Int1Regs and is matched from a
// `select` node whose result type is t.Ty. The RegTyInfo argument `t` supplies
// the register class (t.RC), value type (t.Ty), immediate operand (t.Imm) and
// immediate node (t.ImmNode). All four forms share one assembly string.
728+ multiclass SELP_PATTERN<string TypeStr, RegTyInfo t> {
729+ defvar asm_str = "selp." # TypeStr # " \t$dst, $a, $b, $p;";
745730 def rr :
746- NVPTXInst<(outs RC:$dst),
747- (ins RC:$a, RC:$b, Int1Regs:$p),
748- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;") ,
749- [(set T :$dst, (select i1:$p, T :$a, T :$b))]>;
731+ NVPTXInst<(outs t. RC:$dst),
732+ (ins t. RC:$a, t. RC:$b, Int1Regs:$p),
733+ asm_str ,
734+ [(set t.Ty :$dst, (select i1:$p, t.Ty :$a, t.Ty :$b))]>;
750735 def ri :
751- NVPTXInst<(outs RC:$dst),
752- (ins RC:$a, ImmCls :$b, Int1Regs:$p),
753- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;") ,
754- [(set T :$dst, (select i1:$p, T :$a, (T ImmNode:$b) ))]>;
736+ NVPTXInst<(outs t. RC:$dst),
737+ (ins t. RC:$a, t.Imm :$b, Int1Regs:$p),
738+ asm_str ,
739+ [(set t.Ty :$dst, (select i1:$p, t.Ty :$a, t. ImmNode:$b))]>;
755740 def ir :
756- NVPTXInst<(outs RC:$dst),
757- (ins ImmCls :$a, RC:$b, Int1Regs:$p),
758- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;") ,
759- [(set T :$dst, (select i1:$p, ImmNode:$a, T :$b))]>;
741+ NVPTXInst<(outs t. RC:$dst),
742+ (ins t.Imm :$a, t. RC:$b, Int1Regs:$p),
743+ asm_str ,
744+ [(set t.Ty :$dst, (select i1:$p, t. ImmNode:$a, t.Ty :$b))]>;
760745 def ii :
761- NVPTXInst<(outs RC:$dst),
762- (ins ImmCls :$a, ImmCls :$b, Int1Regs:$p),
763- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;") ,
764- [(set T :$dst, (select i1:$p, ImmNode:$a, ImmNode:$b))]>;
746+ NVPTXInst<(outs t. RC:$dst),
747+ (ins t.Imm :$a, t.Imm :$b, Int1Regs:$p),
748+ asm_str ,
749+ [(set t.Ty :$dst, (select i1:$p, t. ImmNode:$a, t. ImmNode:$b))]>;
765750 }
766751}
767752
768753// selp.{s,u}{16,32,64} are intentionally not defined -- selp.b{16,32,64} is
769754// just as good. f16 and bf16 reuse the "b16" suffix; f32 and f64 use their own.
770- defm SELP_b16 : SELP_PATTERN<"b16", i16, Int16Regs, i16imm, imm>;
771- defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
772- defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
773- defm SELP_b32 : SELP_PATTERN<"b32", i32, Int32Regs, i32imm, imm>;
774- defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
775- defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
776- defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>;
777- defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
778- defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
779- defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>;
780- defm SELP_bf16 : SELP_PATTERN<"b16", bf16, Int16Regs, bf16imm, fpimm>;
781-
782- defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>;
783- defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;
755+ defm SELP_b16 : SELP_PATTERN<"b16", I16RT>;
756+ defm SELP_b32 : SELP_PATTERN<"b32", I32RT>;
757+ defm SELP_b64 : SELP_PATTERN<"b64", I64RT>;
758+ defm SELP_f16 : SELP_PATTERN<"b16", F16RT>;
759+ defm SELP_bf16 : SELP_PATTERN<"b16", BF16RT>;
760+ defm SELP_f32 : SELP_PATTERN<"f32", F32RT>;
761+ defm SELP_f64 : SELP_PATTERN<"f64", F64RT>;
784762
785763// This does not work as tablegen fails to infer the type of 'imm'.
786764// def v2f16imm : Operand<v2f16>;
@@ -2023,9 +2001,9 @@ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
20232001
20242002// i1 compare -> i32
// Inequality of two i1 values is their XOR: select -1 when the XOR is set.
20252003def : Pat<(i32 (setne i1:$a, i1:$b)),
2026- (SELP_u32ii -1, 0, (XORb1rr $a, $b))>;
2004+ (SELP_b32ii -1, 0, (XORb1rr $a, $b))>;
// Equality is the inverse of the XOR, so the selp operands are swapped. This
// pattern previously repeated `setne`, which made it indistinguishable from the
// pattern above while computing the opposite result.
20272005def : Pat<(i32 (seteq i1:$a, i1:$b)),
2028- (SELP_u32ii 0, -1, (XORb1rr $a, $b))>;
2006+ (SELP_b32ii 0, -1, (XORb1rr $a, $b))>;
20292007
20302008
20312009
@@ -2690,7 +2668,7 @@ foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
26902668
26912669// sint -> f16
26922670def : Pat<(f16 (sint_to_fp i1:$a)),
2693- (CVT_f16_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2671+ (CVT_f16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
26942672def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
26952673 (CVT_f16_s16 $a, CvtRN)>;
26962674def : Pat<(f16 (sint_to_fp i32:$a)),
@@ -2700,7 +2678,7 @@ def : Pat<(f16 (sint_to_fp i64:$a)),
27002678
27012679// uint -> f16
27022680def : Pat<(f16 (uint_to_fp i1:$a)),
2703- (CVT_f16_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2681+ (CVT_f16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
27042682def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
27052683 (CVT_f16_u16 $a, CvtRN)>;
27062684def : Pat<(f16 (uint_to_fp i32:$a)),
@@ -2710,7 +2688,7 @@ def : Pat<(f16 (uint_to_fp i64:$a)),
27102688
27112689// sint -> bf16
// A true i1 interpreted as signed is -1, so materialize -1 (not 1) before the
// signed convert. This matches the f16/f32/f64 `sint_to_fp i1` patterns in this
// file, which all select -1/0.
27122690def : Pat<(bf16 (sint_to_fp i1:$a)),
2713- (CVT_bf16_s32 (SELP_u32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2691+ (CVT_bf16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
27142692def : Pat<(bf16 (sint_to_fp i16:$a)),
27152693 (CVT_bf16_s16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
27162694def : Pat<(bf16 (sint_to_fp i32:$a)),
@@ -2720,7 +2698,7 @@ def : Pat<(bf16 (sint_to_fp i64:$a)),
27202698
27212699// uint -> bf16
27222700def : Pat<(bf16 (uint_to_fp i1:$a)),
2723- (CVT_bf16_u32 (SELP_u32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2701+ (CVT_bf16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
27242702def : Pat<(bf16 (uint_to_fp i16:$a)),
27252703 (CVT_bf16_u16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
27262704def : Pat<(bf16 (uint_to_fp i32:$a)),
@@ -2730,7 +2708,7 @@ def : Pat<(bf16 (uint_to_fp i64:$a)),
27302708
27312709// sint -> f32
27322710def : Pat<(f32 (sint_to_fp i1:$a)),
2733- (CVT_f32_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2711+ (CVT_f32_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
27342712def : Pat<(f32 (sint_to_fp i16:$a)),
27352713 (CVT_f32_s16 $a, CvtRN)>;
27362714def : Pat<(f32 (sint_to_fp i32:$a)),
@@ -2740,7 +2718,7 @@ def : Pat<(f32 (sint_to_fp i64:$a)),
27402718
27412719// uint -> f32
27422720def : Pat<(f32 (uint_to_fp i1:$a)),
2743- (CVT_f32_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2721+ (CVT_f32_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
27442722def : Pat<(f32 (uint_to_fp i16:$a)),
27452723 (CVT_f32_u16 $a, CvtRN)>;
27462724def : Pat<(f32 (uint_to_fp i32:$a)),
@@ -2750,7 +2728,7 @@ def : Pat<(f32 (uint_to_fp i64:$a)),
27502728
27512729// sint -> f64
27522730def : Pat<(f64 (sint_to_fp i1:$a)),
2753- (CVT_f64_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2731+ (CVT_f64_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
27542732def : Pat<(f64 (sint_to_fp i16:$a)),
27552733 (CVT_f64_s16 $a, CvtRN)>;
27562734def : Pat<(f64 (sint_to_fp i32:$a)),
@@ -2760,7 +2738,7 @@ def : Pat<(f64 (sint_to_fp i64:$a)),
27602738
27612739// uint -> f64
27622740def : Pat<(f64 (uint_to_fp i1:$a)),
2763- (CVT_f64_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2741+ (CVT_f64_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
27642742def : Pat<(f64 (uint_to_fp i16:$a)),
27652743 (CVT_f64_u16 $a, CvtRN)>;
27662744def : Pat<(f64 (uint_to_fp i32:$a)),
@@ -2862,27 +2840,27 @@ def : Pat<(i64 (fp_to_uint f64:$a)),
28622840
28632841// sext i1
// A set predicate becomes -1 (all ones), a clear predicate becomes 0.
28642842def : Pat<(i16 (sext i1:$a)),
2865- (SELP_s16ii -1, 0, $a)>;
2843+ (SELP_b16ii -1, 0, $a)>;
28662844def : Pat<(i32 (sext i1:$a)),
2867- (SELP_s32ii -1, 0, $a)>;
2845+ (SELP_b32ii -1, 0, $a)>;
28682846def : Pat<(i64 (sext i1:$a)),
2869- (SELP_s64ii -1, 0, $a)>;
2847+ (SELP_b64ii -1, 0, $a)>;
28702848
28712849// zext i1
// A set predicate becomes 1, a clear predicate becomes 0.
28722850def : Pat<(i16 (zext i1:$a)),
2873- (SELP_u16ii 1, 0, $a)>;
2851+ (SELP_b16ii 1, 0, $a)>;
28742852def : Pat<(i32 (zext i1:$a)),
2875- (SELP_u32ii 1, 0, $a)>;
2853+ (SELP_b32ii 1, 0, $a)>;
28762854def : Pat<(i64 (zext i1:$a)),
2877- (SELP_u64ii 1, 0, $a)>;
2855+ (SELP_b64ii 1, 0, $a)>;
28782856
28792857// anyext i1
// Uses the same -1/0 constants as the sext patterns above.
// NOTE(review): presumably either encoding is acceptable because the extended
// bits of an anyext are unspecified -- confirm before changing the constants.
28802858def : Pat<(i16 (anyext i1:$a)),
2881- (SELP_u16ii -1, 0, $a)>;
2859+ (SELP_b16ii -1, 0, $a)>;
28822860def : Pat<(i32 (anyext i1:$a)),
2883- (SELP_u32ii -1, 0, $a)>;
2861+ (SELP_b32ii -1, 0, $a)>;
28842862def : Pat<(i64 (anyext i1:$a)),
2885- (SELP_u64ii -1, 0, $a)>;
2863+ (SELP_b64ii -1, 0, $a)>;
28862864
28872865// sext i16
28882866def : Pat<(i32 (sext i16:$a)),
0 commit comments