@@ -67,6 +67,15 @@ class THREADMASK_INFO<bit sync> {
6767// Synchronization and shuffle functions
6868//-----------------------------------
6969let isConvergent = true in {
// INT_BARRIER0: instruction with no outputs and no inputs whose selection
// pattern is the operand-less int_nvvm_barrier0 intrinsic. Its assembly string
// hard-codes the operand 0 ("bar.sync \t0;"). It is declared after the
// `let isConvergent = true in {` line above, i.e. inside that scope.
70+ def INT_BARRIER0 : NVPTXInst<(outs), (ins),
71+ "bar.sync \t0;",
72+ [(int_nvvm_barrier0)]>;
// INT_BARRIERN: one-operand form. Takes a single Int32Regs input ($src1),
// produces no outputs, and matches int_nvvm_barrier_n with an i32 operand.
// The assembly string is "bar.sync" followed by that operand.
// NOTE(review): $src1 is presumably the barrier id -- confirm against the PTX ISA.
73+ def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
74+ "bar.sync \t$src1;",
75+ [(int_nvvm_barrier_n i32:$src1)]>;
// INT_BARRIER: two-operand form. Takes two Int32Regs inputs ($src1, $src2),
// produces no outputs, and matches int_nvvm_barrier with two i32 operands.
// Both operands are written after "bar.sync", comma-separated, in that order.
// NOTE(review): presumably $src1 is the barrier id and $src2 the thread
// count -- confirm against the PTX ISA. Only register operands are matched here.
76+ def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
77+ "bar.sync \t$src1, $src2;",
78+ [(int_nvvm_barrier i32:$src1, i32:$src2)]>;
7079def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
7180 !strconcat("{{ \n\t",
7281 ".reg .pred \t%p1; \n\t",
@@ -93,51 +102,39 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
93102 "}}"),
94103 [(set i32:$dst, (int_nvvm_barrier0_or i32:$pred))]>;
95104
// INT_BAR_SYNC: immediate-operand form. Matches int_nvvm_bar_sync only when
// its argument is a constant (imm:$i) and writes that constant after
// "bar.sync". No register variant and no Requires<> predicate list is
// attached to this definition.
105+ def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
106+ [(int_nvvm_bar_sync imm:$i)]>;
107+
// INT_BAR_WARP_SYNC_I: immediate variant of bar.warp.sync. Matches
// int_nvvm_bar_warp_sync when the argument is a constant (imm:$i).
// Selection is gated by the hasPTX<60> and hasSM<30> predicates.
96108def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
97109 [(int_nvvm_bar_warp_sync imm:$i)]>,
98110 Requires<[hasPTX<60>, hasSM<30>]>;
// INT_BAR_WARP_SYNC_R: register variant of bar.warp.sync. Matches
// int_nvvm_bar_warp_sync when the argument is an i32 value held in an
// Int32Regs register. Same assembly string and the same hasPTX<60>/hasSM<30>
// predicates as the immediate variant.
99111def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
100112 [(int_nvvm_bar_warp_sync i32:$i)]>,
101113 Requires<[hasPTX<60>, hasSM<30>]>;
102114
// BARRIER1: multiclass for single-operand barrier instructions.
//   asmstr    - string forwarded unchanged to BasicNVPTXInst.
//   intrinsic - intrinsic used in the selection pattern of both variants.
//   requires  - predicate list forwarded to Requires<>; defaults to empty.
// Expands to two records:
//   _i - operand is an i32 immediate (pattern uses imm:$i)
//   _r - operand is an Int32Regs register (pattern uses i32:$i)
// NOTE(review): asmstr carries no operand placeholders here; operand printing
// is presumably supplied by BasicNVPTXInst -- confirm in its definition.
103- multiclass BARRIER1<string asmstr, Intrinsic intrinsic, list<Predicate> requires = []> {
104- def _i : BasicNVPTXInst<(outs), (ins i32imm:$i), asmstr,
105- [(intrinsic imm:$i)]>,
106- Requires<requires>;
107-
108- def _r : BasicNVPTXInst<(outs), (ins Int32Regs:$i), asmstr,
109- [(intrinsic i32:$i)]>,
110- Requires<requires>;
111- }
112-
// BARRIER2: multiclass for two-operand barrier instructions ($i, $j).
//   asmstr    - string forwarded unchanged to BasicNVPTXInst.
//   intrinsic - intrinsic used in the selection pattern of every variant.
//   requires  - predicate list forwarded to Requires<>; defaults to empty.
// Expands to four records, one per register/immediate combination; the suffix
// letters give the kind of $i then $j (r = Int32Regs, i = i32imm):
//   _rr, _ri, _ir, _ii
// NOTE(review): asmstr carries no operand placeholders here; operand printing
// is presumably supplied by BasicNVPTXInst -- confirm in its definition.
113- multiclass BARRIER2<string asmstr, Intrinsic intrinsic, list<Predicate> requires = []> {
114- def _rr : BasicNVPTXInst<(outs), (ins Int32Regs:$i, Int32Regs:$j), asmstr,
115- [(intrinsic i32:$i, i32:$j)]>,
116- Requires<requires>;
117-
118- def _ri : BasicNVPTXInst<(outs), (ins Int32Regs:$i, i32imm:$j), asmstr,
119- [(intrinsic i32:$i, imm:$j)]>,
120- Requires<requires>;
121-
122- def _ir : BasicNVPTXInst<(outs), (ins i32imm:$i, Int32Regs:$j), asmstr,
123- [(intrinsic imm:$i, i32:$j)]>,
124- Requires<requires>;
125-
126- def _ii : BasicNVPTXInst<(outs), (ins i32imm:$i, i32imm:$j), asmstr,
127- [(intrinsic imm:$i, imm:$j)]>,
128- Requires<requires>;
129- }
130-
131- // Note the "bar.sync" variants could be renamed to the equivalent
132- // "barrier.*.aligned" variants. We use the older syntax for compatibility with
133- // older versions of the PTX ISA.
// The three instantiations below pass no `requires` argument, so the
// multiclass default (an empty predicate list) applies to each of them.
134- defm BARRIER_CTA_SYNC_ALIGNED_ALL : BARRIER1<"bar.sync", int_nvvm_barrier_cta_sync_aligned_all>;
135- defm BARRIER_CTA_SYNC_ALIGNED : BARRIER2<"bar.sync", int_nvvm_barrier_cta_sync_aligned>;
136- defm BARRIER_CTA_ARRIVE_ALIGNED : BARRIER2<"bar.arrive", int_nvvm_barrier_cta_arrive_aligned>;
// INT_BARRIER_SYNC_I: immediate variant of the one-operand barrier.sync.
// Matches int_nvvm_barrier_sync when the argument is a constant (imm:$i).
// Selection is gated by the hasPTX<60> and hasSM<30> predicates.
115+ def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
116+ [(int_nvvm_barrier_sync imm:$i)]>,
117+ Requires<[hasPTX<60>, hasSM<30>]>;
// INT_BARRIER_SYNC_R: register variant of the one-operand barrier.sync.
// Matches int_nvvm_barrier_sync when the argument is an i32 value held in an
// Int32Regs register. Gated by the hasPTX<60> and hasSM<30> predicates.
118+ def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
119+ [(int_nvvm_barrier_sync i32:$i)]>,
120+ Requires<[hasPTX<60>, hasSM<30>]>;
137121
// Instantiations using the "barrier.sync" / "barrier.arrive" mnemonics. Each
// one passes [hasPTX<60>] as the `requires` list, unlike the "bar.*"
// instantiations, which pass none.
138- defm BARRIER_CTA_SYNC_ALL : BARRIER1<"barrier.sync", int_nvvm_barrier_cta_sync_all, [hasPTX<60>]>;
139- defm BARRIER_CTA_SYNC : BARRIER2<"barrier.sync", int_nvvm_barrier_cta_sync, [hasPTX<60>]>;
140- defm BARRIER_CTA_ARRIVE : BARRIER2<"barrier.arrive", int_nvvm_barrier_cta_arrive, [hasPTX<60>]>;
// INT_BARRIER_SYNC_CNT_RR: two-operand barrier.sync where both $id and $cnt
// are Int32Regs registers. Matches int_nvvm_barrier_sync_cnt with two i32
// values. Gated by the hasPTX<60> and hasSM<30> predicates.
122+ def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
123+ "barrier.sync \t$id, $cnt;",
124+ [(int_nvvm_barrier_sync_cnt i32:$id, i32:$cnt)]>,
125+ Requires<[hasPTX<60>, hasSM<30>]>;
// INT_BARRIER_SYNC_CNT_RI: two-operand barrier.sync where $id is an Int32Regs
// register and $cnt is an i32 immediate. Matches int_nvvm_barrier_sync_cnt
// with (i32, imm) arguments. Gated by the hasPTX<60> and hasSM<30> predicates.
126+ def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
127+ "barrier.sync \t$id, $cnt;",
128+ [(int_nvvm_barrier_sync_cnt i32:$id, imm:$cnt)]>,
129+ Requires<[hasPTX<60>, hasSM<30>]>;
// INT_BARRIER_SYNC_CNT_IR: two-operand barrier.sync where $id is an i32
// immediate and $cnt is an Int32Regs register. Matches
// int_nvvm_barrier_sync_cnt with (imm, i32) arguments. Gated by the
// hasPTX<60> and hasSM<30> predicates.
130+ def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
131+ "barrier.sync \t$id, $cnt;",
132+ [(int_nvvm_barrier_sync_cnt imm:$id, i32:$cnt)]>,
133+ Requires<[hasPTX<60>, hasSM<30>]>;
// INT_BARRIER_SYNC_CNT_II: two-operand barrier.sync where both $id and $cnt
// are i32 immediates. Matches int_nvvm_barrier_sync_cnt with two constant
// arguments. Gated by the hasPTX<60> and hasSM<30> predicates.
134+ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
135+ "barrier.sync \t$id, $cnt;",
136+ [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
137+ Requires<[hasPTX<60>, hasSM<30>]>;
141138
142139class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
143140 list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
0 commit comments