@@ -67,6 +67,15 @@ class THREADMASK_INFO<bit sync> {
67
67
// Synchronization and shuffle functions
68
68
//-----------------------------------
69
69
let isConvergent = true in {
70
+ def INT_BARRIER0 : NVPTXInst<(outs), (ins),
71
+ "bar.sync \t0;",
72
+ [(int_nvvm_barrier0)]>;
73
+ def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
74
+ "bar.sync \t$src1;",
75
+ [(int_nvvm_barrier_n i32:$src1)]>;
76
+ def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
77
+ "bar.sync \t$src1, $src2;",
78
+ [(int_nvvm_barrier i32:$src1, i32:$src2)]>;
70
79
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
71
80
!strconcat("{{ \n\t",
72
81
".reg .pred \t%p1; \n\t",
@@ -93,51 +102,39 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
93
102
"}}"),
94
103
[(set i32:$dst, (int_nvvm_barrier0_or i32:$pred))]>;
95
104
105
+ def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
106
+ [(int_nvvm_bar_sync imm:$i)]>;
107
+
96
108
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
97
109
[(int_nvvm_bar_warp_sync imm:$i)]>,
98
110
Requires<[hasPTX<60>, hasSM<30>]>;
99
111
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
100
112
[(int_nvvm_bar_warp_sync i32:$i)]>,
101
113
Requires<[hasPTX<60>, hasSM<30>]>;
102
114
103
- multiclass BARRIER1<string asmstr, Intrinsic intrinsic, list<Predicate> requires = []> {
104
- def _i : BasicNVPTXInst<(outs), (ins i32imm:$i), asmstr,
105
- [(intrinsic imm:$i)]>,
106
- Requires<requires>;
107
-
108
- def _r : BasicNVPTXInst<(outs), (ins Int32Regs:$i), asmstr,
109
- [(intrinsic i32:$i)]>,
110
- Requires<requires>;
111
- }
112
-
113
- multiclass BARRIER2<string asmstr, Intrinsic intrinsic, list<Predicate> requires = []> {
114
- def _rr : BasicNVPTXInst<(outs), (ins Int32Regs:$i, Int32Regs:$j), asmstr,
115
- [(intrinsic i32:$i, i32:$j)]>,
116
- Requires<requires>;
117
-
118
- def _ri : BasicNVPTXInst<(outs), (ins Int32Regs:$i, i32imm:$j), asmstr,
119
- [(intrinsic i32:$i, imm:$j)]>,
120
- Requires<requires>;
121
-
122
- def _ir : BasicNVPTXInst<(outs), (ins i32imm:$i, Int32Regs:$j), asmstr,
123
- [(intrinsic imm:$i, i32:$j)]>,
124
- Requires<requires>;
125
-
126
- def _ii : BasicNVPTXInst<(outs), (ins i32imm:$i, i32imm:$j), asmstr,
127
- [(intrinsic imm:$i, imm:$j)]>,
128
- Requires<requires>;
129
- }
130
-
131
- // Note the "bar.sync" variants could be renamed to the equivalent corresponding
132
- // "barrier.*.aligned" variants. We use the older syntax for compatibility with
133
- // older versions of the PTX ISA.
134
- defm BARRIER_CTA_SYNC_ALIGNED_ALL : BARRIER1<"bar.sync", int_nvvm_barrier_cta_sync_aligned_all>;
135
- defm BARRIER_CTA_SYNC_ALIGNED : BARRIER2<"bar.sync", int_nvvm_barrier_cta_sync_aligned>;
136
- defm BARRIER_CTA_ARRIVE_ALIGNED : BARRIER2<"bar.arrive", int_nvvm_barrier_cta_arrive_aligned>;
115
+ def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
116
+ [(int_nvvm_barrier_sync imm:$i)]>,
117
+ Requires<[hasPTX<60>, hasSM<30>]>;
118
+ def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
119
+ [(int_nvvm_barrier_sync i32:$i)]>,
120
+ Requires<[hasPTX<60>, hasSM<30>]>;
137
121
138
- defm BARRIER_CTA_SYNC_ALL : BARRIER1<"barrier.sync", int_nvvm_barrier_cta_sync_all, [hasPTX<60>]>;
139
- defm BARRIER_CTA_SYNC : BARRIER2<"barrier.sync", int_nvvm_barrier_cta_sync, [hasPTX<60>]>;
140
- defm BARRIER_CTA_ARRIVE : BARRIER2<"barrier.arrive", int_nvvm_barrier_cta_arrive, [hasPTX<60>]>;
122
+ def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
123
+ "barrier.sync \t$id, $cnt;",
124
+ [(int_nvvm_barrier_sync_cnt i32:$id, i32:$cnt)]>,
125
+ Requires<[hasPTX<60>, hasSM<30>]>;
126
+ def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
127
+ "barrier.sync \t$id, $cnt;",
128
+ [(int_nvvm_barrier_sync_cnt i32:$id, imm:$cnt)]>,
129
+ Requires<[hasPTX<60>, hasSM<30>]>;
130
+ def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
131
+ "barrier.sync \t$id, $cnt;",
132
+ [(int_nvvm_barrier_sync_cnt imm:$id, i32:$cnt)]>,
133
+ Requires<[hasPTX<60>, hasSM<30>]>;
134
+ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
135
+ "barrier.sync \t$id, $cnt;",
136
+ [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
137
+ Requires<[hasPTX<60>, hasSM<30>]>;
141
138
142
139
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
143
140
list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
0 commit comments