77module multi_warp_dispatcher import bgpu_pkg :: * ; # (
88 // / Number of instructions to fetch for the warp
99 parameter int unsigned FetchWidth = 1 ,
10+ // / Number of instructions to dispatch simultaneously
11+ // Each warp dispatches at most one instruction per cycle -> saves complexity in dispatcher
12+ // but multiple warps can dispatch simultaneously
13+ parameter int unsigned DispatchWidth = 1 ,
1014 // / Number of instructions that can write back simultaneously
1115 parameter int unsigned WritebackWidth = 1 ,
1216 // / Number of inflight instructions per warp
@@ -73,19 +77,19 @@ module multi_warp_dispatcher import bgpu_pkg::*; #(
7377 input op_reg_idx_t [FetchWidth- 1 : 0 ] dec_operands_i,
7478
7579 // / To Operand Collector
76- input logic opc_ready_i,
77- output logic disp_valid_o,
78- output iid_t disp_tag_o,
79- output pc_t disp_pc_o,
80- output act_mask_t disp_act_mask_o,
81- output inst_t disp_inst_o,
82- output reg_idx_t disp_dst_o,
83- output op_is_reg_t disp_operands_is_reg_o,
84- output op_reg_idx_t disp_operands_o,
80+ input logic [DispatchWidth - 1 : 0 ] opc_ready_i,
81+ output logic [DispatchWidth - 1 : 0 ] disp_valid_o,
82+ output iid_t [DispatchWidth - 1 : 0 ] disp_tag_o,
83+ output pc_t [DispatchWidth - 1 : 0 ] disp_pc_o,
84+ output act_mask_t [DispatchWidth - 1 : 0 ] disp_act_mask_o,
85+ output inst_t [DispatchWidth - 1 : 0 ] disp_inst_o,
86+ output reg_idx_t [DispatchWidth - 1 : 0 ] disp_dst_o,
87+ output op_is_reg_t [DispatchWidth - 1 : 0 ] disp_operands_is_reg_o,
88+ output op_reg_idx_t [DispatchWidth - 1 : 0 ] disp_operands_o,
8589
8690 // / From Operand Collector -> instruction has read its operands
87- input logic opc_eu_handshake_i,
88- input iid_t opc_eu_tag_i,
91+ input logic [DispatchWidth - 1 : 0 ] opc_eu_handshake_i,
92+ input iid_t [DispatchWidth - 1 : 0 ] opc_eu_tag_i,
8993
9094 // / From Execution Units
9195 input logic [WritebackWidth- 1 : 0 ] eu_valid_i,
@@ -120,18 +124,20 @@ module multi_warp_dispatcher import bgpu_pkg::*; #(
120124 tag_t [WritebackWidth- 1 : 0 ] eu_tag;
121125
122126 // Round Robin Arbiter
123- warp_mask_t arb_gnt;
124- warp_mask_t rr_inst_ready;
127+ warp_mask_t arb_gnts;
128+ warp_mask_t [DispatchWidth- 1 : 0 ] arb_gnt;
129+ warp_mask_t [DispatchWidth- 1 : 0 ] rr_inst_ready;
125130
126- wid_t arb_sel_wid;
131+ wid_t [DispatchWidth- 1 : 0 ] arb_sel_wid;
132+ disp_data_t [DispatchWidth- 1 : 0 ] arb_sel_data;
127133 disp_data_t [NumWarps- 1 : 0 ] arb_in_data;
128- disp_data_t arb_sel_data;
129134
130135 // Decoded Demultiplexer
131136 fetch_mask_t [NumWarps- 1 : 0 ] dec_decoded_unused_ibe;
132137
133138 // OPC EU Handshake Demultiplexer
134139 warp_mask_t opc_eu_handshake_warp;
140+ tag_t [NumWarps- 1 : 0 ] opc_eu_tag;
135141
136142 // #######################################################################################
137143 // # Dispatcher per warp #
@@ -175,14 +181,26 @@ module multi_warp_dispatcher import bgpu_pkg::*; #(
// OPC EU Handshake Demultiplexer
// For every dispatch port, the low WidWidth bits of opc_eu_tag_i select the warp and the
// TagWidth bits above them are forwarded as that warp's tag.
// Only ports with an active handshake may drive a warp's slot. Without this guard, an idle
// port whose (don't-care) tag bits point at the same warp as an active port would overwrite
// the active handshake with 0 and replace its tag, because the last loop iteration wins.
always_comb begin
    opc_eu_handshake_warp = '0;
    opc_eu_tag            = '0;
    for (int didx = 0; didx < DispatchWidth; didx++) begin
        if (opc_eu_handshake_i[didx]) begin
            opc_eu_handshake_warp[opc_eu_tag_i[didx][WidWidth-1:0]] = 1'b1;
            opc_eu_tag[opc_eu_tag_i[didx][WidWidth-1:0]] = opc_eu_tag_i[didx][WidWidth+:TagWidth];
        end
    end
end
180190
// Extract EU Tags
// Drop the low WidWidth bits of every writeback tag and keep the TagWidth bits above them.
for (genvar wb_idx = 0; wb_idx < WritebackWidth; wb_idx++) begin : gen_eu_tags
    assign eu_tag[wb_idx] = eu_tag_i[wb_idx][WidWidth+TagWidth-1:WidWidth];
end : gen_eu_tags
185195
// Combine all arbiter grants
// A warp's bit in arb_gnts is set when any of the DispatchWidth arbiters granted that warp.
always_comb begin
    arb_gnts = '0;
    for (int unsigned port = 0; port < DispatchWidth; port++) begin
        arb_gnts = arb_gnts | arb_gnt[port];
    end
end
203+
186204 // Dispatcher per Warp
187205 for (genvar warp = 0 ; warp < NumWarps; warp++ ) begin : gen_dispatcher
188206 dispatcher # (
@@ -216,8 +234,8 @@ module multi_warp_dispatcher import bgpu_pkg::*; #(
216234 .dec_operands_is_reg_i ( dec_operands_is_reg_i ),
217235 .dec_operands_i ( dec_operands_i ),
218236
219- .opc_ready_i ( arb_gnt [warp] ),
220- .disp_valid_o ( rr_inst_ready[warp] ),
237+ .opc_ready_i ( arb_gnts [warp] ),
238+ .disp_valid_o ( rr_inst_ready[0 ][ warp] ),
221239 .disp_pc_o ( arb_in_data [warp].pc ),
222240 .disp_act_mask_o ( arb_in_data [warp].act_mask ),
223241 .disp_tag_o ( arb_in_data [warp].tag ),
@@ -226,8 +244,8 @@ module multi_warp_dispatcher import bgpu_pkg::*; #(
226244 .disp_operands_is_reg_o ( arb_in_data [warp].operands_is_reg ),
227245 .disp_operands_o ( arb_in_data [warp].operands ),
228246
229- .opc_eu_handshake_i ( opc_eu_handshake_warp[warp] ),
230- .opc_eu_tag_i ( opc_eu_tag_i[WidWidth + : TagWidth ] ),
247+ .opc_eu_handshake_i ( opc_eu_handshake_warp[warp] ),
248+ .opc_eu_tag_i ( opc_eu_tag [warp ] ),
231249
232250 .eu_valid_i ( eu_valid[warp] ),
233251 .eu_tag_i ( eu_tag )
@@ -238,38 +256,70 @@ module multi_warp_dispatcher import bgpu_pkg::*; #(
238256 // # Round Robin Arbiter #
239257 // #######################################################################################
240258
241- rr_arb_tree # (
242- .DataType ( disp_data_t ),
243- .NumIn ( NumWarps ),
244- .ExtPrio ( 1'b0 ),
245- .AxiVldRdy ( 1'b0 ),
246- .LockIn ( 1'b0 ),
247- .FairArb ( 1'b1 )
248- ) i_rr_arb (
249- .clk_i ( clk_i ),
250- .rst_ni ( rst_ni ),
251-
252- .req_i ( rr_inst_ready ),
253- .gnt_o ( arb_gnt ),
254- .data_i ( arb_in_data ),
255-
256- // Directly to Operand Collector
257- .req_o ( disp_valid_o ),
258- .gnt_i ( opc_ready_i ),
259- .data_o ( arb_sel_data ),
260- .idx_o ( arb_sel_wid ),
261-
262- // Unused
263- .flush_i ( 1'b0 ),
264- .rr_i ( '0 )
265- );
266-
267- assign disp_tag_o = { arb_sel_data.tag, arb_sel_wid} ;
268- assign disp_pc_o = arb_sel_data.pc;
269- assign disp_act_mask_o = arb_sel_data.act_mask;
270- assign disp_inst_o = arb_sel_data.inst;
271- assign disp_dst_o = arb_sel_data.dst_reg;
272- assign disp_operands_is_reg_o = arb_sel_data.operands_is_reg;
273- assign disp_operands_o = arb_sel_data.operands;
// One round-robin arbiter per dispatch port. Each arbiter picks one requesting warp and
// presents the selected warp's dispatch data on its own slice of the disp_* outputs.
for (genvar port = 0; port < DispatchWidth; port++) begin : gen_rr_arb
    // Every port above 0 only sees the requests its predecessor saw and did not grant,
    // so a warp already granted on a lower port is masked out for the higher ports.
    if (port != 0) begin : gen_upper_rr_inst_ready
        assign rr_inst_ready[port] = ~arb_gnt[port-1] & rr_inst_ready[port-1];
    end : gen_upper_rr_inst_ready

    rr_arb_tree #(
        .NumIn     ( NumWarps    ),
        .DataType  ( disp_data_t ),
        .ExtPrio   ( 1'b0        ),
        .AxiVldRdy ( 1'b0        ),
        .LockIn    ( 1'b0        ),
        .FairArb   ( 1'b1        )
    ) i_rr_arb (
        .clk_i   ( clk_i  ),
        .rst_ni  ( rst_ni ),

        // Unused
        .flush_i ( 1'b0   ),
        .rr_i    ( '0     ),

        // Per-warp request / grant / payload
        .req_i   ( rr_inst_ready[port] ),
        .gnt_o   ( arb_gnt      [port] ),
        .data_i  ( arb_in_data         ),

        // Directly to Operand Collector
        .req_o   ( disp_valid_o[port] ),
        .gnt_i   ( opc_ready_i [port] ),
        .data_o  ( arb_sel_data[port] ),
        .idx_o   ( arb_sel_wid [port] )
    );

    // Unpack the selected entry onto this port's outputs.
    // The outgoing tag is the per-warp tag with the selected warp index in its low bits.
    assign disp_operands_o       [port] = arb_sel_data[port].operands;
    assign disp_operands_is_reg_o[port] = arb_sel_data[port].operands_is_reg;
    assign disp_dst_o            [port] = arb_sel_data[port].dst_reg;
    assign disp_inst_o           [port] = arb_sel_data[port].inst;
    assign disp_act_mask_o       [port] = arb_sel_data[port].act_mask;
    assign disp_pc_o             [port] = arb_sel_data[port].pc;
    assign disp_tag_o            [port] = {arb_sel_data[port].tag, arb_sel_wid[port]};
end : gen_rr_arb
298+
299+ // #######################################################################################
300+ // # Assertions #
301+ // #######################################################################################
302+
`ifndef SYNTHESIS
// Pairwise sanity checks between any two distinct dispatch ports (simulation only).
for (genvar didx = 0; didx < DispatchWidth; didx++) begin : gen_out_asserts
    for (genvar other_didx = 0; other_didx < DispatchWidth; other_didx++)
    begin : gen_out_asserts_inner
        if (didx != other_didx) begin : gen_diff_didx
            // Check for OPC EU Handshake for the same warp received on multiple dispatch outputs
            assert property (@(posedge clk_i) disable iff (!rst_ni)
                (opc_eu_handshake_i[didx] && opc_eu_handshake_i[other_didx]
                    -> opc_eu_tag_i[didx][WidWidth-1:0]
                        != opc_eu_tag_i[other_didx][WidWidth-1:0]))
            else $error (" OPC EU Handshake for the same warp received!" );

            // Check that no two dispatch outputs dispatch to the same warp in the same cycle.
            // The grant masks of two ports must never overlap. Comparing the masks for
            // inequality is not sufficient: when both ports are valid but neither is
            // granted, both masks are all-zero (equal) although nothing is dispatched.
            assert property (@(posedge clk_i) disable iff (!rst_ni)
                ((arb_gnt[didx] & arb_gnt[other_didx]) == '0))
            else $error (" Two outputs dispatching to the same warp in the same cycle!" );
        end : gen_diff_didx
    end : gen_out_asserts_inner
end : gen_out_asserts
`endif // SYNTHESIS
274324
275325endmodule : multi_warp_dispatcher
0 commit comments