@@ -53,6 +53,9 @@ localparam IMUL_DLY=9-4-2;
5353
5454localparam AGU_PORT_IDX = (PORT_IDX >= NUM_ALUS ) ? (PORT_IDX - NUM_ALUS ) : PORT_IDX ;
5555
56+ localparam AVAIL_SR_LEN = 5 ;
57+
58+ typedef logic [1 : 0 ] AvailWait_t ;
5659typedef struct packed
5760{
5861 logic [IMM_BITS - 1 : 0 ] imm;
@@ -74,98 +77,106 @@ typedef struct packed
7477
7578R_ST_UOp queue[SIZE - 1 : 0 ];
7679
77- reg [$clog2 (SIZE ) : 0 ] insertIndex;
80+ reg [$clog2 (SIZE + 1 ) - 1 : 0 ] insertIndex;
7881reg [32 : 0 ] reservedWBs;
7982
80- reg [NUM_OPERANDS - 1 : 0 ] newAvail[SIZE - 1 : 0 ];
81- reg [NUM_OPERANDS - 1 : 0 ] newAvail_dl[SIZE - 1 : 0 ];
83+ reg [SIZE - 1 : 0 ][NUM_OPERANDS - 1 : 0 ] newAvail_c[AVAIL_SR_LEN - 1 : 0 ];
84+ reg [SIZE - 1 : 0 ][NUM_OPERANDS - 1 : 0 ] newAvail_r[AVAIL_SR_LEN - 1 : 0 ];
// Registered side of the delayed-availability shift register.
// Every stage of newAvail_r captures the matching stage of newAvail_c on
// each clock edge. While an op is being dequeued (and the queue is neither
// stalled nor being flushed by a taken branch), rows selected by
// collapseMask_c take the value of the row above them instead, and the top
// row is cleared.
// NOTE(review): presumably this mirrors the row shift applied to `queue`
// itself on dequeue -- confirm the two conditions stay in sync.
always_ff @(posedge clk) begin
    for (integer stage = 0; stage < AVAIL_SR_LEN; stage = stage + 1) begin
        for (integer row = 0; row < SIZE; row = row + 1) begin
            if (!(deq.valid && collapseMask_c[row] && !IN_stall && !IN_branch.taken))
                // Row is not moving this cycle: plain register.
                newAvail_r[stage][row] <= newAvail_c[stage][row];
            else if (row == (SIZE - 1))
                // Top row has no successor to pull from.
                newAvail_r[stage][row] <= '0;
            else
                newAvail_r[stage][row] <= newAvail_c[stage][row + 1];
        end
    end
end
8295
8396always_comb begin
84- for (integer i = 0 ; i < SIZE ; i= i+ 1 ) begin
85-
86- for (integer k = 0 ; k < NUM_OPERANDS ; k= k+ 1 ) begin
87- newAvail[i][k] = 0 ;
88- newAvail_dl[i][k] = 0 ;
89- end
97+ newAvail_c[0 ] = '0 ;
98+ for (integer j = 1 ; j < AVAIL_SR_LEN - 1 ; j= j+ 1 )
99+ newAvail_c[j] = newAvail_r[j+ 1 ];
100+ newAvail_c[AVAIL_SR_LEN - 1 ] = '0 ;
90101
102+ for (integer i = 0 ; i < SIZE ; i= i+ 1 ) begin
91103 for (integer j = 0 ; j < RESULT_BUS_COUNT ; j= j+ 1 ) begin
92104 for (integer k = 0 ; k < NUM_OPERANDS ; k= k+ 1 )
93105 if (IN_flagUOp[j].valid && ! IN_flagUOp[j].tagDst[$bits (Tag)- 1 ] && queue[i].tags[k] == IN_flagUOp[j].tagDst)
94- newAvail [i][k] = 1 ;
106+ newAvail_c[ 0 ] [i][k] = 1 ;
95107 end
96108
97109 for (integer j = 0 ; j < NUM_ALUS ; j= j+ 1 ) begin
98110 if (IN_issueUOps[j].valid && ! IN_issueUOps[j].tagDst[$bits (Tag)- 1 ]) begin
99- if (IN_issueUOps[j].fu == FU_INT || IN_issueUOps[j].fu == FU_BRANCH || IN_issueUOps[j].fu == FU_BITMANIP
100- ) begin
101- for ( integer k = 0 ; k < NUM_OPERANDS ; k = k + 1 )
102- if (queue[i].tags[k] == IN_issueUOps[j].tagDst) newAvail [i][k] = 1 ;
103- end
104- else if (IN_issueUOps[j].fu == FU_FPU || IN_issueUOps[j].fu == FU_FMUL ) begin
105- for ( integer k = 0 ; k < NUM_OPERANDS ; k = k + 1 )
106- if (queue[i].tags[k] == IN_issueUOps[j].tagDst) newAvail_dl[i][k] = 1 ;
107- end
111+ for ( integer k = 0 ; k < NUM_OPERANDS ; k = k + 1 )
112+ if (queue[i].tags[k] == IN_issueUOps[j].tagDst ) begin
113+ case (IN_issueUOps[j].fu )
114+ FU_INT , FU_BRANCH , FU_BITMANIP : newAvail_c[ 0 ] [i][k] = 1 ;
115+ FU_FPU , FU_FMUL : newAvail_c[ 1 ][i][k] = 1 ;
116+ FU_MUL : if (i < insertIndex) newAvail_c[ 4 ][i][k] = 1 ;
117+ default : ;
118+ endcase
119+ end
108120 end
109121 end
110122 end
111123end
112124
// Per-entry operand availability for the current cycle.
//   queueAvail_c[0][e] : stored avail bits OR newAvail_c[0]
//   queueAvail_c[1][e] : the above OR newAvail_c[1]
logic [NUM_OPERANDS-1:0] queueAvail_c[1:0][SIZE-1:0];
always_comb begin
    for (integer entry = 0; entry < SIZE; entry = entry + 1) begin
        queueAvail_c[0][entry] = queue[entry].avail | newAvail_c[0][entry];
        queueAvail_c[1][entry] = queue[entry].avail | newAvail_c[0][entry] | newAvail_c[1][entry];
    end
end
132+
113133// If store data queues wish to defer any op,
114134// we must defer all following ones as well to
115135// maintain ordering.
116- reg defer[NUM_UOPS - 1 : 0 ];
117- always_comb begin
118- defer[0 ] = IN_defer[0 ];
119- for (integer i = 1 ; i < NUM_UOPS ; i= i+ 1 )
120- defer[i] = defer[i- 1 ] | IN_defer[i];
121- end
136+ logic [NUM_UOPS - 1 : 0 ] defer;
137+ PrefixRed # (NUM_UOPS ) deferProp (IN_defer, defer);
122138
123- R_UOp enqCandidates[NUM_ENQUEUE - 1 : 0 ];
139+ // Select enqueue candidates
140+ logic [NUM_UOPS - 1 : 0 ] isBaseCand_c;
124141always_comb begin
125- logic [$clog2 (NUM_ENQUEUE )- 1 : 0 ] idx = 0 ;
126- logic [$clog2 (SIZE ): 0 ] qIdx = insertIndex;
127- logic limit = 0 ;
128-
129- for (integer i = 0 ; i < NUM_ENQUEUE ; i= i+ 1 )
130- enqCandidates[i] = R_UOp'{ valid: 0 , validIQ: 0 , default : 'x } ;
131-
132- for (integer i = 0 ; i < NUM_UOPS ; i= i+ 1 ) begin
133- OUT_stall[i] = 0 ;
134- // check if this is a candidate to enqueue
135- if (IN_uop[i].validIQ[PORT_IDX ] && HasFU (IN_uop[i].fu) &&
136-
142+ for (integer i = 0 ; i < NUM_UOPS ; i++ ) begin
143+ isBaseCand_c[i] = IN_uop[i].validIQ[PORT_IDX ] && HasFU (IN_uop[i].fu) &&
137144 (! (IN_uop[i].fu == FU_AGU && IN_uop[i].opcode < LSU_SC_W ) || (IN_uop[i].loadSqN[0 ] == AGU_PORT_IDX [0 ])) &&
138145 (! (IN_uop[i].fu == FU_AGU && IN_uop[i].opcode >= LSU_SC_W ) || (IN_uop[i].storeSqN[0 ] == AGU_PORT_IDX [0 ])) &&
139146 (! (IN_uop[i].fu == FU_ATOMIC ) || (IN_uop[i].storeSqN[0 ] == AGU_PORT_IDX [0 ])) &&
140147
141148 (PORT_IDX >= NUM_ALUS || IN_uopOrdering[i] == IntUOpOrder_t ' (PORT_IDX )) &&
142149
143150 // Edge Case: INT ports do not enqueue AMOSWAP (no int uop needed)
144- (PORT_IDX >= NUM_ALUS || IN_uop[i].fu != FU_ATOMIC || IN_uop[i].opcode != ATOMIC_AMOSWAP_W )
145- ) begin
146- // check if we have capacity to enqueue this op now
147- if (! limit && qIdx != $bits (qIdx)'(SIZE ) && ! IN_branch.taken && ! defer[i]) begin
148-
149- if (NUM_ENQUEUE == NUM_UOPS )
150- enqCandidates[i] = IN_uop[i];
151- else begin
152- enqCandidates[idx] = IN_uop[i];
153- { limit, idx} = idx + 1 ;
154- end
155-
156- OUT_stall[i] = 0 ;
157- qIdx = qIdx + 1 ;
158- end
159- else OUT_stall[i] = 1 ;
160- end
151+ (PORT_IDX >= NUM_ALUS || IN_uop[i].fu != FU_ATOMIC || IN_uop[i].opcode != ATOMIC_AMOSWAP_W );
161152 end
162153end
// Upper bound on the number of uops that may be enqueued this cycle:
// the number of entries above insertIndex (SIZE - insertIndex), capped at
// NUM_ENQUEUE.
logic [$clog2(NUM_ENQUEUE + 1)-1:0] numAllowedEnq_c;
always_comb begin
    // Declared without an initializer on purpose. A variable declared in a
    // procedural block is static by default, and an initializer on a static
    // variable is a one-time initialization rather than an assignment that
    // re-executes with the block. Assigning procedurally makes the value
    // track insertIndex under every tool.
    logic [$clog2(SIZE + 1)-1:0] diff;

    // verilator lint_off WIDTHTRUNC
    // verilator lint_off WIDTHEXPAND
    diff = SIZE - insertIndex;
    numAllowedEnq_c = diff < NUM_ENQUEUE ? diff : NUM_ENQUEUE;
    // verilator lint_on WIDTHTRUNC
    // verilator lint_on WIDTHEXPAND
end
// Enqueue candidate selection.
// OpDownsample receives all NUM_UOPS incoming uops, the static candidate
// mask (isBaseCand_c), a dynamic validity mask (not deferred and no taken
// branch) and the per-cycle limit numAllowedEnq_c. It drives OUT_stall and
// the NUM_ENQUEUE-wide enqCandidates array.
// NOTE(review): exact selection/stall policy lives in OpDownsample -- see
// that module for details.
R_UOp enqCandidates[NUM_ENQUEUE-1:0];
OpDownsample#(NUM_UOPS, NUM_ENQUEUE, $bits(R_UOp)) enqDS
(
    // inputs
    .IN_ops(IN_uop),
    .IN_opBaseValid(isBaseCand_c),
    .IN_opValid(~defer & ~{NUM_UOPS{IN_branch.taken}}),
    .IN_dynMaxNumOut(numAllowedEnq_c),

    // outputs
    .OUT_opStall(OUT_stall),
    .OUT_ops(enqCandidates)
);
163174
164175reg [SIZE - 1 : 0 ] deqCandidate_c;
165176always_comb begin
166177 for (integer i = 0 ; i < SIZE ; i= i+ 1 ) begin
167178 deqCandidate_c[i] = (i < insertIndex) &&
168- & (queue[i].avail | newAvail [i]) &&
179+ & (queueAvail_c[ 0 ] [i]) &&
169180 (! HasFU (FU_DIV ) || queue[i].fu != FU_DIV || ! IN_doNotIssueDiv) &&
170181 (! HasFU (FU_FDIV ) || queue[i].fu != FU_FDIV || ! IN_doNotIssueFDiv) &&
171182 ! ((queue[i].fu == FU_INT || queue[i].fu == FU_BRANCH || queue[i].fu == FU_BITMANIP ||
@@ -192,21 +203,29 @@ always_comb begin
192203 end
193204end
194205
206+
207+
195208struct packed
196209{
197210 logic [$clog2 (SIZE )- 1 : 0 ] idx;
198211 logic valid;
199212} deq;
200213PriorityEncoder # (SIZE ) penc (deqCandidate_c, '{ deq.idx} , '{ deq.valid} );
201214
// Mask of queue rows at or above the dequeue index: bit i is set iff
// i >= deq.idx.
logic [SIZE-1:0] collapseMask_c;
always_comb begin
    // All-ones shifted left by deq.idx clears exactly the bits below the
    // dequeue index.
    collapseMask_c = {SIZE{1'b1}} << deq.idx;
end
220+
202221always_ff @ (posedge clk /* or posedge rst*/ ) begin
203222
204223 reg [ID_LEN : 0 ] newInsertIndex = 'x ;
205224
206225 // Update availability
207- for (integer i = 0 ; i < SIZE ; i= i+ 1 ) begin
208- queue[i].avail <= queue[i].avail | newAvail[i] | newAvail_dl [i];
209- end
226+ for (integer i = 0 ; i < SIZE ; i= i+ 1 )
227+ queue[i].avail <= queueAvail_c[ 1 ] [i];
228+
210229 reservedWBs <= { 1'b0 , reservedWBs[32 : 1 ]} ;
211230
212231 if (rst) begin
@@ -222,7 +241,11 @@ always_ff@(posedge clk /*or posedge rst*/) begin
222241 newInsertIndex = 0 ;
223242 // Set insert index to first invalid entry
224243 for (integer i = 0 ; i < SIZE ; i= i+ 1 ) begin
225- if (i < insertIndex && $signed (queue[i].sqN - IN_branch.sqN) <= 0 ) begin
244+ if (i < insertIndex &&
245+ (IN_branch.flush ?
246+ $signed (queue[i].sqN - IN_branch.sqN) < 0 :
247+ $signed (queue[i].sqN - IN_branch.sqN) <= 0 )
248+ ) begin
226249 newInsertIndex = i[$clog2 (SIZE ): 0 ] + 1 ;
227250 end
228251 end
@@ -288,9 +311,9 @@ always_ff@(posedge clk /*or posedge rst*/) begin
288311
289312 // Shift other ops forward
290313 for (integer i = 0 ; i < SIZE - 1 ; i= i+ 1 ) begin
291- if (i >= deq.idx ) begin
314+ if (collapseMask_c[i] ) begin
292315 queue[i] <= queue[i+ 1 ];
293- queue[i].avail <= queue[i + 1 ].avail | newAvail[i + 1 ] | newAvail_dl [i+ 1 ];
316+ queue[i].avail <= queueAvail_c[ 1 ] [i+ 1 ];
294317 end
295318 end
296319 end
0 commit comments