Skip to content

Commit e680967

Browse files
authored
IQ Refactor (#52)
1 parent eea09cd commit e680967

File tree

14 files changed: +193 additions, -137 deletions (lines changed)

Makefile

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,14 @@ SLANG_FLAGS = \
2727
SLANG_HEADER_OUTPUT = sim/slang/slang.hpp
2828

2929
SRC_FILES = \
30+
src/lib/PriorityEncoder.sv \
31+
src/lib/OHEncoder.sv \
32+
src/lib/RangeMaskGen.sv \
33+
src/lib/PrefixSum.sv \
34+
src/lib/PrefixRed.sv \
35+
src/lib/OpDownsample.sv \
36+
src/lib/PopCnt.sv \
37+
src/lib/FIFO.sv \
3038
src/Config.sv \
3139
src/Include.sv \
3240
src/InstrDecoder.sv \
@@ -44,8 +52,6 @@ SRC_FILES = \
4452
src/Multiply.sv \
4553
src/Divide.sv \
4654
src/MMIO.sv \
47-
src/LZCnt.sv \
48-
src/PopCnt.sv \
4955
src/BranchSelector.sv \
5056
src/MemRTL.sv \
5157
src/MemRTL2W.sv \
@@ -77,15 +83,11 @@ SRC_FILES = \
7783
src/ExternalAXISim.sv \
7884
src/CacheWriteInterface.sv \
7985
src/CacheReadInterface.sv \
80-
src/FIFO.sv \
8186
src/RegFileRTL.sv \
8287
src/BranchHandler.sv \
83-
src/PriorityEncoder.sv \
8488
src/StoreDataIQ.sv \
8589
src/StoreDataLoad.sv \
8690
src/StoreQueueBackend.sv \
87-
src/OHEncoder.sv \
88-
src/RangeMaskGen.sv \
8991
src/Scheduler.sv \
9092
src/ResultFlagsSplit.sv \
9193
src/InstrAligner.sv \

scripts/test_suite.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
arr.remove(f"{test_dir}/rv32ui-p-ma_data")
1616
arr.remove(f"{test_dir}/rv32ui-v-ma_data")
1717

18-
categories = ["rv32ui", "rv32uc", "rv32si", "rv32mi"]
18+
categories = ["rv32mi", "rv32si", "rv32ui", "rv32um", "rv32uc", "rv32ua", "rv32uzba", "rv32uzbb", "rv32uzbs"]
1919

2020
binary = "./obj_dir/VTop"
2121

src/CacheLineManager.sv

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,8 @@ end
5252
typedef logic[$clog2(NUM_CT_READS)-1:0] ReadIdx;
5353

5454
ReadIdx readIdx_c;
55-
PriorityEncoder#(NUM_CT_READS, 1) penc(readUnused_c, '{readIdx_c}, '{null});
55+
logic _unusedSignal;
56+
PriorityEncoder#(NUM_CT_READS, 1) penc(readUnused_c, '{readIdx_c}, '{_unusedSignal});
5657
ReadIdx readIdx_r[1:0];
5758
always_ff@(posedge clk) readIdx_r <= {readIdx_r[0], readIdx_c};
5859

src/IntALU.sv

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -29,16 +29,18 @@ assign OUT_zcFwd.result = resC;
2929
assign OUT_zcFwd.tag = IN_uop.tagDst;
3030
assign OUT_zcFwd.valid = IN_uop.valid && HasFU(IN_uop.fu) && !IN_uop.tagDst[$bits(Tag)-1];
3131

32-
wire[5:0] resLzTz;
32+
wire[4:0] resLzTz;
33+
wire resLzTzValid;
3334

3435
reg[31:0] srcAbitRev;
3536
always_comb begin
3637
for (integer i = 0; i < 32; i=i+1)
3738
srcAbitRev[i] = srcA[31-i];
3839
end
39-
LZCnt lzc (
40-
.in(IN_uop.opcode == BM_CLZ ? srcA : srcAbitRev),
41-
.out(resLzTz)
40+
PriorityEncoder#(32) lzc(
41+
.IN_data(IN_uop.opcode == BM_CLZ ? srcAbitRev : srcA),
42+
.OUT_idx('{resLzTz}),
43+
.OUT_idxValid('{resLzTzValid})
4244
);
4345

4446
wire[5:0] resPopCnt;
@@ -98,7 +100,7 @@ always_comb begin
98100

99101
FU_BITMANIP: if (HasFU(FU_BITMANIP)) case (IN_uop.opcode)
100102
BM_CLZ,
101-
BM_CTZ: resC = {26'b0, resLzTz};
103+
BM_CTZ: resC = resLzTzValid ? {27'b0, resLzTz} : 32'd32;
102104
BM_CPOP: resC = {26'b0, resPopCnt};
103105
BM_ROL: resC = (srcA << srcB[4:0]) | (srcA >> (32 - srcB[4:0]));
104106
BM_ROR: resC = (srcA >> srcB[4:0]) | (srcA << (32 - srcB[4:0]));

src/IssueQueue.sv

Lines changed: 85 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,9 @@ localparam IMUL_DLY=9-4-2;
5353

5454
localparam AGU_PORT_IDX = (PORT_IDX >= NUM_ALUS) ? (PORT_IDX - NUM_ALUS) : PORT_IDX;
5555

56+
localparam AVAIL_SR_LEN = 5;
57+
58+
typedef logic[1:0] AvailWait_t;
5659
typedef struct packed
5760
{
5861
logic[IMM_BITS-1:0] imm;
@@ -74,98 +77,106 @@ typedef struct packed
7477

7578
R_ST_UOp queue[SIZE-1:0];
7679

77-
reg[$clog2(SIZE):0] insertIndex;
80+
reg[$clog2(SIZE+1)-1:0] insertIndex;
7881
reg[32:0] reservedWBs;
7982

80-
reg[NUM_OPERANDS-1:0] newAvail[SIZE-1:0];
81-
reg[NUM_OPERANDS-1:0] newAvail_dl[SIZE-1:0];
83+
reg[SIZE-1:0][NUM_OPERANDS-1:0] newAvail_c[AVAIL_SR_LEN-1:0];
84+
reg[SIZE-1:0][NUM_OPERANDS-1:0] newAvail_r[AVAIL_SR_LEN-1:0];
85+
always_ff@(posedge clk) begin
86+
for (integer i = 0; i < AVAIL_SR_LEN; i=i+1) begin
87+
for (integer j = 0; j < SIZE; j=j+1) begin
88+
if (deq.valid && collapseMask_c[j] && !IN_stall && !IN_branch.taken)
89+
newAvail_r[i][j] <= (j == (SIZE-1)) ? '0 : newAvail_c[i][j+1];
90+
else
91+
newAvail_r[i][j] <= newAvail_c[i][j];
92+
end
93+
end
94+
end
8295

8396
always_comb begin
84-
for (integer i = 0; i < SIZE; i=i+1) begin
85-
86-
for (integer k = 0; k < NUM_OPERANDS; k=k+1) begin
87-
newAvail[i][k] = 0;
88-
newAvail_dl[i][k] = 0;
89-
end
97+
newAvail_c[0] = '0;
98+
for (integer j = 1; j < AVAIL_SR_LEN-1; j=j+1)
99+
newAvail_c[j] = newAvail_r[j+1];
100+
newAvail_c[AVAIL_SR_LEN-1] = '0;
90101

102+
for (integer i = 0; i < SIZE; i=i+1) begin
91103
for (integer j = 0; j < RESULT_BUS_COUNT; j=j+1) begin
92104
for (integer k = 0; k < NUM_OPERANDS; k=k+1)
93105
if (IN_flagUOp[j].valid && !IN_flagUOp[j].tagDst[$bits(Tag)-1] && queue[i].tags[k] == IN_flagUOp[j].tagDst)
94-
newAvail[i][k] = 1;
106+
newAvail_c[0][i][k] = 1;
95107
end
96108

97109
for (integer j = 0; j < NUM_ALUS; j=j+1) begin
98110
if (IN_issueUOps[j].valid && !IN_issueUOps[j].tagDst[$bits(Tag)-1]) begin
99-
if (IN_issueUOps[j].fu == FU_INT || IN_issueUOps[j].fu == FU_BRANCH || IN_issueUOps[j].fu == FU_BITMANIP
100-
) begin
101-
for (integer k = 0; k < NUM_OPERANDS; k=k+1)
102-
if (queue[i].tags[k] == IN_issueUOps[j].tagDst) newAvail[i][k] = 1;
103-
end
104-
else if (IN_issueUOps[j].fu == FU_FPU || IN_issueUOps[j].fu == FU_FMUL) begin
105-
for (integer k = 0; k < NUM_OPERANDS; k=k+1)
106-
if (queue[i].tags[k] == IN_issueUOps[j].tagDst) newAvail_dl[i][k] = 1;
107-
end
111+
for (integer k = 0; k < NUM_OPERANDS; k=k+1)
112+
if (queue[i].tags[k] == IN_issueUOps[j].tagDst) begin
113+
case (IN_issueUOps[j].fu)
114+
FU_INT, FU_BRANCH, FU_BITMANIP: newAvail_c[0][i][k] = 1;
115+
FU_FPU, FU_FMUL: newAvail_c[1][i][k] = 1;
116+
FU_MUL: if (i < insertIndex) newAvail_c[4][i][k] = 1;
117+
default: ;
118+
endcase
119+
end
108120
end
109121
end
110122
end
111123
end
112124

125+
logic[NUM_OPERANDS-1:0] queueAvail_c[1:0][SIZE-1:0];
126+
always_comb begin
127+
for (integer i = 0; i < SIZE; i=i+1) begin
128+
queueAvail_c[0][i] = queue[i].avail | newAvail_c[0][i];
129+
queueAvail_c[1][i] = queueAvail_c[0][i] | newAvail_c[1][i];
130+
end
131+
end
132+
113133
// If store data queues wish to defer any op,
114134
// we must defer all following ones as well to
115135
// maintain ordering.
116-
reg defer[NUM_UOPS-1:0];
117-
always_comb begin
118-
defer[0] = IN_defer[0];
119-
for (integer i = 1; i < NUM_UOPS; i=i+1)
120-
defer[i] = defer[i-1] | IN_defer[i];
121-
end
136+
logic[NUM_UOPS-1:0] defer;
137+
PrefixRed#(NUM_UOPS) deferProp(IN_defer, defer);
122138

123-
R_UOp enqCandidates[NUM_ENQUEUE-1:0];
139+
// Select enqueue candidates
140+
logic[NUM_UOPS-1:0] isBaseCand_c;
124141
always_comb begin
125-
logic[$clog2(NUM_ENQUEUE)-1:0] idx = 0;
126-
logic[$clog2(SIZE):0] qIdx = insertIndex;
127-
logic limit = 0;
128-
129-
for (integer i = 0; i < NUM_ENQUEUE; i=i+1)
130-
enqCandidates[i] = R_UOp'{valid: 0, validIQ: 0, default: 'x};
131-
132-
for (integer i = 0; i < NUM_UOPS; i=i+1) begin
133-
OUT_stall[i] = 0;
134-
// check if this is a candidate to enqueue
135-
if (IN_uop[i].validIQ[PORT_IDX] && HasFU(IN_uop[i].fu) &&
136-
142+
for (integer i = 0; i < NUM_UOPS; i++) begin
143+
isBaseCand_c[i] = IN_uop[i].validIQ[PORT_IDX] && HasFU(IN_uop[i].fu) &&
137144
(!(IN_uop[i].fu == FU_AGU && IN_uop[i].opcode < LSU_SC_W) || (IN_uop[i].loadSqN[0] == AGU_PORT_IDX[0])) &&
138145
(!(IN_uop[i].fu == FU_AGU && IN_uop[i].opcode >= LSU_SC_W) || (IN_uop[i].storeSqN[0] == AGU_PORT_IDX[0])) &&
139146
(!(IN_uop[i].fu == FU_ATOMIC) || (IN_uop[i].storeSqN[0] == AGU_PORT_IDX[0])) &&
140147

141148
(PORT_IDX >= NUM_ALUS || IN_uopOrdering[i] == IntUOpOrder_t'(PORT_IDX)) &&
142149

143150
// Edge Case: INT ports do not enqueue AMOSWAP (no int uop needed)
144-
(PORT_IDX >= NUM_ALUS || IN_uop[i].fu != FU_ATOMIC || IN_uop[i].opcode != ATOMIC_AMOSWAP_W)
145-
) begin
146-
// check if we have capacity to enqueue this op now
147-
if (!limit && qIdx != $bits(qIdx)'(SIZE) && !IN_branch.taken && !defer[i]) begin
148-
149-
if (NUM_ENQUEUE == NUM_UOPS)
150-
enqCandidates[i] = IN_uop[i];
151-
else begin
152-
enqCandidates[idx] = IN_uop[i];
153-
{limit, idx} = idx + 1;
154-
end
155-
156-
OUT_stall[i] = 0;
157-
qIdx = qIdx + 1;
158-
end
159-
else OUT_stall[i] = 1;
160-
end
151+
(PORT_IDX >= NUM_ALUS || IN_uop[i].fu != FU_ATOMIC || IN_uop[i].opcode != ATOMIC_AMOSWAP_W);
161152
end
162153
end
154+
logic[$clog2(NUM_ENQUEUE+1)-1:0] numAllowedEnq_c;
155+
always_comb begin
156+
// verilator lint_off WIDTHTRUNC
157+
// verilator lint_off WIDTHEXPAND
158+
logic[$clog2(SIZE+1)-1:0] diff = SIZE - insertIndex;
159+
numAllowedEnq_c = diff < NUM_ENQUEUE ? diff : NUM_ENQUEUE;
160+
// verilator lint_on WIDTHTRUNC
161+
// verilator lint_on WIDTHEXPAND
162+
end
163+
R_UOp enqCandidates[NUM_ENQUEUE-1:0];
164+
OpDownsample#(NUM_UOPS, NUM_ENQUEUE, $bits(R_UOp)) enqDS
165+
(
166+
.IN_ops(IN_uop),
167+
.IN_opBaseValid(isBaseCand_c),
168+
.IN_opValid(~(defer | {NUM_UOPS{IN_branch.taken}})),
169+
.OUT_opStall(OUT_stall),
170+
171+
.IN_dynMaxNumOut(numAllowedEnq_c),
172+
.OUT_ops(enqCandidates)
173+
);
163174

164175
reg[SIZE-1:0] deqCandidate_c;
165176
always_comb begin
166177
for (integer i = 0; i < SIZE; i=i+1) begin
167178
deqCandidate_c[i] = (i < insertIndex) &&
168-
&(queue[i].avail | newAvail[i]) &&
179+
&(queueAvail_c[0][i]) &&
169180
(!HasFU(FU_DIV) || queue[i].fu != FU_DIV || !IN_doNotIssueDiv) &&
170181
(!HasFU(FU_FDIV) || queue[i].fu != FU_FDIV || !IN_doNotIssueFDiv) &&
171182
!((queue[i].fu == FU_INT || queue[i].fu == FU_BRANCH || queue[i].fu == FU_BITMANIP ||
@@ -192,21 +203,29 @@ always_comb begin
192203
end
193204
end
194205

206+
207+
195208
struct packed
196209
{
197210
logic[$clog2(SIZE)-1:0] idx;
198211
logic valid;
199212
} deq;
200213
PriorityEncoder #(SIZE) penc(deqCandidate_c, '{deq.idx}, '{deq.valid});
201214

215+
logic[SIZE-1:0] collapseMask_c;
216+
always_comb begin
217+
for (integer i = 0; i < SIZE; i=i+1)
218+
collapseMask_c[i] = (i >= deq.idx);
219+
end
220+
202221
always_ff@(posedge clk /*or posedge rst*/) begin
203222

204223
reg[ID_LEN:0] newInsertIndex = 'x;
205224

206225
// Update availability
207-
for (integer i = 0; i < SIZE; i=i+1) begin
208-
queue[i].avail <= queue[i].avail | newAvail[i] | newAvail_dl[i];
209-
end
226+
for (integer i = 0; i < SIZE; i=i+1)
227+
queue[i].avail <= queueAvail_c[1][i];
228+
210229
reservedWBs <= {1'b0, reservedWBs[32:1]};
211230

212231
if (rst) begin
@@ -222,7 +241,11 @@ always_ff@(posedge clk /*or posedge rst*/) begin
222241
newInsertIndex = 0;
223242
// Set insert index to first invalid entry
224243
for (integer i = 0; i < SIZE; i=i+1) begin
225-
if (i < insertIndex && $signed(queue[i].sqN - IN_branch.sqN) <= 0) begin
244+
if (i < insertIndex &&
245+
(IN_branch.flush ?
246+
$signed(queue[i].sqN - IN_branch.sqN) < 0 :
247+
$signed(queue[i].sqN - IN_branch.sqN) <= 0)
248+
) begin
226249
newInsertIndex = i[$clog2(SIZE):0] + 1;
227250
end
228251
end
@@ -288,9 +311,9 @@ always_ff@(posedge clk /*or posedge rst*/) begin
288311

289312
// Shift other ops forward
290313
for (integer i = 0; i < SIZE-1; i=i+1) begin
291-
if (i >= deq.idx) begin
314+
if (collapseMask_c[i]) begin
292315
queue[i] <= queue[i+1];
293-
queue[i].avail <= queue[i+1].avail | newAvail[i+1] | newAvail_dl[i+1];
316+
queue[i].avail <= queueAvail_c[1][i+1];
294317
end
295318
end
296319
end

src/LZCnt.sv

Lines changed: 0 additions & 62 deletions
This file was deleted.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)