package treecorel3

import chisel3._
import chisel3.util._
import junctions._ // assumed import: provides NastiIO and the Nasti*Channel constructors used below
import freechips.rocketchip.config.Parameters // assumed import: the Parameters config used by this project
class CacheReq(implicit val p: Parameters) extends Bundle with CoreParams { // CoreParams (defined elsewhere) supplies xlen
  val addr = UInt(xlen.W)
  val data = UInt(xlen.W)
  val mask = UInt((xlen / 8).W) // byte-level write mask; all zeros means a read
}

class CacheResp(implicit val p: Parameters) extends Bundle with CoreParams {
  val data = UInt(xlen.W)
}

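// CPU-facing interface. The handshake is valid-only (there is no ready):
// the core watches resp.valid to know when the cache has answered and can
// accept another request; abort cancels an in-flight store before it is
// committed to the data array.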
class CacheIO(implicit val p: Parameters) extends Bundle {
  val abort = Input(Bool())
  val req = Flipped(Valid(new CacheReq))
  val resp = Valid(new CacheResp)
}

class CacheModuleIO(implicit val p: Parameters) extends Bundle {
  val cpu = new CacheIO
  val nasti = new NastiIO
}

trait CacheParams extends CoreParams with HasNastiParameters {
  val nWays = p(NWays) // not used: this cache is direct-mapped
  val nSets = p(NSets)
  val bBytes = p(CacheBlockBytes) // bytes per cache block
  val bBits = bBytes << 3 // bits per cache block
  val blen = log2Ceil(bBytes) // block-offset bits
  val slen = log2Ceil(nSets) // set-index bits
  val tlen = xlen - (slen + blen) // tag bits
  val nWords = bBits / xlen // xlen-wide words per block
  val wBytes = xlen / 8 // bytes per word
  val byteOffsetBits = log2Ceil(wBytes)
  val dataBeats = bBits / nastiXDataBits // NASTI beats per block transfer
}
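
// Illustrative geometry (assumed numbers, not the project's actual config):
// with xlen = 32, nSets = 256 and bBytes = 16, an address splits into
//   tag (tlen = 20) | set index (slen = 8) | block offset (blen = 4)
// bits, giving nWords = 4 words per block and, on a 64-bit NASTI bus,
// dataBeats = 128 / 64 = 2 beats per refill or write-back burst.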

class MetaData(implicit val p: Parameters) extends Bundle with CacheParams {
  val tag = UInt(tlen.W)
}

class Cache(implicit val p: Parameters) extends Module with CacheParams {
  import Chisel._ // FIXME: compatibility import; memory read-enable signals are broken under newer chisel3
  val io = IO(new CacheModuleIO)
  // cache states
  val (s_IDLE :: s_READ_CACHE :: s_WRITE_CACHE :: s_WRITE_BACK :: s_WRITE_ACK ::
    s_REFILL_READY :: s_REFILL :: Nil) = Enum(7)
  val state = RegInit(s_IDLE)
  // memory: per-set valid/dirty bits, tag array, and one byte-sliced data array per word
  val v = RegInit(0.U(nSets.W))
  val d = RegInit(0.U(nSets.W))
  val metaMem = SeqMem(nSets, new MetaData)
  val dataMem = Seq.fill(nWords)(SeqMem(nSets, Vec(wBytes, UInt(8.W))))

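  // Request registers: these latch the CPU request whenever resp.valid is
  // high, so the tag compare and any store data always refer to the
  // previous cycle's accepted request.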
  val addr_reg = Reg(io.cpu.req.bits.addr.cloneType)
  val cpu_data = Reg(io.cpu.req.bits.data.cloneType)
  val cpu_mask = Reg(io.cpu.req.bits.mask.cloneType)

  // Counters: track the NASTI data beats of a refill (r) and a write-back (w);
  // *_wrap_out pulses on the last beat of a burst
  require(dataBeats > 0)
  val (read_count, read_wrap_out) = Counter(io.nasti.r.fire(), dataBeats)
  val (write_count, write_wrap_out) = Counter(io.nasti.w.fire(), dataBeats)

  val is_idle = state === s_IDLE
  val is_read = state === s_READ_CACHE
  val is_write = state === s_WRITE_CACHE
  val is_alloc = state === s_REFILL && read_wrap_out // final refill beat
  val is_alloc_reg = RegNext(is_alloc)

  val hit = Wire(Bool())
  // write on a store hit, on the cycle after a refill completes (unless aborted), or on allocation
  val wen = is_write && (hit || is_alloc_reg) && !io.cpu.abort || is_alloc
  val ren = !wen && (is_idle || is_read) && io.cpu.req.valid
  val ren_reg = RegNext(ren)

  val addr = io.cpu.req.bits.addr
  val idx = addr(slen + blen - 1, blen) // set index of the incoming request
  val tag_reg = addr_reg(xlen - 1, slen + blen) // tag / set / word-offset fields of the registered request
  val idx_reg = addr_reg(slen + blen - 1, blen)
  val off_reg = addr_reg(blen - 1, byteOffsetBits)

  val rmeta = metaMem.read(idx, ren)
  val rdata = Cat((dataMem.map(_.read(idx, ren).asUInt)).reverse)
  val rdata_buf = RegEnable(rdata, ren_reg)
  val refill_buf = Reg(Vec(dataBeats, UInt(nastiXDataBits.W)))
  // read mux: freshly refilled line, this cycle's SeqMem output, or the buffered copy
  val read = Mux(is_alloc_reg, refill_buf.asUInt, Mux(ren_reg, rdata, rdata_buf))

  hit := v(idx_reg) && rmeta.tag === tag_reg // NOTE: important! only valid the cycle after the read, against the registered tag

  // Read Mux
  io.cpu.resp.bits.data := Vec.tabulate(nWords)(i => read((i + 1) * xlen - 1, i * xlen))(off_reg)
  io.cpu.resp.valid := is_idle || (is_read && hit) || (is_alloc_reg && !cpu_mask.orR)

  // accept (and latch) the next request whenever a response goes out
  when(io.cpu.resp.valid) {
    addr_reg := addr
    cpu_data := io.cpu.req.bits.data
    cpu_mask := io.cpu.req.bits.mask
  }

  val wmeta = Wire(new MetaData)
  wmeta.tag := tag_reg

  // write mask and data: a CPU store writes cpu_data under cpu_mask, shifted to the
  // word's byte offset within the block; an allocation writes the whole block
  // (mask of all ones) from the refill buffer plus the final incoming beat
  val wmask = Mux(!is_alloc, (cpu_mask << Cat(off_reg, 0.U(byteOffsetBits.W))).zext, SInt(-1))
  val wdata = Mux(
    !is_alloc,
    Fill(nWords, cpu_data),
    if (refill_buf.size == 1) io.nasti.r.bits.data
    else Cat(io.nasti.r.bits.data, Cat(refill_buf.init.reverse))
  )
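  // The array write below covers both cases: an allocation installs the new
  // tag and marks the set valid and clean, while a store hit marks it dirty.
  // Each per-word SeqMem takes a Vec of bytes plus a byte-enable mask, which
  // is what makes sub-word stores possible.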
  when(wen) {
    v := v.bitSet(idx_reg, true.B)
    d := d.bitSet(idx_reg, !is_alloc)
    when(is_alloc) {
      metaMem.write(idx_reg, wmeta)
    }
    dataMem.zipWithIndex.foreach {
      case (mem, i) =>
        val data = Vec.tabulate(wBytes)(k => wdata(i * xlen + (k + 1) * 8 - 1, i * xlen + k * 8))
        mem.write(idx_reg, data, wmask((i + 1) * wBytes - 1, i * wBytes).toBools)
        mem.suggestName(s"dataMem_${i}")
    }
  }

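  // NASTI (AXI-like) master wiring. Burst addresses are block-aligned
  // (Cat(tag, idx) << blen) and carry dataBeats beats of nastiXDataBits each.
  // ar/aw/w valid and b.ready default to inactive here and are overridden
  // from the FSM below (last-connect semantics); r.ready is held high for
  // the whole refill.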
  io.nasti.ar.bits := NastiReadAddressChannel(
    0.U, Cat(tag_reg, idx_reg) << blen.U, log2Up(nastiXDataBits / 8).U, (dataBeats - 1).U)
  io.nasti.ar.valid := false.B
  // read data
  io.nasti.r.ready := state === s_REFILL
  when(io.nasti.r.fire()) { refill_buf(read_count) := io.nasti.r.bits.data }

  // write addr
  io.nasti.aw.bits := NastiWriteAddressChannel(
    0.U, Cat(rmeta.tag, idx_reg) << blen.U, log2Up(nastiXDataBits / 8).U, (dataBeats - 1).U)
  io.nasti.aw.valid := false.B
  // write data
  io.nasti.w.bits := NastiWriteDataChannel(
    Vec.tabulate(dataBeats)(i => read((i + 1) * nastiXDataBits - 1, i * nastiXDataBits))(write_count),
    None,
    write_wrap_out)
  io.nasti.w.valid := false.B
  // write resp
  io.nasti.b.ready := false.B

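  // FSM overview:
  //   s_IDLE -> s_READ_CACHE / s_WRITE_CACHE on a request (mask.orR selects a write);
  //   a miss on a dirty line takes s_WRITE_BACK -> s_WRITE_ACK -> s_REFILL_READY -> s_REFILL,
  //   a miss on a clean line goes straight to s_REFILL;
  //   after the refill, a pending store replays through s_WRITE_CACHE.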
  // Cache FSM
  val is_dirty = v(idx_reg) && d(idx_reg)
  switch(state) {
    is(s_IDLE) {
      when(io.cpu.req.valid) {
        state := Mux(io.cpu.req.bits.mask.orR, s_WRITE_CACHE, s_READ_CACHE)
      }
    }
    is(s_READ_CACHE) {
      when(hit) {
        when(io.cpu.req.valid) {
          state := Mux(io.cpu.req.bits.mask.orR, s_WRITE_CACHE, s_READ_CACHE)
        }.otherwise {
          state := s_IDLE
        }
      }.otherwise {
        io.nasti.aw.valid := is_dirty
        io.nasti.ar.valid := !is_dirty
        when(io.nasti.aw.fire()) {
          state := s_WRITE_BACK
        }.elsewhen(io.nasti.ar.fire()) {
          state := s_REFILL
        }
      }
    }
    is(s_WRITE_CACHE) {
      when(hit || is_alloc_reg || io.cpu.abort) {
        state := s_IDLE
      }.otherwise {
        io.nasti.aw.valid := is_dirty
        io.nasti.ar.valid := !is_dirty
        when(io.nasti.aw.fire()) {
          state := s_WRITE_BACK
        }.elsewhen(io.nasti.ar.fire()) {
          state := s_REFILL
        }
      }
    }
    is(s_WRITE_BACK) {
      io.nasti.w.valid := true.B
      when(write_wrap_out) {
        state := s_WRITE_ACK
      }
    }
    is(s_WRITE_ACK) {
      io.nasti.b.ready := true.B
      when(io.nasti.b.fire()) {
        state := s_REFILL_READY
      }
    }
    is(s_REFILL_READY) {
      io.nasti.ar.valid := true.B
      when(io.nasti.ar.fire()) {
        state := s_REFILL
      }
    }
    is(s_REFILL) {
      when(read_wrap_out) {
        state := Mux(cpu_mask.orR, s_WRITE_CACHE, s_IDLE)
      }
    }
  }
}
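
// A minimal usage sketch, assuming some project config `SomeConfig` that
// binds NWays, NSets and CacheBlockBytes (hypothetical name, defined elsewhere):
//
//   implicit val p: Parameters = new SomeConfig
//   val cache = Module(new Cache)
//   cache.io.cpu.abort         := false.B
//   cache.io.cpu.req.valid     := true.B
//   cache.io.cpu.req.bits.addr := "h8000_0000".U
//   cache.io.cpu.req.bits.data := 0.U
//   cache.io.cpu.req.bits.mask := 0.U // all-zero mask => this request is a read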