@@ -21,16 +21,18 @@ import chisel3._
2121import chisel3 .util ._
2222import coupledL2 .prefetch .PfSource
2323import coupledL2 .utils ._
24- import coupledL2 .tl2tl .MSHRStatus
2524import utility ._
2625
2726// TODO: Accommodate CHI
2827class TopDownMonitor ()(implicit p : Parameters ) extends L2Module {
2928 val banks = 1 << bankBits
3029 val io = IO (new Bundle () {
3130 val dirResult = Vec (banks, Flipped (ValidIO (new DirResult )))
32- val msStatus = Vec (banks, Vec (mshrsAll, Flipped (ValidIO (new MSHRStatus ))))
33- val latePF = Vec (banks, Flipped (ValidIO (UInt (PfSource .pfSourceBits.W ))))
31+ val msStatus = Vec (banks, Vec (mshrsAll, Flipped (ValidIO (new MSHRStatus ))))
32+ val msAlloc = Vec (banks, Vec (mshrsAll, Flipped (ValidIO (new MSHRAllocStatus ))))
33+ val hitPfInMSHR = Vec (banks, Flipped (ValidIO (UInt (PfSource .pfSourceBits.W ))))
34+ val pfSent = Vec (banks, Flipped (ValidIO (UInt (MemReqSource .reqSourceBits.W ))))
35+ val pfLateInMSHR = Vec (banks, Flipped (ValidIO (UInt (MemReqSource .reqSourceBits.W ))))
3436 val debugTopDown = new Bundle {
3537 val robTrueCommit = Input (UInt (64 .W ))
3638 val robHeadPaddr = Flipped (Valid (UInt (36 .W )))
@@ -55,28 +57,34 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
5557 }
5658
// l2MissMatch is the OR-reduction of every flattened element of addrMatchVec
// (addrMatchVec is built above this hunk and is not visible here).
// The same signal feeds the accumulate counter; the '-'/'+' pair below only
// changes the counter's name string.
5759 io.debugTopDown.l2MissMatch := Cat (addrMatchVec.flatten).orR
58- XSPerfAccumulate (s " ${cacheParams.name}MissMatch " , io.debugTopDown.l2MissMatch)
60+ XSPerfAccumulate (s " RobBlockBy ${cacheParams.name}Miss " , io.debugTopDown.l2MissMatch)
5961
6062 /* ====== PART TWO ======
6163 * Count the parallel misses, and divide them into CPU/Prefetch
6264 */
// Old helper ('-' lines): walked io.msStatus only, and bound a bank index `i`
// that none of the visible body lines use.
// New helper ('+' lines): generic over the payload type T. It takes any
// two-level Vec of ValidIO[T] as an explicit first argument list, so the same
// code can scan io.msStatus and io.msAlloc.
// Both versions share the body below: the nested Vec is flattened into one
// sequence of Bool, and an element is true when the entry is valid AND `cond`
// holds on its bits.
63- def allMSHRMatchVec ( cond : MSHRStatus => Bool ): IndexedSeq [Bool ] = {
64- io.msStatus.zipWithIndex. flatMap {
65- case ( slice, i) =>
65+ def allValidMatchVec [ T <: Data ]( vec : Vec [ Vec [ ValidIO [ T ]]])( cond : T => Bool ): IndexedSeq [Bool ] = {
66+ vec. flatMap{
67+ case slice =>
6668 slice.map {
// qualify the predicate with valid so stale bits on an invalid entry never match
6769 ms => ms.valid && cond(ms.bits)
6870 }
6971 }
7072 }
7173
// Per-cycle view of outstanding misses: one Bool per MSHR entry across all
// banks, true when the entry is valid and reports fromA && is_miss.
// The two vectors split on is_prefetch, so no entry can be set in both.
// The '+' lines only switch to the generic helper, passing io.msStatus explicitly.
72- val missVecCPU = allMSHRMatchVec(s => s.fromA && s.is_miss && ! s.is_prefetch)
73- val missVecPref = allMSHRMatchVec(s => s.fromA && s.is_miss && s.is_prefetch)
74- // val missVecAll = allMSHRMatchVec(s => s.fromA && s.is_miss)
75-
74+ val missVecCPU = allValidMatchVec(io.msStatus)(s => s.fromA && s.is_miss && ! s.is_prefetch)
75+ val missVecPref = allValidMatchVec(io.msStatus)(s => s.fromA && s.is_miss && s.is_prefetch)
76+ // val missVecAll = allValidMatchVec(io.msStatus)(s => s.fromA && s.is_miss)
// total number of MSHR entries over all banks; used as a histogram argument below
7677 val totalMSHRs = banks * mshrsAll
// Histogram of how many matching MSHR entries exist. The third argument is
// true.B (presumably the sample enable, i.e. sampled every cycle - confirm
// against XSPerfHistogram).
// Besides renaming parallel_misses_* to mshr_cycles_*, the '+' lines replace the
// literal 32 in the combined histogram with totalMSHRs, matching the other two.
77- XSPerfHistogram (" parallel_misses_CPU" , PopCount (missVecCPU), true .B , 0 , totalMSHRs, 1 )
78- XSPerfHistogram (" parallel_misses_Pref" , PopCount (missVecPref), true .B , 0 , totalMSHRs, 1 )
79- XSPerfHistogram (" parallel_misses_All" , PopCount (missVecCPU)+ PopCount (missVecPref), true .B , 0 , 32 , 1 )
78+ XSPerfHistogram (" mshr_cycles_CPU" , PopCount (missVecCPU), true .B , 0 , totalMSHRs, 1 )
79+ XSPerfHistogram (" mshr_cycles_Prefetch" , PopCount (missVecPref), true .B , 0 , totalMSHRs, 1 )
80+ XSPerfHistogram (" mshr_cycles_All" , PopCount (missVecCPU)+ PopCount (missVecPref), true .B , 0 , totalMSHRs, 1 )
81+
82+ // Count the number of misses (events), as opposed to the per-cycle occupancy above.
// Same predicates as missVecCPU / missVecPref, but evaluated on io.msAlloc
// instead of io.msStatus.
// NOTE(review): presumably io.msAlloc is valid only in the cycle an MSHR is
// allocated, so each miss is counted once - confirm against the module driving msAlloc.
83+ val missCountCPU = allValidMatchVec(io.msAlloc)(s => s.fromA && s.is_miss && ! s.is_prefetch)
84+ val missCountPref = allValidMatchVec(io.msAlloc)(s => s.fromA && s.is_miss && s.is_prefetch)
85+ XSPerfAccumulate (" mshr_count_CPU" , PopCount (missCountCPU))
86+ XSPerfAccumulate (" mshr_count_Prefetch" , PopCount (missCountPref))
87+ XSPerfAccumulate (" mshr_count_All" , PopCount (missCountCPU) + PopCount (missCountPref))
8088
8189 /* ====== PART THREE ======
8290 * Distinguish req sources and count num & miss
@@ -92,11 +100,6 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
92100 }
93101 }
94102
// Removed by this diff (all lines carry '-').
// Returned true when the directory result's request source was CPULoadData or
// CPUStoreData. The '+' lines later in the file call MemReqSource.isCPUReq on
// the reqSource instead.
// NOTE(review): isCPUReq is defined outside this view - confirm it covers the
// same set of sources before comparing old and new counter values.
95- def reqFromCPU (r : DirResult ): Bool = {
96- r.replacerInfo.reqSource === MemReqSource .CPULoadData .id.U ||
97- r.replacerInfo.reqSource === MemReqSource .CPUStoreData .id.U
98- }
99-
100103 for (i <- 0 until MemReqSource .ReqSourceCount .id) {
101104 val sourceMatchVec = dirResultMatchVec(r => r.replacerInfo.reqSource === i.U )
102105 val sourceMatchVecMiss = dirResultMatchVec(r => r.replacerInfo.reqSource === i.U && ! r.hit)
@@ -120,43 +123,62 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
120123 )
121124
// Every value in this group is built by mapping over pfTypes, giving one inner
// vector per prefetcher type. pfTypes is declared above this hunk; the patterns
// below show each element is a 3-tuple, bound here as (_, reqSrc, pfSrc).
// dirResultMatchVec is also defined outside this view; presumably it applies
// the predicate to each io.dirResult entry - confirm.
122125 // sent/useful vector
123- val l2prefetchSentVec = pfTypes.map { case (_, reqSrc, _) => dirResultMatchVec(r => r.replacerInfo.reqSource === reqSrc) }
124- val l2prefetchUsefulVec = pfTypes.map { case (_, _, pfSrc) =>
125- dirResultMatchVec(r => reqFromCPU(r) && r.hit &&
// "sent" is now taken from the dedicated io.pfSent port: valid and bits equal to this type's reqSrc
126+ val l2pfSentVec = pfTypes.map { case (_, reqSrc, _) => io.pfSent.map(r => r.valid && r.bits === reqSrc) }
// the old directory-based definition of "sent" is kept under a new name
127+ val l2pfSentToPipeVec = pfTypes.map { case (_, reqSrc, _) => dirResultMatchVec(r => r.replacerInfo.reqSource === reqSrc) }
// CPU request that hits a line whose meta has the prefetch bit set and whose
// prefetchSrc equals this type's pfSrc. The optional meta fields fall back to
// false.B / PfSource.NoWhere when absent.
128+ val l2hitPfInCacheVec = pfTypes.map { case (_, _, pfSrc) =>
129+ dirResultMatchVec(r => MemReqSource .isCPUReq(r.replacerInfo.reqSource) && r.hit &&
126130 r.meta.prefetch.getOrElse(false .B ) && r.meta.prefetchSrc.getOrElse(PfSource .NoWhere .id.U ) === pfSrc)
127131 }
128- val l2prefetchLateVec = pfTypes.map { case (_, _, pfSrc) =>
129- io.latePF .map(r => r.valid && r.bits === pfSrc)
// same expression shape as the removed l2prefetchLateVec, but reading the
// io.hitPfInMSHR port and recorded as a hit rather than as a late prefetch
132+ val l2hitPfInMSHRVec = pfTypes.map { case (_, _, pfSrc) =>
133+ io.hitPfInMSHR .map(r => r.valid && r.bits === pfSrc)
130134 }
131-
// This span interleaves removed summary values ('-') with their replacements ('+').
// The lines with two fused numbers (a lone ")") are unchanged context that close
// both the old and the new expression.
132- // to summary
133- val l2prefetchSent = dirResultMatchVec(
134- r => MemReqSource .isL2Prefetch(r.replacerInfo.reqSource)
// Late-in-cache: an L2 prefetch request with this type's reqSrc that hits a line
// whose meta.prefetch bit is NOT set (treated as false.B when the field is absent).
135+ val l2pfLateInCache = pfTypes.map { case (_, reqSrc, _) =>
136+ dirResultMatchVec(r => MemReqSource .isL2Prefetch(r.replacerInfo.reqSource) && r.hit &&
137+ ! r.meta.prefetch.getOrElse(false .B ) && r.replacerInfo.reqSource === reqSrc)
138+ }
// Late-in-MSHR: taken from the io.pfLateInMSHR port, valid and bits equal to reqSrc.
139+ val l2pfLateInMSHR = pfTypes.map { case (_, reqSrc, _) =>
140+ io.pfLateInMSHR.map(r => r.valid && r.bits === reqSrc)
141+ }
// Per-type totals: cache count plus MSHR count.
// NOTE(review): Chisel's plain `+` does not widen its result. If a cache event
// and an MSHR event can both be reported in the same cycle, the sum could wrap;
// `+&` would keep the carry. Confirm whether the two sources are mutually exclusive.
142+ val l2hitPfVec = l2hitPfInCacheVec.zip(l2hitPfInMSHRVec).map { case (c, m) => PopCount (c) + PopCount (m) }
143+ val l2pfLateVec = l2pfLateInCache.zip(l2pfLateInMSHR).map { case (c, m) => PopCount (c) + PopCount (m) }
// Misses split by requester class: CPU (isCPUReq), L2 prefetch (isL2Prefetch)
// and L1 prefetch (isL1Prefetch), each qualified by !r.hit.
144+ val l2demandMiss = dirResultMatchVec(
145+ r => MemReqSource .isCPUReq(r.replacerInfo.reqSource) && ! r.hit
135146 )
136- val l2prefetchUseful = dirResultMatchVec(
137- r => reqFromCPU(r ) && r.hit && r.meta.prefetch.getOrElse( false . B )
147+ val l2prefetchMiss = dirResultMatchVec(
148+ r => MemReqSource .isL2Prefetch(r.replacerInfo.reqSource ) && ! r.hit
138149 )
139- val l2demandMiss = dirResultMatchVec(
140- r => reqFromCPU(r ) && ! r.hit
150+ val l1prefetchMiss = dirResultMatchVec(
151+ r => MemReqSource .isL1Prefetch(r.replacerInfo.reqSource ) && ! r.hit
141152 )
142- val l2prefetchLate = io.latePF.map(_.valid)
143- // TODO: get difference prefetchSrc for detailed analysis
144- // FIXME lyq: it's abnormal l2prefetchLate / l2prefetchUseful is more than 1
145153
// Registers the summary counters, then one set of counters per prefetcher type.
// In every XSPerfRolling call the second and third arguments are presumably the
// numerator and denominator of the reported ratio - confirm against XSPerfRolling.
// Changes visible in the '+' lines:
//   - "sent" is now counted from l2pfSentVec (the io.pfSent port); the
//     directory-based count is kept as l2prefetchSentToPipe.
//   - "useful" becomes "hit", split into InCache / InMSHR parts.
//   - the third argument of L2PrefetchLate changes from the useful count to the sent count.
146154 // PF Accuracy/Coverage/Late Accumulate/Rolling
147- XSPerfAccumulate (" l2prefetchSent" , PopCount (l2prefetchSent))
148- XSPerfAccumulate (" l2prefetchUseful" , PopCount (l2prefetchUseful))
149155 XSPerfAccumulate (" l2demandMiss" , PopCount (l2demandMiss))
150- XSPerfAccumulate (" l2prefetchLate" , PopCount (l2prefetchLate))
151- XSPerfRolling (" L2PrefetchAccuracy" , PopCount (l2prefetchUseful), PopCount (l2prefetchSent), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
152- XSPerfRolling (" L2PrefetchCoverage" , PopCount (l2prefetchUseful), PopCount (l2prefetchUseful) + PopCount (l2demandMiss), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
153- XSPerfRolling (" L2PrefetchLate" , PopCount (l2prefetchLate), PopCount (l2prefetchUseful), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
154- for ((name, _, _, sent, useful, late) <- pfTypes zip l2prefetchSentVec zip l2prefetchUsefulVec zip l2prefetchLateVec map { case (((a, b), c), d) => (a._1, a._2, a._3, b, c, d) }) {
155- XSPerfAccumulate (s " l2prefetchSent $name" , PopCount (sent))
156- XSPerfAccumulate (s " l2prefetchUseful $name" , PopCount (useful))
157- XSPerfAccumulate (s " l2prefetchLate $name" , PopCount (late))
158- XSPerfRolling (s " L2PrefetchAccuracy $name" , PopCount (useful), PopCount (sent), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
159- XSPerfRolling (s " L2PrefetchCoverage $name" , PopCount (useful), PopCount (useful) + PopCount (l2demandMiss), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
// Summary counters: the per-type vectors are flattened (or their per-type sums
// reduced) so each counter covers all prefetcher types together.
156+ XSPerfAccumulate (" l1prefetchMiss" , PopCount (l1prefetchMiss))
157+ XSPerfAccumulate (" l2prefetchMiss" , PopCount (l2prefetchMiss))
158+ XSPerfAccumulate (" l2prefetchSent" , PopCount (l2pfSentVec.flatten))
159+ XSPerfAccumulate (" l2prefetchSentToPipe" , PopCount (l2pfSentToPipeVec.flatten))
160+ XSPerfAccumulate (" l2prefetchHit" , l2hitPfVec.reduce(_ + _))
161+ XSPerfAccumulate (" l2prefetchHitInCache" , PopCount (l2hitPfInCacheVec.flatten))
162+ XSPerfAccumulate (" l2prefetchHitInMSHR" , PopCount (l2hitPfInMSHRVec.flatten))
163+ XSPerfAccumulate (" l2prefetchLate" , l2pfLateVec.reduce(_ + _))
164+ XSPerfAccumulate (" l2prefetchLateInCache" , PopCount (l2pfLateInCache.flatten))
165+ XSPerfAccumulate (" l2prefetchLateInMSHR" , PopCount (l2pfLateInMSHR.flatten))
166+ XSPerfRolling (" L2PrefetchAccuracy" , l2hitPfVec.reduce(_ + _), PopCount (l2pfSentVec.flatten), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
167+ XSPerfRolling (" L2PrefetchLate" , l2pfLateVec.reduce(_ + _), PopCount (l2pfSentVec.flatten), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
168+ XSPerfRolling (" L2PrefetchCoverage" , l2hitPfVec.reduce(_ + _), l2hitPfVec.reduce(_ + _) + PopCount (l2demandMiss), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
// Per-type counters: the loop index i selects the matching entry in each per-type
// vector, replacing the removed four-way zip. Only the first tuple field (the
// name) of each pfTypes element is used here.
// NOTE(review): the per-type coverage ratio adds the global l2demandMiss, not a
// per-type miss count - confirm this is intended.
169+ for ((x, i) <- pfTypes.zipWithIndex) {
170+ val name = x._1
171+ XSPerfAccumulate (s " l2prefetchSent $name" , PopCount (l2pfSentVec(i)))
172+ XSPerfAccumulate (s " l2prefetchSentToPipe $name" , PopCount (l2pfSentToPipeVec(i)))
173+ XSPerfAccumulate (s " l2prefetchHit $name" , l2hitPfVec(i))
174+ XSPerfAccumulate (s " l2prefetchHitInCache $name" , PopCount (l2hitPfInCacheVec(i)))
175+ XSPerfAccumulate (s " l2prefetchHitInMSHR $name" , PopCount (l2hitPfInMSHRVec(i)))
176+ XSPerfAccumulate (s " l2prefetchLate $name" , l2pfLateVec(i))
177+ XSPerfAccumulate (s " l2prefetchLateInCache $name" , PopCount (l2pfLateInCache(i)))
178+ XSPerfAccumulate (s " l2prefetchLateInMSHR $name" , PopCount (l2pfLateInMSHR(i)))
179+ XSPerfRolling (s " L2PrefetchAccuracy $name" , l2hitPfVec(i), PopCount (l2pfSentVec(i)), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
180+ XSPerfRolling (s " L2PrefetchLate $name" , l2pfLateVec(i), PopCount (l2pfSentVec(i)), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
181+ XSPerfRolling (s " L2PrefetchCoverage $name" , l2hitPfVec(i), l2hitPfVec(i) + PopCount (l2demandMiss), 1000 , io.debugTopDown.robTrueCommit, clock, reset)
160182 }
161183
162184}
0 commit comments