Skip to content

Commit 252c3e5

Browse files
committed
add ICache miss calculation in PMU
1 parent 438d3c2 commit 252c3e5

File tree

5 files changed

+92
-32
lines changed

5 files changed

+92
-32
lines changed

GorgonMeducer.perf_counter.pdsc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@
364364
-->
365365

366366
<components>
367-
<component Cclass="Utilities" Cversion="2.5.4" Cgroup="perf_counter" Csub="Core" Cvariant="Source" isDefaultVariant="true" condition="CMSIS-CORE">
367+
<component Cclass="Utilities" Cversion="2.5.5-dev" Cgroup="perf_counter" Csub="Core" Cvariant="Source" isDefaultVariant="true" condition="CMSIS-CORE">
368368
<description>A dedicated performance counter for Cortex-M systick.</description>
369369
<files>
370370
<file category="preIncludeGlobal" name="perfc_common.h" condition="No GCC"/>
@@ -386,7 +386,7 @@
386386
</Pre_Include_Global_h>
387387
</component>
388388

389-
<component Cclass="Utilities" Cversion="2.5.4" Cgroup="perf_counter" Csub="Core" Cvariant="SourceNoWrapper" isDefaultVariant="false" condition="CMSIS-CORE">
389+
<component Cclass="Utilities" Cversion="2.5.5-dev" Cgroup="perf_counter" Csub="Core" Cvariant="SourceNoWrapper" isDefaultVariant="false" condition="CMSIS-CORE">
390390
<description>A dedicated performance counter for Cortex-M systick.</description>
391391
<files>
392392
<file category="preIncludeGlobal" name="perfc_common.h" condition="No GCC"/>
@@ -399,7 +399,7 @@
399399
#define __PERF_COUNTER__ 1
400400
</RTE_Components_h>
401401
</component>
402-
<component Cclass="Utilities" Cversion="1.3.0" Cgroup="perf_counter" Csub="Porting" Cvariant="User Defined" isDefaultVariant="false" condition="perf_counter">
402+
<component Cclass="Utilities" Cgroup="perf_counter" Csub="Porting" Cvariant="User Defined" Cversion="1.3.0" isDefaultVariant="false" condition="perf_counter">
403403
<description>A user-defined system timer</description>
404404
<files>
405405
<file category="sourceC" name="template/perfc_port_user.c" attr="config" version="1.3.0"/>
@@ -419,7 +419,7 @@
419419
</Pre_Include_Global_h>
420420
</component>
421421

422-
<component Cclass="Utilities" Cversion="1.3.0" Cgroup="perf_counter" Csub="Porting" Cvariant="PMU" isDefaultVariant="true" condition="PMU Devices">
422+
<component Cclass="Utilities" Cgroup="perf_counter" Csub="Porting" Cvariant="PMU" Cversion="1.4.0" isDefaultVariant="true" condition="PMU Devices">
423423
<description>Using the Performance Monitor Unit</description>
424424
<files>
425425
<file category="sourceC" name="perfc_port_pmu.c"/>
@@ -463,7 +463,7 @@
463463
</Pre_Include_Global_h>
464464
</component>
465465

466-
<component Cclass="Utilities" Cversion="2.5.4" Cgroup="perf_counter" Csub="RTX5 Patch" condition="RTX5 Patch">
466+
<component Cclass="Utilities" Cversion="2.5.5-dev" Cgroup="perf_counter" Csub="RTX5 Patch" condition="RTX5 Patch">
467467
<description>A Patch for RTX5</description>
468468
<files>
469469
<file category="source" name="os/perf_os_patch_rtx5.c"/>
@@ -475,7 +475,7 @@
475475
</Pre_Include_Global_h>
476476
</component>
477477

478-
<component Cclass="Utilities" Cversion="2.5.4" Cgroup="perf_counter" Csub="FreeRTOS Patch" condition="perf_counter">
478+
<component Cclass="Utilities" Cversion="2.5.5-dev" Cgroup="perf_counter" Csub="FreeRTOS Patch" condition="perf_counter">
479479
<description>A Patch for FreeRTOS</description>
480480
<files>
481481
<file category="sourceC" name="os/perf_os_patch_freertos.c"/>
@@ -498,7 +498,7 @@ extern void __freertos_evr_on_task_switched_in(void *ptTCB, unsigned int uxTopPr
498498
</Pre_Include_Global_h>
499499
</component>
500500

501-
<component Cclass="Utilities" Cversion="2.5.4" Cgroup="perf_counter" Csub="RT-Thread Patch" condition="perf_counter">
501+
<component Cclass="Utilities" Cversion="2.5.5-dev" Cgroup="perf_counter" Csub="RT-Thread Patch" condition="perf_counter">
502502
<description>A Patch for RT-Thread</description>
503503
<files>
504504
<file category="sourceC" name="os/perf_os_patch_rt_thread.c"/>
@@ -516,7 +516,7 @@ extern void __rt_thread_scheduler_hook(struct rt_thread *from, struct rt_thread
516516
</Pre_Include_Global_h>
517517
</component>
518518

519-
<component Cclass="Utilities" Cversion="2.5.4" Cgroup="perf_counter" Csub="ThreadX Patch" condition="perf_counter">
519+
<component Cclass="Utilities" Cversion="2.5.5-dev" Cgroup="perf_counter" Csub="ThreadX Patch" condition="perf_counter">
520520
<description>A Patch for ThreadX</description>
521521
<files>
522522
<file category="sourceC" name="os/perf_os_patch_threadx.c"/>

example/example.uvoptx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,7 @@
752752

753753
<Group>
754754
<GroupName>perf_counter_lib</GroupName>
755-
<tvExp>0</tvExp>
755+
<tvExp>1</tvExp>
756756
<tvExpOptDlg>0</tvExpOptDlg>
757757
<cbSel>0</cbSel>
758758
<RteFlg>0</RteFlg>

perf_counter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ extern "C" {
4848
*/
4949
#define __PERF_COUNTER_VER_MAJOR__ 2
5050
#define __PERF_COUNTER_VER_MINOR__ 5
51-
#define __PERF_COUNTER_VER_REVISE__ 4
51+
#define __PERF_COUNTER_VER_REVISE__ 5
5252

53-
#define __PERF_COUNTER_VER_STR__ ""
53+
#define __PERF_COUNTER_VER_STR__ "dev"
5454

5555
#define __PER_COUNTER_VER__ (__PERF_COUNTER_VER_MAJOR__ * 10000ul \
5656
+__PERF_COUNTER_VER_MINOR__ * 100ul \

perfc_port_pmu.c

Lines changed: 62 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1419,13 +1419,15 @@ enum {
14191419
PMU_CNT_INSTRUCTION = 0,
14201420
PMU_CNT_MEM_ACCESS = 1,
14211421
PMU_CNT_L1_DCACHE_REFILL = 2,
1422+
PMU_CNT_L1_ICACHE_REFILL = 3,
1423+
__PMU_CNT_DEFAULT_START,
14221424
};
14231425

14241426
/*============================ GLOBAL VARIABLES ==============================*/
14251427
/*============================ LOCAL VARIABLES ===============================*/
14261428

14271429
#if __PMU_NUM_EVENTCNT >= 6
1428-
# define __COUNTER_NUM__ (3 + (__PMU_NUM_EVENTCNT - 6))
1430+
# define __COUNTER_NUM__ (4 + (__PMU_NUM_EVENTCNT - 6))
14291431
#elif __PMU_NUM_EVENTCNT > 2
14301432
# define __COUNTER_NUM__ (__PMU_NUM_EVENTCNT >> 1)
14311433
#else
@@ -1508,23 +1510,30 @@ void perfc_port_pmu_insert_to_debug_monitor_handler(void)
15081510
if (chCounter < 6) {
15091511
break ;
15101512
}
1513+
if (PMU->OVSCLR & PMU_OVSCLR_CNT4_STATUS_Msk) {
1514+
PMU->OVSCLR = PMU_OVSCLR_CNT4_STATUS_Msk;
1515+
1516+
s_dwEventCounter[PMU_CNT_L1_DCACHE_REFILL] += (uint32_t)0x10000;
1517+
}
15111518
if (PMU->OVSCLR & PMU_OVSCLR_CNT5_STATUS_Msk) {
15121519
PMU->OVSCLR = PMU_OVSCLR_CNT5_STATUS_Msk;
15131520

1514-
s_dwEventCounter[PMU_CNT_L1_DCACHE_REFILL] += (uint64_t)0x100000000;
1521+
s_dwEventCounter[PMU_CNT_L1_ICACHE_REFILL] += (uint32_t)0x10000;
15151522
}
15161523

15171524
if (chCounter > 6) {
15181525

1526+
int_fast8_t chCountIndex = __PMU_CNT_DEFAULT_START;
15191527
for (uint_fast8_t n = 6; n < chCounter; n++) {
15201528
uint32_t wMask = (1<<n);
15211529
if (PMU->OVSSET & wMask) {
15221530
/* counter overflow is detected */
15231531
PMU->OVSCLR = wMask; /* clear Overflow Flag */
15241532

15251533
/* update corresponding counter */
1526-
s_dwEventCounter[n - 3] += 0x10000;
1534+
s_dwEventCounter[chCountIndex] += 0x10000;
15271535
}
1536+
chCountIndex++;
15281537
}
15291538
}
15301539
} while(0);
@@ -1591,22 +1600,34 @@ bool perfc_port_init_system_timer(bool bIsTimeOccupied)
15911600
PMU->CNTENSET = PMU_CNTENSET_CNT2_ENABLE_Msk
15921601
| PMU_CNTENSET_CNT3_ENABLE_Msk;
15931602
}
1603+
1604+
if (chCounter >= 5) {
1605+
/* 16bit (unchained) counter for L1 DCache Refill; each overflow adds 0x10000 in software */
1606+
PMU->EVTYPER[4] = ARM_PMU_L1D_CACHE_REFILL;
1607+
1608+
/* clear counter 4 overflow flag */
1609+
PMU->OVSCLR = PMU_OVSCLR_CNT4_STATUS_Msk;
1610+
1611+
/* enable counter 4 interrupt */
1612+
PMU->INTENSET = PMU_INTENSET_CNT4_ENABLE_Msk;
1613+
1614+
/* enable counter 4 */
1615+
PMU->CNTENSET = PMU_CNTENSET_CNT4_ENABLE_Msk;
1616+
1617+
}
15941618

15951619
if (chCounter >= 6) {
1596-
/* 32bit counter for all Data memory Accesses */
1597-
PMU->EVTYPER[4] = ARM_PMU_L1D_CACHE_REFILL;
1598-
PMU->EVTYPER[5] = ARM_PMU_CHAIN;
1620+
/* 16bit (unchained) counter for L1 ICache Refill; each overflow adds 0x10000 in software */
1621+
PMU->EVTYPER[5] = ARM_PMU_L1I_CACHE_REFILL;
15991622

1600-
/* clear counter 4/5 overflow flag */
1601-
PMU->OVSCLR = PMU_OVSCLR_CNT4_STATUS_Msk
1602-
| PMU_OVSCLR_CNT5_STATUS_Msk;
1623+
/* clear counter 5 overflow flag */
1624+
PMU->OVSCLR = PMU_OVSCLR_CNT5_STATUS_Msk;
16031625

16041626
/* enable counter 5 interrupt */
16051627
PMU->INTENSET = PMU_INTENSET_CNT5_ENABLE_Msk;
16061628

1607-
/* enable counter 4/5 */
1608-
PMU->CNTENSET = PMU_CNTENSET_CNT4_ENABLE_Msk
1609-
| PMU_CNTENSET_CNT5_ENABLE_Msk;
1629+
/* enable counter 5 */
1630+
PMU->CNTENSET = PMU_CNTENSET_CNT5_ENABLE_Msk;
16101631

16111632
}
16121633

@@ -1616,7 +1637,7 @@ bool perfc_port_init_system_timer(bool bIsTimeOccupied)
16161637

16171638
PMU->OVSCLR = wMask; /* clear overflow flag */
16181639
PMU->INTENSET = wMask; /* enable interrupt */
1619-
PMU->CNTENSET = wMask; /* enable counter */
1640+
//PMU->CNTENSET = wMask; /* enable counter */
16201641
}
16211642
}
16221643

@@ -1696,23 +1717,44 @@ uint64_t perfc_pmu_get_memory_access_count(void)
16961717

16971718
uint64_t perfc_pmu_get_L1_dcache_refill_count(void)
16981719
{
1699-
uint32_t wHigh16, wLow16;
1720+
uint32_t wLow16;
17001721
uint64_t dwResult;
17011722
bool bIsOverflow = false;
17021723

17031724
__IRQ_SAFE {
1704-
do {
1705-
wHigh16 = PMU->EVCNTR[5];
1706-
wLow16 = PMU->EVCNTR[4];
1707-
} while(wHigh16 < PMU->EVCNTR[5]);
1725+
wLow16 = PMU->EVCNTR[4];
17081726
dwResult = s_dwEventCounter[PMU_CNT_L1_DCACHE_REFILL];
1727+
bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT4_STATUS_Msk));
1728+
}
1729+
1730+
dwResult += wLow16;
1731+
1732+
if (bIsOverflow) {
1733+
dwResult += (uint64_t)1<<16;
1734+
}
1735+
1736+
/* force to disable DWT */
1737+
DWT->CTRL = 0;
1738+
1739+
return dwResult;
1740+
}
1741+
1742+
uint64_t perfc_pmu_get_L1_icache_refill_count(void)
1743+
{
1744+
uint32_t wLow16;
1745+
uint64_t dwResult;
1746+
bool bIsOverflow = false;
1747+
1748+
__IRQ_SAFE {
1749+
wLow16 = PMU->EVCNTR[5];
1750+
dwResult = s_dwEventCounter[PMU_CNT_L1_ICACHE_REFILL];
17091751
bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT5_STATUS_Msk));
17101752
}
17111753

1712-
dwResult += wLow16 | (wHigh16 << 16);
1754+
dwResult += wLow16;
17131755

17141756
if (bIsOverflow) {
1715-
dwResult += (uint64_t)1<<32;
1757+
dwResult += (uint64_t)1<<16;
17161758
}
17171759

17181760
/* force to disable DWT */

perfc_port_pmu.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,13 @@
3434
uint64_t dwNoInstr; \
3535
uint64_t dwNoMemAccess; \
3636
uint64_t dwNoL1DCacheRefill; \
37+
uint64_t dwNoL1ICacheRefill; \
3738
int64_t lCycles; \
3839
uint32_t wInstrCalib; \
3940
uint32_t wMemAccessCalib; \
4041
float fCPI; \
4142
float fDCacheMissRate; \
43+
float fICacheMissRate; \
4244
} __PERF_INFO__ = {0}, \
4345
({ \
4446
__PERF_INFO__.dwNoInstr = perfc_pmu_get_instruction_count(); \
@@ -49,6 +51,8 @@
4951
- __PERF_INFO__.dwNoMemAccess; \
5052
__PERF_INFO__.dwNoL1DCacheRefill \
5153
= perfc_pmu_get_L1_dcache_refill_count(); \
54+
__PERF_INFO__.dwNoL1ICacheRefill \
55+
= perfc_pmu_get_L1_icache_refill_count(); \
5256
__PERF_INFO__.dwNoInstr = perfc_pmu_get_instruction_count(); \
5357
__PERF_INFO__.dwNoMemAccess = perfc_pmu_get_memory_access_count(); \
5458
}), \
@@ -62,12 +66,19 @@
6266
__PERF_INFO__.dwNoL1DCacheRefill \
6367
= perfc_pmu_get_L1_dcache_refill_count() \
6468
- __PERF_INFO__.dwNoL1DCacheRefill; \
69+
__PERF_INFO__.dwNoL1ICacheRefill \
70+
= perfc_pmu_get_L1_icache_refill_count() \
71+
- __PERF_INFO__.dwNoL1ICacheRefill; \
6572
\
6673
__PERF_INFO__.fDCacheMissRate \
6774
= (float)( (double)__PERF_INFO__.dwNoL1DCacheRefill \
6875
/ (double)__PERF_INFO__.dwNoMemAccess) \
6976
* 100.0f; \
7077
\
78+
__PERF_INFO__.fICacheMissRate \
79+
= (float)( (double)__PERF_INFO__.dwNoL1ICacheRefill \
80+
/ (double)__PERF_INFO__.dwNoInstr) \
81+
* 100.0f; \
7182
__PERF_INFO__.fCPI = (float)( (double)__PERF_INFO__.lCycles \
7283
/ (double)__PERF_INFO__.dwNoInstr); \
7384
if (__PLOOC_VA_NUM_ARGS(__VA_ARGS__) == 0) { \
@@ -80,14 +91,18 @@
8091
"Memory Access Count: %"PRIi64"\r\n" \
8192
"L1 DCache Refill Count: %"PRIi64"\r\n" \
8293
"L1 DCache Miss Rate: %3.4f %% \r\n" \
94+
"L1 ICache Refill Count: %"PRIi64"\r\n" \
95+
"L1 ICache Miss Rate: %3.4f %% \r\n" \
8396
, \
8497
(__str), \
8598
__PERF_INFO__.dwNoInstr, \
8699
__PERF_INFO__.lCycles, \
87100
(double)__PERF_INFO__.fCPI, \
88101
__PERF_INFO__.dwNoMemAccess, \
89102
__PERF_INFO__.dwNoL1DCacheRefill, \
90-
(double)__PERF_INFO__.fDCacheMissRate \
103+
(double)__PERF_INFO__.fDCacheMissRate, \
104+
__PERF_INFO__.dwNoL1ICacheRefill, \
105+
(double)__PERF_INFO__.fICacheMissRate \
91106
); \
92107
} else { \
93108
__VA_ARGS__ \
@@ -185,6 +200,9 @@ uint64_t perfc_pmu_get_memory_access_count(void);
185200
extern
186201
uint64_t perfc_pmu_get_L1_dcache_refill_count(void);
187202

203+
extern
204+
uint64_t perfc_pmu_get_L1_icache_refill_count(void);
205+
188206
/*============================ IMPLEMENTATION ================================*/
189207

190208
__STATIC_INLINE

0 commit comments

Comments
 (0)