|
| 1 | +/* |
| 2 | + * gaudi2_events.h - Gaudi2 SPMU event definitions |
| 3 | + * |
| 4 | + * Event IDs from hl-prof-config -c gaudi2 --spmu-help |
| 5 | + * SPMU base addresses from gaudi2_blocks.h |
| 6 | + */ |
| 7 | + |
| 8 | +#ifndef _GAUDI2_EVENTS_H |
| 9 | +#define _GAUDI2_EVENTS_H |
| 10 | +/* Engine types used to tag native events (see gaudi2_native_event_t.engine); values run 0..7 and GAUDI2_ENGINE_MAX (8) equals the number of types listed. */ |
| 11 | +typedef enum { |
| 12 | + GAUDI2_ENGINE_TPC = 0, |
| 13 | + GAUDI2_ENGINE_MME, |
| 14 | + GAUDI2_ENGINE_EDMA, |
| 15 | + GAUDI2_ENGINE_PDMA, |
| 16 | + GAUDI2_ENGINE_ROTATOR, |
| 17 | + GAUDI2_ENGINE_NIC, |
| 18 | + GAUDI2_ENGINE_HBM, |
| 19 | + GAUDI2_ENGINE_HMMU, |
| 20 | + GAUDI2_ENGINE_MAX |
| 21 | +} gaudi2_engine_type_t; |
| 22 | + |
| 23 | +/* Instance counts; each *_PER_DCORE value times GAUDI2_NUM_DCORES gives the total noted beside it. */ |
| 24 | +#define GAUDI2_NUM_DCORES 4 |
| 25 | +#define GAUDI2_TPC_PER_DCORE 6 /* 24 TPCs total */ |
| 26 | +#define GAUDI2_EDMA_PER_DCORE 2 /* 8 EDMAs total */ |
| 27 | +#define GAUDI2_MME_PER_DCORE 1 /* 4 MMEs total */ |
| 28 | +#define GAUDI2_HMMU_PER_DCORE 4 /* 16 HMMUs total */ |
| 29 | +#define GAUDI2_NUM_PDMA 2 |
| 30 | +#define GAUDI2_NUM_ROTATOR 2 |
| 31 | +#define GAUDI2_NUM_NIC 12 |
| 32 | +#define GAUDI2_NUM_HBM 6 |
| 33 | +#define GAUDI2_NUM_HBM_MC 2 /* 2 memory controllers per HBM */ |
| 34 | + |
| 35 | +#define GAUDI2_MAX_SPMU_COUNTERS 6 /* NOTE(review): presumably counters available per SPMU instance - TODO confirm */ |
| 36 | + |
| 37 | +/* TPC SPMU events (81 events, IDs 0-80) */ |
| 38 | +typedef enum { |
| 39 | + TPC_SPMU_MEMORY2SB_BP = 0, |
| 40 | + TPC_SPMU_SB2MEMORY_BP = 1, |
| 41 | + TPC_SPMU_PQ_NOT_EMPTY_BUT_CQ_EMPTY = 2, |
| 42 | + TPC_SPMU_QM_PREFETCH_BUFFER_EMPTY = 3, |
| 43 | + TPC_SPMU_SB_2_CORE_BP = 4, |
| 44 | + TPC_SPMU_SB_2_CORE_BP_SB_FULL = 5, |
| 45 | + TPC_SPMU_SB_2_CORE_BP_SB_MEMORY = 6, |
| 46 | + TPC_SPMU_SB_2_CORE_BP_SB_LD_TNSR_FIFO_FULL = 7, |
| 47 | + TPC_SPMU_WB2CORE_BP = 8, |
| 48 | + TPC_SPMU_STALL_ON_ICACHE_MISS = 9, |
| 49 | + TPC_SPMU_STALL_ON_DCACHE_MISS = 10, |
| 50 | + TPC_SPMU_STALL_ON_POP_FROM_SB = 11, |
| 51 | + TPC_SPMU_STALL_ON_LOOKUP_CACHE_MISS = 12, |
| 52 | + TPC_SPMU_STALL_ON_IRQ_FULL = 13, |
| 53 | + TPC_SPMU_STALL_ON_MAX_COLORS = 14, |
| 54 | + TPC_SPMU_STALL_ON_UARCH_BUBBLE = 15, |
| 55 | + TPC_SPMU_STALL_VPU = 16, |
| 56 | + TPC_SPMU_STALL_SPU_ANY = 17, |
| 57 | + TPC_SPMU_STALL_ON_TSB_FULL = 18, |
| 58 | + TPC_SPMU_STALL_ON_ST_L_EXT = 19, |
| 59 | + TPC_SPMU_STALL_ON_LD_L_EXT = 20, |
| 60 | + TPC_SPMU_STALL = 21, |
| 61 | + |
| 62 | + /* Opcode execution events - configurable via TPC_CFG_OPCODE_EXEC */ |
| 63 | + TPC_SPMU_NUM_OF_OPCODE1_EXECUTED = 22, |
| 64 | + TPC_SPMU_NUM_OF_OPCODE2_EXECUTED = 23, |
| 65 | + TPC_SPMU_NUM_OF_OPCODE3_EXECUTED = 24, |
| 66 | + TPC_SPMU_NUM_OF_OPCODE4_EXECUTED = 25, |
| 67 | + |
| 68 | + /* Execution events */ |
| 69 | + TPC_SPMU_KERNEL_EXECUTED = 26, |
| 70 | + TPC_SPMU_SCALAR_PIPE_EXEC = 27, |
| 71 | + TPC_SPMU_VECTOR_PIPE_EXEC = 28, |
| 72 | + |
| 73 | + /* Cache, LUT and killed-instruction events */ |
| 74 | + TPC_SPMU_ICACHE_MISS = 29, |
| 75 | + TPC_SPMU_ICACHE_HIT = 30, |
| 76 | + TPC_SPMU_KILLED_INSTRUCTION = 31, |
| 77 | + TPC_SPMU_LUT_MISS = 32, |
| 78 | + TPC_SPMU_DCACHE_MISS = 33, |
| 79 | + TPC_SPMU_DCACHE_HIT = 34, |
| 80 | + |
| 81 | + /* Out of bounds events */ |
| 82 | + TPC_SPMU_OUT_OF_BOUND_DIM0 = 35, |
| 83 | + TPC_SPMU_OUT_OF_BOUND_DIM1 = 36, |
| 84 | + TPC_SPMU_OUT_OF_BOUND_DIM2 = 37, |
| 85 | + TPC_SPMU_OUT_OF_BOUND_DIM3 = 38, |
| 86 | + TPC_SPMU_OUT_OF_BOUND_DIM4 = 39, |
| 87 | + |
| 88 | + /* Arithmetic exception events */ |
| 89 | + TPC_SPMU_DIV_BY_0 = 40, |
| 90 | + TPC_SPMU_SPU_MAC_OVERFLOW = 41, |
| 91 | + TPC_SPMU_SPU_ADDSUB_OVERFLOW = 42, |
| 92 | + TPC_SPMU_SPU_ABS_OVERFLOW = 43, |
| 93 | + TPC_SPMU_SPU_FMA_FP_DST_NAN = 44, |
| 94 | + TPC_SPMU_SPU_FMA_FP_DST_INF = 45, |
| 95 | + TPC_SPMU_SPU_CONVERT_FP_DST_NAN = 46, |
| 96 | + TPC_SPMU_SPU_CONVERT_FP_DST_INF = 47, |
| 97 | + TPC_SPMU_SPU_FP_DST_DENORM = 48, |
| 98 | + TPC_SPMU_VPU_MAC_OVERFLOW = 49, |
| 99 | + TPC_SPMU_VPU_ADDSUB_OVERFLOW = 50, |
| 100 | + TPC_SPMU_VPU_ABS_OVERFLOW = 51, |
| 101 | + TPC_SPMU_VPU_CONVERT_FP_DST_NAN = 52, |
| 102 | + TPC_SPMU_VPU_CONVERT_FP_DST_INF = 53, |
| 103 | + TPC_SPMU_VPU_FMA_FP_DST_NAN = 54, |
| 104 | + TPC_SPMU_VPU_FMA_FP_DST_INF = 55, |
| 105 | + TPC_SPMU_VPU_FP_DST_DENORM = 56, |
| 106 | + |
| 107 | + /* Additional events */ |
| 108 | + TPC_SPMU_STALL_ON_ST_TSNR_FULL = 57, |
| 109 | + TPC_SPMU_LUT_HIT = 58, |
| 110 | + TPC_SPMU_ADDRESS_EXCEED_VLM = 59, |
| 111 | + TPC_SPMU_LD_LOCK_RESEND = 60, |
| 112 | + TPC_SPMU_LD_L_PROT_VIO = 61, |
| 113 | + TPC_SPMU_ST_L_PROT_VIO = 62, |
| 114 | + TPC_SPMU_DCACHE_L0CD_MISMATCH = 63, |
| 115 | + TPC_SPMU_TPC_STALL_ON_LD_L_INT = 64, |
| 116 | + TPC_SPMU_SB_FIRST_RESPONSE = 65, |
| 117 | + TPC_SPMU_SB_LAST_RESPONSE = 66, |
| 118 | + |
| 119 | + /* SB occupancy events */ |
| 120 | + TPC_SPMU_SB_OCCUPANCY0 = 67, |
| 121 | + TPC_SPMU_SB_OCCUPANCY1 = 68, |
| 122 | + TPC_SPMU_SB_OCCUPANCY2 = 69, |
| 123 | + TPC_SPMU_SB_OCCUPANCY3 = 70, |
| 124 | + |
| 125 | + /* SB CAM events */ |
| 126 | + TPC_SPMU_SB_DBG_CAM0_MISS = 71, |
| 127 | + TPC_SPMU_SB_DBG_CAM0_HIT = 72, |
| 128 | + TPC_SPMU_SB_DBG_CAM0_UNCACHEABLE = 73, |
| 129 | + TPC_SPMU_SB_DBG_CAM1_MISS = 74, |
| 130 | + TPC_SPMU_SB_DBG_CAM1_HIT = 75, |
| 131 | + TPC_SPMU_SB_DBG_CAM1_UNCACHEABLE = 76, |
| 132 | + |
| 133 | + /* NOC-to-SB and additional DCACHE events */ |
| 134 | + TPC_SPMU_NOC_2_SB_BP = 77, |
| 135 | + TPC_SPMU_DCACHE_HW_PREF = 78, |
| 136 | + TPC_SPMU_DCACHE_UC = 79, |
| 137 | + TPC_SPMU_DCACHE_DEALIGN = 80, |
| 138 | + |
| 139 | + TPC_SPMU_EVENT_MAX = 81 /* count of the TPC event IDs above (0-80) */ |
| 140 | +} gaudi2_tpc_spmu_event_t; |
| 141 | + |
| 142 | +/* EDMA SPMU Events (50 events, IDs 0-49) */ |
| 143 | +typedef enum { |
| 144 | + EDMA_SPMU_QMAN0_PQ_BUF_PEND_CNT_EN = 0, |
| 145 | + EDMA_SPMU_QMAN1_PQ_BUF_PEND_CNT_EN = 1, |
| 146 | + EDMA_SPMU_QMAN2_PQ_BUF_PEND_CNT_EN = 2, |
| 147 | + EDMA_SPMU_QMAN3_PQ_BUF_PEND_CNT_EN = 3, |
| 148 | + EDMA_SPMU_QMAN0_CQ_BUF_PEND_CNT_EN = 4, |
| 149 | + EDMA_SPMU_QMAN1_CQ_BUF_PEND_CNT_EN = 5, |
| 150 | + EDMA_SPMU_QMAN2_CQ_BUF_PEND_CNT_EN = 6, |
| 151 | + EDMA_SPMU_QMAN3_CQ_BUF_PEND_CNT_EN = 7, |
| 152 | + EDMA_SPMU_QMAN_CMDQ_CQ_BUF_PEND_CNT_EN = 8, |
| 153 | + EDMA_SPMU_QMAN_CMDQ_ARC_CQ_BUF_PEND_CNT_EN = 9, |
| 154 | + EDMA_SPMU_AXI_HBW_ERR = 10, |
| 155 | + EDMA_SPMU_AXI_LBW_ERR = 11, |
| 156 | + EDMA_SPMU_TRACE_FENCE_START = 12, |
| 157 | + EDMA_SPMU_TRACE_FENCE_DONE = 13, |
| 158 | + EDMA_SPMU_TRACE_CP_SW_STOP = 14, |
| 159 | + EDMA_SPMU_CP_ERR = 15, |
| 160 | + EDMA_SPMU_ARB_ERR = 16, |
| 161 | + EDMA_SPMU_TRACE_CHOICE_WIN_PUSH = 17, |
| 162 | + EDMA_SPMU_DBG_DMA_TRC_DESC_PUSH = 18, |
| 163 | + EDMA_SPMU_DBG_DMA_TRC_CPL_MSG_SENT = 19, |
| 164 | + EDMA_SPMU_DBG_DMA_TRC_RD_FRST_ADDR_PUSH = 20, |
| 165 | + EDMA_SPMU_DBG_DMA_TRC_RD_LAST_ADDR_PUSH = 21, |
| 166 | + EDMA_SPMU_DBG_DMA_TRC_WR_FRST_ADDR_PUSH = 22, |
| 167 | + EDMA_SPMU_DBG_DMA_TRC_WR_LAST_ADDR_PUSH = 23, |
| 168 | + EDMA_SPMU_DBG_DMA_TRC_RD_DATA_FRST = 24, |
| 169 | + EDMA_SPMU_DBG_DMA_TRC_RD_DATA_LAST = 25, |
| 170 | + EDMA_SPMU_DBG_DMA_TRC_WR_DATA_FRST = 26, |
| 171 | + EDMA_SPMU_DBG_DMA_TRC_WR_DATA_LAST = 27, |
| 172 | + EDMA_SPMU_DBG_DMA_SPMU_MESH2SB_BP = 28, |
| 173 | + EDMA_SPMU_DBG_DMA_SPMU_SB2MESH_BP = 29, |
| 174 | + EDMA_SPMU_DBG_DMA_SPMU_MESH2WB_BP = 30, |
| 175 | + EDMA_SPMU_DBG_DMA_SPMU_RD_CTX_END2START = 31, |
| 176 | + EDMA_SPMU_DBG_DMA_SPMU_WR_CTX_END2START = 32, |
| 177 | + EDMA_SPMU_DBG_DMA_SPMU_SB2AGU_BP = 33, |
| 178 | + EDMA_SPMU_DBG_DMA_SPMU_SB_FULL_BP = 34, |
| 179 | + EDMA_SPMU_DBG_DMA_SPMU_WB2AGU_BP = 35, |
| 180 | + EDMA_SPMU_DBG_DMA_SPMU_WB2GSKT_BP = 36, |
| 181 | + EDMA_SPMU_DBG_DMA_SPMU_SB_MON_CNT_0 = 37, |
| 182 | + EDMA_SPMU_DBG_DMA_SPMU_SB_MON_CNT_1 = 38, |
| 183 | + EDMA_SPMU_DBG_DMA_SPMU_SB_MON_CNT_2 = 39, |
| 184 | + EDMA_SPMU_DBG_DMA_SPMU_SB_MON_CNT_3 = 40, |
| 185 | + EDMA_SPMU_SB_2_INITIATOR_BP_SB_FULL = 41, |
| 186 | + EDMA_SPMU_SB_2_INITIATOR_BP = 42, |
| 187 | + EDMA_SPMU_SB_DBG_CAM0_MISS = 43, |
| 188 | + EDMA_SPMU_SB_DBG_CAM0_HIT = 44, |
| 189 | + EDMA_SPMU_SB_DBG_CAM0_UNCACHEABLE = 45, |
| 190 | + EDMA_SPMU_SB_DBG_CAM1_MISS = 46, |
| 191 | + EDMA_SPMU_SB_DBG_CAM1_HIT = 47, |
| 192 | + EDMA_SPMU_SB_DBG_CAM1_UNCACHEABLE = 48, |
| 193 | + EDMA_SPMU_SB_AXI_NOC_2_SB_BP = 49, |
| 194 | + |
| 195 | + EDMA_SPMU_EVENT_MAX = 50 /* count of the EDMA event IDs above (0-49) */ |
| 196 | +} gaudi2_edma_spmu_event_t; |
| 197 | + |
| 198 | +/* MME CTRL SPMU Events (8 events, IDs 0-7) */ |
| 199 | +typedef enum { |
| 200 | + MME_CTRL_SPMU_CONV_END_STALL_DIAG = 0, |
| 201 | + MME_CTRL_SPMU_CONV_END_STALL_ACC = 1, |
| 202 | + MME_CTRL_SPMU_CONV_END_STALL_DIAG_STALL_ACC = 2, |
| 203 | + MME_CTRL_SPMU_OUTER_PRODUCT_STALL_ON_B = 3, |
| 204 | + MME_CTRL_SPMU_OUTER_PRODUCT_STALL_ON_A = 4, |
| 205 | + MME_CTRL_SPMU_NUM_OUTER_PRODUCTS = 5, |
| 206 | + MME_CTRL_SPMU_QM_PREFETCH_BUFFER_EMPTY = 6, |
| 207 | + MME_CTRL_SPMU_PQ_NOT_EMPTY_BUT_CQ_EMPTY = 7, |
| 208 | + |
| 209 | + MME_CTRL_SPMU_EVENT_MAX = 8 /* count of the MME CTRL event IDs above (0-7) */ |
| 210 | +} gaudi2_mme_ctrl_spmu_event_t; |
| 211 | + |
| 212 | +/* SPMU Base Addresses (unsigned long long constants; this section lists TPC, EDMA and PDMA only). NOTE(review): verify values against gaudi2_blocks.h. */ |
| 213 | + |
| 214 | +/* TPC SPMU base addresses - pattern: DCORE + TPC offset + SPMU offset; as listed, +0x1000000 per DCORE and +0x200000 per TPC */ |
| 215 | +#define GAUDI2_DCORE0_TPC0_SPMU_BASE 0x1000007FF8001000ULL |
| 216 | +#define GAUDI2_DCORE0_TPC1_SPMU_BASE 0x1000007FF8201000ULL |
| 217 | +#define GAUDI2_DCORE0_TPC2_SPMU_BASE 0x1000007FF8401000ULL |
| 218 | +#define GAUDI2_DCORE0_TPC3_SPMU_BASE 0x1000007FF8601000ULL |
| 219 | +#define GAUDI2_DCORE0_TPC4_SPMU_BASE 0x1000007FF8801000ULL |
| 220 | +#define GAUDI2_DCORE0_TPC5_SPMU_BASE 0x1000007FF8A01000ULL |
| 221 | + |
| 222 | +#define GAUDI2_DCORE1_TPC0_SPMU_BASE 0x1000007FF9001000ULL |
| 223 | +#define GAUDI2_DCORE1_TPC1_SPMU_BASE 0x1000007FF9201000ULL |
| 224 | +#define GAUDI2_DCORE1_TPC2_SPMU_BASE 0x1000007FF9401000ULL |
| 225 | +#define GAUDI2_DCORE1_TPC3_SPMU_BASE 0x1000007FF9601000ULL |
| 226 | +#define GAUDI2_DCORE1_TPC4_SPMU_BASE 0x1000007FF9801000ULL |
| 227 | +#define GAUDI2_DCORE1_TPC5_SPMU_BASE 0x1000007FF9A01000ULL |
| 228 | + |
| 229 | +#define GAUDI2_DCORE2_TPC0_SPMU_BASE 0x1000007FFA001000ULL |
| 230 | +#define GAUDI2_DCORE2_TPC1_SPMU_BASE 0x1000007FFA201000ULL |
| 231 | +#define GAUDI2_DCORE2_TPC2_SPMU_BASE 0x1000007FFA401000ULL |
| 232 | +#define GAUDI2_DCORE2_TPC3_SPMU_BASE 0x1000007FFA601000ULL |
| 233 | +#define GAUDI2_DCORE2_TPC4_SPMU_BASE 0x1000007FFA801000ULL |
| 234 | +#define GAUDI2_DCORE2_TPC5_SPMU_BASE 0x1000007FFAA01000ULL |
| 235 | + |
| 236 | +#define GAUDI2_DCORE3_TPC0_SPMU_BASE 0x1000007FFB001000ULL |
| 237 | +#define GAUDI2_DCORE3_TPC1_SPMU_BASE 0x1000007FFB201000ULL |
| 238 | +#define GAUDI2_DCORE3_TPC2_SPMU_BASE 0x1000007FFB401000ULL |
| 239 | +#define GAUDI2_DCORE3_TPC3_SPMU_BASE 0x1000007FFB601000ULL |
| 240 | +#define GAUDI2_DCORE3_TPC4_SPMU_BASE 0x1000007FFB801000ULL |
| 241 | +#define GAUDI2_DCORE3_TPC5_SPMU_BASE 0x1000007FFBA01000ULL |
| 242 | + |
| 243 | +/* EDMA SPMU base addresses; as listed, +0x1000000 per DCORE and +0x200000 per EDMA */ |
| 244 | +#define GAUDI2_DCORE0_EDMA0_SPMU_BASE 0x1000007FF0001000ULL |
| 245 | +#define GAUDI2_DCORE0_EDMA1_SPMU_BASE 0x1000007FF0201000ULL |
| 246 | +#define GAUDI2_DCORE1_EDMA0_SPMU_BASE 0x1000007FF1001000ULL |
| 247 | +#define GAUDI2_DCORE1_EDMA1_SPMU_BASE 0x1000007FF1201000ULL |
| 248 | +#define GAUDI2_DCORE2_EDMA0_SPMU_BASE 0x1000007FF2001000ULL |
| 249 | +#define GAUDI2_DCORE2_EDMA1_SPMU_BASE 0x1000007FF2201000ULL |
| 250 | +#define GAUDI2_DCORE3_EDMA0_SPMU_BASE 0x1000007FF3001000ULL |
| 251 | +#define GAUDI2_DCORE3_EDMA1_SPMU_BASE 0x1000007FF3201000ULL |
| 252 | + |
| 253 | +/* PDMA SPMU base addresses; as listed, PDMA1 is PDMA0 + 0x40000 */ |
| 254 | +#define GAUDI2_PDMA0_SPMU_BASE 0x1000007FFC4A1000ULL |
| 255 | +#define GAUDI2_PDMA1_SPMU_BASE 0x1000007FFC4E1000ULL |
| 256 | + |
| 257 | +/* Debug Operation Codes (from habanalabs_accel.h); codes 0-9 are contiguous, the codes from 1024 upward are not */ |
| 258 | +#define HL_DEBUG_OP_ETR 0 |
| 259 | +#define HL_DEBUG_OP_ETF 1 |
| 260 | +#define HL_DEBUG_OP_STM 2 |
| 261 | +#define HL_DEBUG_OP_FUNNEL 3 |
| 262 | +#define HL_DEBUG_OP_BMON 4 |
| 263 | +#define HL_DEBUG_OP_SPMU 5 |
| 264 | +#define HL_DEBUG_OP_TIMESTAMP 6 |
| 265 | +#define HL_DEBUG_OP_SET_MODE 7 |
| 266 | +#define HL_DEBUG_OP_FETCH_TRACE 8 |
| 267 | +#define HL_DEBUG_OP_DIO 9 |
| 268 | +#define HL_DEBUG_OP_READMEM 1024 |
| 269 | +#define HL_DEBUG_OP_MEMCPY 1025 |
| 270 | +#define HL_DEBUG_OP_SCHED_SUBMIT_BUF 1031 |
| 271 | +#define HL_DEBUG_OP_READBLOCK 1032 |
| 272 | + |
| 273 | +#define HL_DEBUG_MAX_AUX_VALUES 10 /* NOTE(review): presumably also mirrored from habanalabs_accel.h - TODO confirm */ |
| 274 | + |
| 275 | +/* Event Info Structure: describes one native event by name, description, engine type and hardware event ID */ |
| 276 | +typedef struct { |
| 277 | + const char *name; /* Event name (e.g., "TPC_KERNEL_EXECUTED") */ |
| 278 | + const char *description; /* Description text for the event */ |
| 279 | + gaudi2_engine_type_t engine; /* Engine type */ |
| 280 | + unsigned int event_id; /* Hardware event ID within engine; presumably a value from the matching *_spmu_event_t enum - TODO confirm */ |
| 281 | +} gaudi2_native_event_t; |
| 282 | + |
| 283 | +#endif /* _GAUDI2_EVENTS_H */ |
0 commit comments