Skip to content

Commit 7349f2f

Browse files
committed
add L3 shared cache info
Change-Id: I302d35bde3d4f725327d1dc4827957e369ca5f1b
1 parent d349e66 commit 7349f2f

File tree

2 files changed: 73 additions and 22 deletions.

src/cpucounters.cpp

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1101,6 +1101,7 @@ bool PCM::discoverSystemTopology()
11011101
uint32 smtMaskWidth = 0;
11021102
uint32 coreMaskWidth = 0;
11031103
uint32 l2CacheMaskShift = 0;
1104+
uint32 l3CacheMaskShift = 0;
11041105

11051106
struct domain
11061107
{
@@ -1111,7 +1112,7 @@ bool PCM::discoverSystemTopology()
11111112
{
11121113
TemporalThreadAffinity aff0(0);
11131114

1114-
if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)
1115+
if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift, l3CacheMaskShift) == false)
11151116
{
11161117
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
11171118
return false;
@@ -1151,20 +1152,18 @@ bool PCM::discoverSystemTopology()
11511152
for (size_t l = 0; l < topologyDomains.size(); ++l)
11521153
{
11531154
topologyDomainMap[topologyDomains[l].type] = topologyDomains[l];
1154-
#if 0
1155-
std::cerr << "Topology level: " << l <<
1156-
" type: " << topologyDomains[l].type <<
1157-
" (" << TopologyEntry::getDomainTypeStr(topologyDomains[l].type) << ")" <<
1158-
" width: " << topologyDomains[l].width <<
1159-
" levelShift: " << topologyDomains[l].levelShift <<
1160-
" nextLevelShift: " << topologyDomains[l].nextLevelShift << "\n";
1161-
#endif
1155+
DBG(1 , "Topology level: " , l ,
1156+
" type: " , topologyDomains[l].type ,
1157+
" (" , TopologyEntry::getDomainTypeStr(topologyDomains[l].type) , ")" ,
1158+
" width: " , topologyDomains[l].width ,
1159+
" levelShift: " , topologyDomains[l].levelShift ,
1160+
" nextLevelShift: " , topologyDomains[l].nextLevelShift);
11621161
}
11631162
}
11641163
}
11651164

11661165
#ifndef __APPLE__
1167-
auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
1166+
auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift, &l3CacheMaskShift](TopologyEntry& entry)
11681167
{
11691168
auto getAPICID = [&](const uint32 leaf)
11701169
{
@@ -1218,6 +1217,7 @@ bool PCM::discoverSystemTopology()
12181217
{
12191218
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));
12201219
}
1220+
entry.l3_cache_id = extract_bits_32(getAPICID(0xb), l3CacheMaskShift, 31);
12211221
};
12221222
#endif
12231223

@@ -3231,7 +3231,7 @@ void PCM::printDetailedSystemTopology(const int detailLevel)
32313231
std::cerr << "Tile_Id ";
32323232
if (detailLevel > 0) std::cerr << "Die_Id Die_Group_Id ";
32333233
std::cerr << "Package_Id Core_Type Native_CPU_Model\n";
3234-
std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket;
3234+
std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket, os_id_by_l3_cache;
32353235
size_t counter = 0;
32363236
for (auto it = topology.begin(); it != topology.end(); ++it)
32373237
{
@@ -3252,6 +3252,7 @@ void PCM::printDetailedSystemTopology(const int detailLevel)
32523252
// add socket offset to distinguish cores and tiles from different sockets
32533253
os_id_by_core[(it->socket_id << 15) + it->core_id].push_back(it->os_id);
32543254
os_id_by_tile[(it->socket_id << 15) + it->tile_id].push_back(it->os_id);
3255+
os_id_by_l3_cache[(it->socket_id << 15) + it->l3_cache_id].push_back(it->os_id);
32553256

32563257
++counter;
32573258
}
@@ -3288,6 +3289,16 @@ void PCM::printDetailedSystemTopology(const int detailLevel)
32883289
}
32893290
std::cerr << ")";
32903291
}
3292+
std::cerr << "\nL3$ ";
3293+
for (auto core = os_id_by_l3_cache.begin(); core != os_id_by_l3_cache.end(); ++core)
3294+
{
3295+
auto os_id = core->second.begin();
3296+
std::cerr << "(" << *os_id;
3297+
for (++os_id; os_id != core->second.end(); ++os_id) {
3298+
std::cerr << "," << *os_id;
3299+
}
3300+
std::cerr << ")";
3301+
}
32913302
std::cerr << "\n";
32923303
std::cerr << "\n";
32933304
}

src/topologyentry.h

Lines changed: 51 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
#pragma once
55

66
#include "types.h"
7+
#include "debug.h"
78

89
namespace pcm
910
{
@@ -25,6 +26,7 @@ struct PCM_API TopologyEntry // describes a core
2526
int32 die_grp_id;
2627
int32 socket_id;
2728
int32 socket_unique_core_id;
29+
int32 l3_cache_id = -1;
2830
int32 native_cpu_model = -1;
2931
enum DomainTypeID
3032
{
@@ -103,7 +105,7 @@ inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const
103105
entry.socket_unique_core_id = entry.core_id;
104106
}
105107

106-
inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift)
108+
inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift, uint32 & l3CacheMaskShift)
107109
{
108110
// init constants for CPU topology leaf 0xB
109111
// adapted from Topology Enumeration Reference code for Intel 64 Architecture
@@ -154,24 +156,62 @@ inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32
154156

155157
(void) coreMaskWidth; // to suppress warnings on MacOS (unused vars)
156158

157-
#ifdef PCM_DEBUG_TOPOLOGY
158-
uint32 threadsSharingL2;
159-
#endif
160-
uint32 l2CacheMaskWidth;
159+
uint32 threadsSharingL2 = 0;
160+
uint32 l2CacheMaskWidth = 0;
161161

162162
pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
163163
l2CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
164-
#ifdef PCM_DEBUG_TOPOLOGY
165164
threadsSharingL2 = l2CacheMaskWidth;
166-
#endif
167165
for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
168166
{
169167
l2CacheMaskShift++;
170168
}
171-
#ifdef PCM_DEBUG_TOPOLOGY
172-
std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
173-
<< " [the most significant bit = " << l2CacheMaskShift << "]\n";
174-
#endif
169+
DBG(1, "Number of threads sharing L2 cache = " , threadsSharingL2, " [the most significant bit = " , l2CacheMaskShift , "]");
170+
171+
uint32 threadsSharingL3 = 0;
172+
uint32 l3CacheMaskWidth = 0;
173+
174+
pcm_cpuid(0x4, 3, cpuid_args); // get ID for L3 cache
175+
l3CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0], 14, 25); // number of APIC IDs sharing L3 cache
176+
threadsSharingL3 = l3CacheMaskWidth;
177+
for( ; l3CacheMaskWidth > 1; l3CacheMaskWidth >>= 1)
178+
{
179+
l3CacheMaskShift++;
180+
}
181+
DBG(1, "Number of threads sharing L3 cache = " , threadsSharingL3, " [the most significant bit = " , l3CacheMaskShift , "]");
182+
183+
uint32 it = 0;
184+
185+
for (int i = 0; i < 100; ++i)
186+
{
187+
uint32 threadsSharingCache = 0;
188+
uint32 CacheMaskWidth = 0;
189+
uint32 CacheMaskShift = 0;
190+
pcm_cpuid(0x4, it, cpuid_args);
191+
const auto cacheType = extract_bits_32(cpuid_args.array[0], 0, 4);
192+
if (cacheType == 0)
193+
{
194+
break; // no more caches
195+
}
196+
const char * cacheTypeStr = nullptr;
197+
switch (cacheType)
198+
{
199+
case 1: cacheTypeStr = "data"; break;
200+
case 2: cacheTypeStr = "instruction"; break;
201+
case 3: cacheTypeStr = "unified"; break;
202+
default: cacheTypeStr = "unknown"; break;
203+
}
204+
const auto level = extract_bits_32(cpuid_args.array[0], 5, 7);
205+
CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0], 14, 25); // number of APIC IDs sharing cache
206+
threadsSharingCache = CacheMaskWidth;
207+
for( ; CacheMaskWidth > 1; CacheMaskWidth >>= 1)
208+
{
209+
CacheMaskShift++;
210+
}
211+
DBG(1, "Max number of threads sharing L" , level , " " , cacheTypeStr , " cache = " , threadsSharingCache, " [the most significant bit = " , CacheMaskShift , "]",
212+
" shift = " , CacheMaskShift);
213+
++it;
214+
}
175215
}
176216
return true;
177217
}

0 commit comments

Comments
 (0)