add L3 shared cache info

rdementi · rdementi · commit 7349f2f18eba · 2025-04-28T15:23:15.000+02:00
Change-Id: I302d35bde3d4f725327d1dc4827957e369ca5f1b
diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp
@@ -1101,6 +1101,7 @@ bool PCM::discoverSystemTopology()
     uint32 smtMaskWidth = 0;
     uint32 coreMaskWidth = 0;
     uint32 l2CacheMaskShift = 0;
+    uint32 l3CacheMaskShift = 0;
 
     struct domain
     {
@@ -1111,7 +1112,7 @@ bool PCM::discoverSystemTopology()
     {
         TemporalThreadAffinity aff0(0);
 
-        if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)
+        if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift, l3CacheMaskShift) == false)
         {
             std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
             return false;
@@ -1151,20 +1152,18 @@ bool PCM::discoverSystemTopology()
             for (size_t l = 0; l < topologyDomains.size(); ++l)
             {
                 topologyDomainMap[topologyDomains[l].type] = topologyDomains[l];
-#if 0
-                std::cerr << "Topology level: " << l <<
-                                      " type: " << topologyDomains[l].type <<
-                                      " (" << TopologyEntry::getDomainTypeStr(topologyDomains[l].type) << ")" <<
-                                      " width: " << topologyDomains[l].width <<
-                                      " levelShift: " << topologyDomains[l].levelShift <<
-                                      " nextLevelShift: " << topologyDomains[l].nextLevelShift << "\n";
-#endif
+                DBG(1 , "Topology level: " , l ,
+                                      " type: " , topologyDomains[l].type ,
+                                      " (" , TopologyEntry::getDomainTypeStr(topologyDomains[l].type) , ")" ,
+                                      " width: " , topologyDomains[l].width ,
+                                      " levelShift: " , topologyDomains[l].levelShift ,
+                                      " nextLevelShift: " , topologyDomains[l].nextLevelShift);
             }
         }
     }
 
 #ifndef __APPLE__
-    auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
+    auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift, &l3CacheMaskShift](TopologyEntry& entry)
     {
         auto getAPICID = [&](const uint32 leaf)
         {
@@ -1218,6 +1217,7 @@ bool PCM::discoverSystemTopology()
         {
             fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));
         }
+        entry.l3_cache_id = extract_bits_32(getAPICID(0xb), l3CacheMaskShift, 31);
     };
 #endif
 
@@ -3231,7 +3231,7 @@ void PCM::printDetailedSystemTopology(const int detailLevel)
         std::cerr << "Tile_Id         ";
         if (detailLevel > 0) std::cerr << "Die_Id          Die_Group_Id    ";
         std::cerr << "Package_Id      Core_Type       Native_CPU_Model\n";
-        std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket;
+        std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket, os_id_by_l3_cache;
         size_t counter = 0;
         for (auto it = topology.begin(); it != topology.end(); ++it)
         {
@@ -3252,6 +3252,7 @@ void PCM::printDetailedSystemTopology(const int detailLevel)
             // add socket offset to distinguish cores and tiles from different sockets
             os_id_by_core[(it->socket_id << 15) + it->core_id].push_back(it->os_id);
             os_id_by_tile[(it->socket_id << 15) + it->tile_id].push_back(it->os_id);
+            os_id_by_l3_cache[(it->socket_id << 15) + it->l3_cache_id].push_back(it->os_id);
 
             ++counter;
         }
@@ -3288,6 +3289,16 @@ void PCM::printDetailedSystemTopology(const int detailLevel)
             }
             std::cerr << ")";
         }
+        std::cerr << "\nL3$ ";
+        for (auto core = os_id_by_l3_cache.begin(); core != os_id_by_l3_cache.end(); ++core)
+        {
+            auto os_id = core->second.begin();
+            std::cerr << "(" << *os_id;
+            for (++os_id; os_id != core->second.end(); ++os_id) {
+                std::cerr << "," << *os_id;
+            }
+            std::cerr << ")";
+        }
         std::cerr << "\n";
         std::cerr << "\n";
     }
diff --git a/src/topologyentry.h b/src/topologyentry.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include "types.h"
+#include "debug.h"
 
 namespace pcm
 {
@@ -25,6 +26,7 @@ struct PCM_API TopologyEntry // describes a core
     int32 die_grp_id;
     int32 socket_id;
     int32 socket_unique_core_id;
+    int32 l3_cache_id = -1;
     int32 native_cpu_model = -1;
     enum DomainTypeID
     {
@@ -103,7 +105,7 @@ inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const
     entry.socket_unique_core_id = entry.core_id;
 }
 
-inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift)
+inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift, uint32 & l3CacheMaskShift)
 {
     // init constants for CPU topology leaf 0xB
     // adapted from Topology Enumeration Reference code for Intel 64 Architecture
@@ -154,24 +156,62 @@ inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32
 
         (void) coreMaskWidth; // to suppress warnings on MacOS (unused vars)
 
-    #ifdef PCM_DEBUG_TOPOLOGY
-        uint32 threadsSharingL2;
-    #endif
-        uint32 l2CacheMaskWidth;
+        uint32 threadsSharingL2 = 0;
+        uint32 l2CacheMaskWidth = 0;
 
         pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
         l2CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
-    #ifdef PCM_DEBUG_TOPOLOGY
         threadsSharingL2 = l2CacheMaskWidth;
-    #endif
         for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
         {
             l2CacheMaskShift++;
         }
-    #ifdef PCM_DEBUG_TOPOLOGY
-        std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
-                << " [the most significant bit = " << l2CacheMaskShift << "]\n";
-    #endif
+        DBG(1, "Number of threads sharing L2 cache = " , threadsSharingL2, " [the most significant bit = " , l2CacheMaskShift , "]");
+
+        uint32 threadsSharingL3 = 0;
+        uint32 l3CacheMaskWidth = 0;
+
+        pcm_cpuid(0x4, 3, cpuid_args); // get ID for L3 cache
+        l3CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0], 14, 25); // number of APIC IDs sharing L3 cache
+        threadsSharingL3 = l3CacheMaskWidth;
+        for( ; l3CacheMaskWidth > 1; l3CacheMaskWidth >>= 1)
+        {
+            l3CacheMaskShift++;
+        }
+        DBG(1, "Number of threads sharing L3 cache = " , threadsSharingL3, " [the most significant bit = " , l3CacheMaskShift , "]");
+
+        uint32 it = 0;
+
+        for (int i = 0; i < 100; ++i)
+        {
+            uint32 threadsSharingCache = 0;
+            uint32 CacheMaskWidth = 0;
+            uint32 CacheMaskShift = 0;
+            pcm_cpuid(0x4, it, cpuid_args);
+            const auto cacheType = extract_bits_32(cpuid_args.array[0], 0, 4);
+            if (cacheType == 0)
+            {
+                break; // no more caches
+            }
+            const char * cacheTypeStr = nullptr;
+            switch (cacheType)
+            {
+                case 1: cacheTypeStr = "data"; break;
+                case 2: cacheTypeStr = "instruction"; break;
+                case 3: cacheTypeStr = "unified"; break;
+                default: cacheTypeStr = "unknown"; break;
+            }
+            const auto level = extract_bits_32(cpuid_args.array[0], 5, 7);
+            CacheMaskWidth = 1 + extract_bits_32(cpuid_args.array[0], 14, 25); // number of APIC IDs sharing cache
+            threadsSharingCache = CacheMaskWidth;
+            for( ; CacheMaskWidth > 1; CacheMaskWidth >>= 1)
+            {
+                CacheMaskShift++;
+            }
+            DBG(1, "Max number of threads sharing L" , level , " " , cacheTypeStr , " cache = " , threadsSharingCache, " [the most significant bit = " , CacheMaskShift , "]",
+                " shift = " , CacheMaskShift);
+            ++it;
+        }
     }
     return true;
 }

Original file line number	Diff line number	Diff line change
`@@ -1101,6 +1101,7 @@ bool PCM::discoverSystemTopology()`
`1101`	`1101`	`uint32 smtMaskWidth = 0;`
`1102`	`1102`	`uint32 coreMaskWidth = 0;`
`1103`	`1103`	`uint32 l2CacheMaskShift = 0;`
	`1104`	`+ uint32 l3CacheMaskShift = 0;`
`1104`	`1105`
`1105`	`1106`	`struct domain`
`1106`	`1107`	`{`
`@@ -1111,7 +1112,7 @@ bool PCM::discoverSystemTopology()`
`1111`	`1112`	`{`
`1112`	`1113`	`TemporalThreadAffinity aff0(0);`
`1113`	`1114`
`1114`		`- if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)`
	`1115`	`+ if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift, l3CacheMaskShift) == false)`
`1115`	`1116`	`{`
`1116`	`1117`	`std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";`
`1117`	`1118`	`return false;`
`@@ -1151,20 +1152,18 @@ bool PCM::discoverSystemTopology()`
`1151`	`1152`	`for (size_t l = 0; l < topologyDomains.size(); ++l)`
`1152`	`1153`	`{`
`1153`	`1154`	`topologyDomainMap[topologyDomains[l].type] = topologyDomains[l];`
`1154`		`-#if 0`
`1155`		`- std::cerr << "Topology level: " << l <<`
`1156`		`- " type: " << topologyDomains[l].type <<`
`1157`		`- " (" << TopologyEntry::getDomainTypeStr(topologyDomains[l].type) << ")" <<`
`1158`		`- " width: " << topologyDomains[l].width <<`
`1159`		`- " levelShift: " << topologyDomains[l].levelShift <<`
`1160`		`- " nextLevelShift: " << topologyDomains[l].nextLevelShift << "\n";`
`1161`		`-#endif`
	`1155`	`+ DBG(1 , "Topology level: " , l ,`
	`1156`	`+ " type: " , topologyDomains[l].type ,`
	`1157`	`+ " (" , TopologyEntry::getDomainTypeStr(topologyDomains[l].type) , ")" ,`
	`1158`	`+ " width: " , topologyDomains[l].width ,`
	`1159`	`+ " levelShift: " , topologyDomains[l].levelShift ,`
	`1160`	`+ " nextLevelShift: " , topologyDomains[l].nextLevelShift);`
`1162`	`1161`	`}`
`1163`	`1162`	`}`
`1164`	`1163`	`}`
`1165`	`1164`
`1166`	`1165`	`#ifndef __APPLE__`
`1167`		`- auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)`
	`1166`	`+ auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift, &l3CacheMaskShift](TopologyEntry& entry)`
`1168`	`1167`	`{`
`1169`	`1168`	`auto getAPICID = [&](const uint32 leaf)`
`1170`	`1169`	`{`
`@@ -1218,6 +1217,7 @@ bool PCM::discoverSystemTopology()`
`1218`	`1217`	`{`
`1219`	`1218`	`fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));`
`1220`	`1219`	`}`
	`1220`	`+ entry.l3_cache_id = extract_bits_32(getAPICID(0xb), l3CacheMaskShift, 31);`
`1221`	`1221`	`};`
`1222`	`1222`	`#endif`
`1223`	`1223`
`@@ -3231,7 +3231,7 @@ void PCM::printDetailedSystemTopology(const int detailLevel)`
`3231`	`3231`	`std::cerr << "Tile_Id ";`
`3232`	`3232`	`if (detailLevel > 0) std::cerr << "Die_Id Die_Group_Id ";`
`3233`	`3233`	`std::cerr << "Package_Id Core_Type Native_CPU_Model\n";`
`3234`		`- std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket;`
	`3234`	`+ std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket, os_id_by_l3_cache;`
`3235`	`3235`	`size_t counter = 0;`
`3236`	`3236`	`for (auto it = topology.begin(); it != topology.end(); ++it)`
`3237`	`3237`	`{`
`@@ -3252,6 +3252,7 @@ void PCM::printDetailedSystemTopology(const int detailLevel)`
`3252`	`3252`	`// add socket offset to distinguish cores and tiles from different sockets`
`3253`	`3253`	`os_id_by_core[(it->socket_id << 15) + it->core_id].push_back(it->os_id);`
`3254`	`3254`	`os_id_by_tile[(it->socket_id << 15) + it->tile_id].push_back(it->os_id);`
	`3255`	`+ os_id_by_l3_cache[(it->socket_id << 15) + it->l3_cache_id].push_back(it->os_id);`
`3255`	`3256`
`3256`	`3257`	`++counter;`
`3257`	`3258`	`}`
`@@ -3288,6 +3289,16 @@ void PCM::printDetailedSystemTopology(const int detailLevel)`
`3288`	`3289`	`}`
`3289`	`3290`	`std::cerr << ")";`
`3290`	`3291`	`}`
	`3292`	`+ std::cerr << "\nL3$ ";`
	`3293`	`+ for (auto core = os_id_by_l3_cache.begin(); core != os_id_by_l3_cache.end(); ++core)`
	`3294`	`+ {`
	`3295`	`+ auto os_id = core->second.begin();`
	`3296`	`+ std::cerr << "(" << *os_id;`
	`3297`	`+ for (++os_id; os_id != core->second.end(); ++os_id) {`
	`3298`	`+ std::cerr << "," << *os_id;`
	`3299`	`+ }`
	`3300`	`+ std::cerr << ")";`
	`3301`	`+ }`
`3291`	`3302`	`std::cerr << "\n";`
`3292`	`3303`	`std::cerr << "\n";`
`3293`	`3304`	`}`