@@ -60,7 +60,7 @@ struct GenericPluginTy;
6060struct GenericKernelTy ;
6161struct GenericDeviceTy ;
6262struct RecordReplayTy ;
63- struct KernelRunRecord ;
63+ struct KernelRunRecordTy ;
6464
6565// / Class that wraps the __tgt_async_info to simplify its usage. In case the
6666// / object is constructed without a valid __tgt_async_info, the object will use
@@ -1108,7 +1108,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
11081108
11091109 bool getMultiDeviceKernelValue (void *EntryPtr);
11101110
1111- KernelRunRecord *getKernelRunRecords () const { return KernelRunRecords; }
1111+ KernelRunRecordTy *getKernelRunRecords () const { return KernelRunRecords; }
11121112
11131113 // / Return true if a descriptor of size 'Size' should be allocated using
11141114 // / shared memory. Default implementation returns 'false',
@@ -1262,7 +1262,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
12621262 RPCServerTy *RPCServer;
12631263
12641264 // / Structs for functions and data used in runtime autotuning.
1265- KernelRunRecord *KernelRunRecords;
1265+ KernelRunRecordTy *KernelRunRecords;
12661266
12671267private:
12681268#ifdef OMPT_SUPPORT
@@ -1291,35 +1291,39 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
12911291};
12921292
12931293// / Struct represents the metadata for each kernel run on the device.
1294- struct KernelRunRecord {
1294+ struct KernelRunRecordTy {
12951295
1296- struct KernelRunEntry {
1296+ struct KernelRunEntryTy {
12971297 std::string KernelName;
1298- uint32_t NumTeams;
1299- uint32_t NumThreads;
1300- uint64_t RunDuration;
1298+ uint32_t NumTeams = 0 ;
1299+ uint32_t NumThreads = 0 ;
1300+ uint64_t RunDuration = 0 ;
13011301 };
13021302
13031303 // Metadata used in tuning process.
1304- struct TuningMetadata {
1304+ struct TuningMetadataTy {
13051305 uint32_t IdxThread = 0 ;
13061306 uint32_t IdxCUMultiplier = 0 ;
13071307 // Run counters.
13081308 uint32_t RunCounters = 0 ;
13091309 // Entry with minimum running time.
1310- KernelRunEntry MinEntries ;
1310+ KernelRunEntryTy MinEntry ;
13111311 };
13121312
13131313 // Add a new entry
13141314 void addEntry (std::string KernelName, uint32_t NumTeams, uint32_t NumThreads,
13151315 uint64_t RunDuration) {
1316- KernelRunEntry NewRunEnry = {KernelName, NumTeams, NumThreads, RunDuration};
13171316 TuningData[KernelName].RunCounters ++;
13181317
13191318 // Update the minimum-duration entry.
1320- auto MinDuration = TuningData[KernelName].MinEntries .RunDuration ;
1319+ uint64_t MinDuration = 0 ;
1320+ auto It = TuningData.find (KernelName);
1321+ if (It != TuningData.end ()) {
1322+ MinDuration = It->second .MinEntry .RunDuration ;
1323+ }
13211324 if (MinDuration > RunDuration || MinDuration == 0 ) {
1322- TuningData[KernelName].MinEntries = NewRunEnry;
1325+ TuningData[KernelName].MinEntry = {KernelName, NumTeams, NumThreads,
1326+ RunDuration};
13231327 }
13241328 }
13251329
@@ -1330,7 +1334,7 @@ struct KernelRunRecord {
13301334 // If the kernel reaches the run limit,
13311335 // return the current optimal launch parameters.
13321336 if (reachedRunLimitForKernel (KernelName)) {
1333- auto MinEntry = TuningData[KernelName].MinEntries ;
1337+ auto MinEntry = TuningData[KernelName].MinEntry ;
13341338 return {MinEntry.NumTeams , MinEntry.NumThreads };
13351339 }
13361340
@@ -1341,8 +1345,8 @@ struct KernelRunRecord {
13411345 if (IdxCUMulti >= CUMultiplierCandidate.size ()) {
13421346 // No more elements to search.
13431347 // Return current optimal launch parameters.
1344- return {TuningData[KernelName].MinEntries .NumTeams ,
1345- TuningData[KernelName].MinEntries .NumThreads };
1348+ return {TuningData[KernelName].MinEntry .NumTeams ,
1349+ TuningData[KernelName].MinEntry .NumThreads };
13461350 }
13471351
13481352 // New team/thread pair for launch parameters.
@@ -1363,7 +1367,7 @@ struct KernelRunRecord {
13631367 }
13641368
13651369 bool reachedRunLimitForKernel (std::string KernelName) {
1366- if (TuningData.count (KernelName) == 0 ) {
1370+ if (TuningData.find (KernelName) == TuningData. end () ) {
13671371 // If no record for this kernel.
13681372 return false ;
13691373 }
@@ -1372,7 +1376,7 @@ struct KernelRunRecord {
13721376 }
13731377
13741378 uint32_t getRunCounterForKernel (std::string KernelName) {
1375- if (TuningData.count (KernelName) == 0 ) {
1379+ if (TuningData.find (KernelName) == TuningData. end () ) {
13761380 return 0 ;
13771381 }
13781382
@@ -1386,7 +1390,7 @@ struct KernelRunRecord {
13861390 // The max number of tuning runs for each kernel.
13871391 uint32_t RunLimiter = ThreadCandidate.size() * CUMultiplierCandidate.size();
13881392 // Used for keeping track of the metadata used in tuning for each kernel.
1389- std::unordered_map<std::string, TuningMetadata > TuningData;
1393+ std::unordered_map<std::string, TuningMetadataTy > TuningData;
13901394};
13911395
13921396// / Class implementing common functionalities of offload plugins. Each plugin
0 commit comments