-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMemTraceInstrumentationKernel.cpp
More file actions
74 lines (67 loc) · 2.97 KB
/
MemTraceInstrumentationKernel.cpp
File metadata and controls
74 lines (67 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include "hip/hip_runtime.h"
#include <stdint.h>
#include <stdio.h>
#define WaveFrontSize 64
#define HexLen 15
__attribute__((always_inline))
__device__ uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
__attribute__((always_inline))
__device__ uint32_t getWaveId() {
return getThreadIdInBlock() / WaveFrontSize;
}
__attribute__((always_inline))
__device__ bool isSharedMemPtr(const void *Ptr) {
return __builtin_amdgcn_is_shared(
(const __attribute__((address_space(0))) void *)Ptr);
}
__attribute__((used))
__device__ void memTrace(void* addressPtr, uint32_t LocationId){
if(isSharedMemPtr(addressPtr))
return;
uint64_t address = reinterpret_cast<uint64_t>(addressPtr);
//Mask of the active threads in the wave
int activeMask = __builtin_amdgcn_read_exec();
// //Find first active thread in the wave by finding the position of the least significant bit set to 1 in the activeMask
const int firstActiveLane = __ffs(activeMask) - 1;
uint64_t addrArray[WaveFrontSize];
for(int i = 0; i < WaveFrontSize; i++){
addrArray[i] = __shfl(address, i, WaveFrontSize);
}
uint32_t Lane = __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
if(Lane == firstActiveLane){
unsigned int hw_id = 0;
uint64_t Time = 0;
#if !defined(__gfx1100__) && !defined(__gfx1101__)
Time = __builtin_amdgcn_s_memrealtime();
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID)" : "=s"(hw_id));
#endif
char hex_str[]= "0123456789abcdef";
char out[WaveFrontSize*HexLen + 1];
(out)[WaveFrontSize*HexLen] = '\0';
for (size_t i = 0; i < WaveFrontSize; i++) {
(out)[i * HexLen + 0] = '0';
(out)[i * HexLen + 1] = 'x';
(out)[i * HexLen + 2] = hex_str[(addrArray[i] >> 44) & 0x0F];
(out)[i * HexLen + 3] = hex_str[(addrArray[i] >> 40) & 0x0F];
(out)[i * HexLen + 4] = hex_str[(addrArray[i] >> 36) & 0x0F];
(out)[i * HexLen + 5] = hex_str[(addrArray[i] >> 32) & 0x0F];
(out)[i * HexLen + 6] = hex_str[(addrArray[i] >> 28) & 0x0F];
(out)[i * HexLen + 7] = hex_str[(addrArray[i] >> 24) & 0x0F];
(out)[i * HexLen + 8] = hex_str[(addrArray[i] >> 20) & 0x0F];
(out)[i * HexLen + 9] = hex_str[(addrArray[i] >> 16) & 0x0F];
(out)[i * HexLen + 10] = hex_str[(addrArray[i] >> 12) & 0x0F];
(out)[i * HexLen + 11] = hex_str[(addrArray[i] >> 8) & 0x0F];
(out)[i * HexLen + 12] = hex_str[(addrArray[i] >> 4) & 0x0F];
(out)[i * HexLen + 13] = hex_str[(addrArray[i] ) & 0x0F];
(out)[i * HexLen + 14] = ',';
}
(out)[WaveFrontSize * HexLen - 1] = '\n';
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
unsigned int xcc_id;
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_XCC_ID)" : "=s"(xcc_id));
printf("%ld,%d,%d,%d,%d,%d,%d, %s", Time, LocationId, (hw_id & 0xf), ((hw_id & 0x30) >> 4), ((hw_id & 0xf00) >> 8), ((hw_id & 0xe000) >> 13), xcc_id, out);
#else
printf("%ld,%d,%d,%d,%d,%d,%s", Time, LocationId, (hw_id & 0xf), ((hw_id & 0x30) >> 4), ((hw_id & 0xf00) >> 8), ((hw_id & 0xe000) >> 13),out);
#endif
}
}