Commit 8823efe
authored
[AMDGPU] Add register usage debug printing the point of maximum register pressure. (#161850)
Basically this allows to analyze "why so many VGPRs used?".
It prints all live registers at the point of maximum register pressure
and for each register its defs/uses are dumped.
Currently can be run before and after the scheduler but would be nice if
it can be ran inbetween any passes (not sure this is possible with
legacy pass-manager). Requires debug or built with asserts compiler.
Highly recommended to run with debug info to have debug locations for
instructions.
Example output:
```
*** Register pressure info (VGPRs) for _ZN7ck_tile6ken.... ***
Max pressure is 256 VGPRs at [email protected] (LoopHdr BB.16, Depth 1): %9858:vreg_512_align2 = contract V_MFMA_F32_32X32X16_BF16_mac_vgprcd_e64 %10137:vreg_128_align2, %10141:vreg_128_align2, %9858:vreg_512_align2(tied-def 0), 0, 0, 0, implicit $mode, implicit $exec
Live registers with single definition (123 VGPRs):
%10126:VReg_128_Align2, LiveMask 00000000000000FF (4 VGPRs)
def [email protected] (LoopHdr BB.16, Depth 1): undef %10126.sub0_sub1:vreg_128_align2 = DS_READ_B64_TR_B16 %478:vgpr_32, 15232, 0, implicit $exec :: (load (s64) from %ir.sunkaddr1314, !noalias !60, addrspace 3)
def [email protected] (LoopHdr BB.16, Depth 1): %10126.sub2_sub3:vreg_128_align2 = DS_READ_B64_TR_B16 %478:vgpr_32, 16320, 0, implicit $exec :: (load (s64) from %ir.sunkaddr1315, !noalias !60, addrspace 3)
use [email protected] (LoopHdr BB.16, Depth 1): %9856:vreg_512_align2 = contract V_MFMA_F32_32X32X16_BF16_mac_vgprcd_e64 %10126:vreg_128_align2, %10138:vreg_128_align2, %9856:vreg_512_align2(tied-def 0), 0, 0, 0, implicit $mode, implicit $exec
%10136:VReg_128_Align2, LiveMask 00000000000000FF (4 VGPRs)
def [email protected] (LoopHdr BB.16, Depth 1): undef %10136.sub0_sub1:vreg_128_align2 = DS_READ_B64_TR_B16 %478:vgpr_32, 2176, 0, implicit $exec :: (load (s64) from %ir.sunkaddr1294, !noalias !60, addrspace 3)
def [email protected] (LoopHdr BB.16, Depth 1): %10136.sub2_sub3:vreg_128_align2 = DS_READ_B64_TR_B16 %478:vgpr_32, 3264, 0, implicit $exec :: (load (s64) from %ir.sunkaddr1295, !noalias !60, addrspace 3)
use [email protected] (LoopHdr BB.16, Depth 1): %9858:vreg_512_align2 = contract V_MFMA_F32_32X32X16_BF16_mac_vgprcd_e64 %10136:vreg_128_align2, %10140:vreg_128_align2, %9858:vreg_512_align2(tied-def 0), 0, 0, 0, implicit $mode, implicit $exec
%10129:VReg_128_Align2, LiveMask 00000000000000FF (4 VGPRs)
...
Live registers with multiple definitions (133 VGPRs):
%9856:VReg_512_Align2, LiveMask 00000000FFFFFFFF (16 VGPRs)
def [email protected]: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def undef %9856.sub0_sub1:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52
def [email protected]: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub2_sub3:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52
def [email protected]: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub4_sub5:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52
def [email protected]: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub6_sub7:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52
def [email protected]: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub8_sub9:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52
def [email protected]: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub10_sub11:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52
def [email protected]: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub12_sub13:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52
def [email protected]: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub14_sub15:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52
def use [email protected] (LoopHdr BB.16, Depth 1): %9856:vreg_512_align2 = contract V_MFMA_F32_...
...
********** INTERVALS **********
...
********** MACHINEINSTRS **********
# Machine code for function _ZN7ck_tile6kentr...
```1 parent a80a6b3 commit 8823efe
File tree
3 files changed
+188
-6
lines changed- llvm/lib/Target/AMDGPU
3 files changed
+188
-6
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
14 | 14 | | |
15 | 15 | | |
16 | 16 | | |
| 17 | + | |
17 | 18 | | |
18 | 19 | | |
19 | 20 | | |
| |||
459 | 460 | | |
460 | 461 | | |
461 | 462 | | |
462 | | - | |
| 463 | + | |
| 464 | + | |
463 | 465 | | |
464 | 466 | | |
465 | 467 | | |
| 468 | + | |
| 469 | + | |
| 470 | + | |
466 | 471 | | |
467 | 472 | | |
468 | 473 | | |
| |||
986 | 991 | | |
987 | 992 | | |
988 | 993 | | |
| 994 | + | |
| 995 | + | |
| 996 | + | |
| 997 | + | |
| 998 | + | |
| 999 | + | |
| 1000 | + | |
| 1001 | + | |
| 1002 | + | |
| 1003 | + | |
| 1004 | + | |
| 1005 | + | |
| 1006 | + | |
| 1007 | + | |
| 1008 | + | |
| 1009 | + | |
| 1010 | + | |
| 1011 | + | |
| 1012 | + | |
| 1013 | + | |
| 1014 | + | |
| 1015 | + | |
| 1016 | + | |
| 1017 | + | |
| 1018 | + | |
| 1019 | + | |
| 1020 | + | |
| 1021 | + | |
| 1022 | + | |
| 1023 | + | |
| 1024 | + | |
| 1025 | + | |
| 1026 | + | |
| 1027 | + | |
| 1028 | + | |
| 1029 | + | |
| 1030 | + | |
| 1031 | + | |
| 1032 | + | |
| 1033 | + | |
| 1034 | + | |
| 1035 | + | |
| 1036 | + | |
| 1037 | + | |
| 1038 | + | |
| 1039 | + | |
| 1040 | + | |
| 1041 | + | |
| 1042 | + | |
| 1043 | + | |
| 1044 | + | |
| 1045 | + | |
| 1046 | + | |
| 1047 | + | |
| 1048 | + | |
| 1049 | + | |
| 1050 | + | |
| 1051 | + | |
| 1052 | + | |
| 1053 | + | |
| 1054 | + | |
| 1055 | + | |
| 1056 | + | |
| 1057 | + | |
| 1058 | + | |
| 1059 | + | |
| 1060 | + | |
| 1061 | + | |
| 1062 | + | |
| 1063 | + | |
| 1064 | + | |
| 1065 | + | |
| 1066 | + | |
| 1067 | + | |
| 1068 | + | |
| 1069 | + | |
| 1070 | + | |
| 1071 | + | |
| 1072 | + | |
| 1073 | + | |
| 1074 | + | |
| 1075 | + | |
| 1076 | + | |
| 1077 | + | |
| 1078 | + | |
| 1079 | + | |
| 1080 | + | |
| 1081 | + | |
| 1082 | + | |
| 1083 | + | |
| 1084 | + | |
| 1085 | + | |
| 1086 | + | |
| 1087 | + | |
| 1088 | + | |
| 1089 | + | |
| 1090 | + | |
| 1091 | + | |
| 1092 | + | |
| 1093 | + | |
| 1094 | + | |
| 1095 | + | |
| 1096 | + | |
| 1097 | + | |
| 1098 | + | |
| 1099 | + | |
| 1100 | + | |
| 1101 | + | |
| 1102 | + | |
| 1103 | + | |
| 1104 | + | |
| 1105 | + | |
| 1106 | + | |
| 1107 | + | |
| 1108 | + | |
| 1109 | + | |
| 1110 | + | |
| 1111 | + | |
| 1112 | + | |
| 1113 | + | |
| 1114 | + | |
| 1115 | + | |
| 1116 | + | |
| 1117 | + | |
| 1118 | + | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
31 | 31 | | |
32 | 32 | | |
33 | 33 | | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
34 | 40 | | |
35 | 41 | | |
36 | 42 | | |
| |||
41 | 47 | | |
42 | 48 | | |
43 | 49 | | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
44 | 55 | | |
45 | 56 | | |
46 | 57 | | |
| |||
138 | 149 | | |
139 | 150 | | |
140 | 151 | | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
141 | 158 | | |
142 | 159 | | |
143 | 160 | | |
| |||
294 | 311 | | |
295 | 312 | | |
296 | 313 | | |
297 | | - | |
298 | | - | |
| 314 | + | |
| 315 | + | |
| 316 | + | |
| 317 | + | |
299 | 318 | | |
300 | 319 | | |
301 | 320 | | |
| |||
428 | 447 | | |
429 | 448 | | |
430 | 449 | | |
431 | | - | |
432 | | - | |
433 | | - | |
434 | 450 | | |
435 | 451 | | |
436 | 452 | | |
| |||
524 | 540 | | |
525 | 541 | | |
526 | 542 | | |
| 543 | + | |
| 544 | + | |
| 545 | + | |
| 546 | + | |
| 547 | + | |
527 | 548 | | |
528 | 549 | | |
529 | 550 | | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
69 | 69 | | |
70 | 70 | | |
71 | 71 | | |
| 72 | + | |
| 73 | + | |
| 74 | + | |
| 75 | + | |
| 76 | + | |
| 77 | + | |
| 78 | + | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
72 | 87 | | |
73 | 88 | | |
74 | 89 | | |
| |||
960 | 975 | | |
961 | 976 | | |
962 | 977 | | |
| 978 | + | |
| 979 | + | |
| 980 | + | |
| 981 | + | |
| 982 | + | |
| 983 | + | |
| 984 | + | |
| 985 | + | |
963 | 986 | | |
964 | 987 | | |
965 | 988 | | |
| |||
995 | 1018 | | |
996 | 1019 | | |
997 | 1020 | | |
| 1021 | + | |
| 1022 | + | |
| 1023 | + | |
| 1024 | + | |
| 1025 | + | |
| 1026 | + | |
| 1027 | + | |
| 1028 | + | |
998 | 1029 | | |
999 | 1030 | | |
1000 | 1031 | | |
| |||
0 commit comments