@@ -945,7 +945,6 @@ body: |
945945 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
946946 ...
947947
948- # FIXME: Missing S_WAIT_XCNT before overwriting vgpr0.
949948---
950949name : wait_kmcnt_with_outstanding_vmem_2
951950tracksRegLiveness : true
@@ -971,6 +970,7 @@ body: |
971970 ; GCN-NEXT: {{ $}}
972971 ; GCN-NEXT: S_WAIT_KMCNT 0
973972 ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
973+ ; GCN-NEXT: S_WAIT_XCNT 0
974974 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
975975 bb.0:
976976 liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
@@ -985,6 +985,180 @@ body: |
985985 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
986986 ...
987987
# wait_kmcnt_and_wait_loadcnt:
# bb.0 issues a scalar load into $sgpr2 and conditionally branches to bb.2;
# bb.1 (the fall-through path) issues a global load into $vgpr2. bb.2 then
# reads/writes $sgpr2 and overwrites $vgpr2.
# Expected output: S_WAIT_KMCNT 0 before the S_MOV_B32 that uses $sgpr2, and
# S_WAIT_LOADCNT 0 before the V_MOV_B32 that overwrites $vgpr2. The checks
# contain no S_WAIT_XCNT for this function.
988+ ---
989+ name : wait_kmcnt_and_wait_loadcnt
990+ tracksRegLiveness : true
991+ machineFunctionInfo :
992+ isEntryFunction : true
993+ body : |
994+ ; GCN-LABEL: name: wait_kmcnt_and_wait_loadcnt
995+ ; GCN: bb.0:
996+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
997+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
998+ ; GCN-NEXT: {{ $}}
999+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1000+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
1001+ ; GCN-NEXT: {{ $}}
1002+ ; GCN-NEXT: bb.1:
1003+ ; GCN-NEXT: successors: %bb.2(0x80000000)
1004+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
1005+ ; GCN-NEXT: {{ $}}
1006+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1007+ ; GCN-NEXT: {{ $}}
1008+ ; GCN-NEXT: bb.2:
1009+ ; GCN-NEXT: liveins: $sgpr2
1010+ ; GCN-NEXT: {{ $}}
1011+ ; GCN-NEXT: S_WAIT_KMCNT 0
1012+ ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
1013+ ; GCN-NEXT: S_WAIT_LOADCNT 0
1014+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
1015+ bb.0:
1016+ liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1017+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1018+ S_CBRANCH_SCC1 %bb.2, implicit $scc
1019+ bb.1:
1020+ liveins: $vgpr0_vgpr1, $sgpr2
1021+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1022+ bb.2:
1023+ liveins: $sgpr2
1024+ $sgpr2 = S_MOV_B32 $sgpr2
1025+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
1026+ ...
1027+
# implicit_handling_of_pending_vmem_group:
# bb.0 issues a scalar load into $sgpr2; bb.1 (fall-through only) issues a
# global load addressed by $vgpr0_vgpr1. bb.2 uses $sgpr2, issues a second
# scalar load addressed by $sgpr0_sgpr1, overwrites $vgpr0, then overwrites
# $sgpr0.
# Expected output: S_WAIT_KMCNT 0 before the first use of $sgpr2, NO wait
# before the $vgpr0 overwrite, and S_WAIT_XCNT 0 before the $sgpr0 overwrite
# (an address register of the scalar load issued in bb.2).
# NOTE(review): presumably the earlier S_WAIT_KMCNT 0 is what makes an XCNT
# wait for the bb.1 global load unnecessary -- confirm against the pass.
1028+ ---
1029+ name : implicit_handling_of_pending_vmem_group
1030+ tracksRegLiveness : true
1031+ machineFunctionInfo :
1032+ isEntryFunction : true
1033+ body : |
1034+ ; GCN-LABEL: name: implicit_handling_of_pending_vmem_group
1035+ ; GCN: bb.0:
1036+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
1037+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1038+ ; GCN-NEXT: {{ $}}
1039+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1040+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
1041+ ; GCN-NEXT: {{ $}}
1042+ ; GCN-NEXT: bb.1:
1043+ ; GCN-NEXT: successors: %bb.2(0x80000000)
1044+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
1045+ ; GCN-NEXT: {{ $}}
1046+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1047+ ; GCN-NEXT: {{ $}}
1048+ ; GCN-NEXT: bb.2:
1049+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2
1050+ ; GCN-NEXT: {{ $}}
1051+ ; GCN-NEXT: S_WAIT_KMCNT 0
1052+ ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
1053+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1054+ ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
1055+ ; GCN-NEXT: S_WAIT_XCNT 0
1056+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
1057+ bb.0:
1058+ liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1059+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1060+ S_CBRANCH_SCC1 %bb.2, implicit $scc
1061+ bb.1:
1062+ liveins: $vgpr0_vgpr1, $sgpr2
1063+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1064+ bb.2:
1065+ liveins: $sgpr0_sgpr1, $sgpr2
1066+ $sgpr2 = S_MOV_B32 $sgpr2
1067+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1068+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
1069+ $sgpr0 = S_MOV_B32 $sgpr0
1070+ ...
1071+
# pending_vmem_event_between_block:
# bb.0 issues a scalar load into $sgpr2; bb.1 (fall-through only) issues two
# global loads, the first addressed by $vgpr0_vgpr1 and the second by
# $vgpr2_vgpr3. bb.2 uses $sgpr2, then overwrites $vgpr1, $vgpr2 and $sgpr0
# in that order.
# Expected output: S_WAIT_KMCNT 0 before the use of $sgpr2, S_WAIT_XCNT 1
# before the $vgpr1 overwrite, S_WAIT_XCNT 0 before the $vgpr2 overwrite, and
# no additional wait before the $sgpr0 overwrite.
1072+ ---
1073+ name : pending_vmem_event_between_block
1074+ tracksRegLiveness : true
1075+ machineFunctionInfo :
1076+ isEntryFunction : true
1077+ body : |
1078+ ; GCN-LABEL: name: pending_vmem_event_between_block
1079+ ; GCN: bb.0:
1080+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
1081+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1082+ ; GCN-NEXT: {{ $}}
1083+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1084+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
1085+ ; GCN-NEXT: {{ $}}
1086+ ; GCN-NEXT: bb.1:
1087+ ; GCN-NEXT: successors: %bb.2(0x80000000)
1088+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
1089+ ; GCN-NEXT: {{ $}}
1090+ ; GCN-NEXT: $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1091+ ; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
1092+ ; GCN-NEXT: {{ $}}
1093+ ; GCN-NEXT: bb.2:
1094+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
1095+ ; GCN-NEXT: {{ $}}
1096+ ; GCN-NEXT: S_WAIT_KMCNT 0
1097+ ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
1098+ ; GCN-NEXT: S_WAIT_XCNT 1
1099+ ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
1100+ ; GCN-NEXT: S_WAIT_XCNT 0
1101+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
1102+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
1103+ bb.0:
1104+ liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1105+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1106+ S_CBRANCH_SCC1 %bb.2, implicit $scc
1107+ bb.1:
1108+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
1109+ $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1110+ $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
1111+ bb.2:
1112+ liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
1113+ $sgpr2 = S_MOV_B32 $sgpr2
1114+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
1115+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
1116+ $sgpr0 = S_MOV_B32 $sgpr0
1117+ ...
1118+
# flushing_vmem_cnt_on_block_entry:
# Same bb.0/bb.1 setup as pending_vmem_event_between_block (a scalar load in
# bb.0, two global loads in bb.1), but bb.2 does not use $sgpr2 first: it
# directly overwrites $vgpr1, $vgpr2 and $sgpr0.
# Expected output: a single S_WAIT_XCNT 0 at the top of bb.2, before the
# $vgpr1 overwrite, and no further waits before the $vgpr2 or $sgpr0
# overwrites. No S_WAIT_KMCNT is expected in bb.2.
1119+ ---
1120+ name : flushing_vmem_cnt_on_block_entry
1121+ tracksRegLiveness : true
1122+ machineFunctionInfo :
1123+ isEntryFunction : true
1124+ body : |
1125+ ; GCN-LABEL: name: flushing_vmem_cnt_on_block_entry
1126+ ; GCN: bb.0:
1127+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
1128+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1129+ ; GCN-NEXT: {{ $}}
1130+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1131+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
1132+ ; GCN-NEXT: {{ $}}
1133+ ; GCN-NEXT: bb.1:
1134+ ; GCN-NEXT: successors: %bb.2(0x80000000)
1135+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
1136+ ; GCN-NEXT: {{ $}}
1137+ ; GCN-NEXT: $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1138+ ; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
1139+ ; GCN-NEXT: {{ $}}
1140+ ; GCN-NEXT: bb.2:
1141+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
1142+ ; GCN-NEXT: {{ $}}
1143+ ; GCN-NEXT: S_WAIT_XCNT 0
1144+ ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
1145+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
1146+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
1147+ bb.0:
1148+ liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
1149+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1150+ S_CBRANCH_SCC1 %bb.2, implicit $scc
1151+ bb.1:
1152+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
1153+ $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1154+ $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
1155+ bb.2:
1156+ liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
1157+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
1158+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
1159+ $sgpr0 = S_MOV_B32 $sgpr0
1160+ ...
1161+
9881162---
9891163name : wait_loadcnt_with_outstanding_smem
9901164tracksRegLiveness : true
0 commit comments