Skip to content

Commit 36b58ab

Browse files
committed
AMDGPU: Select vector reg class for divergent build_vector
The main improvement is to the mfma tests. There are some mild regressions scattered around, and a few major ones. The worst regressions are in some of the bitcast tests; these are cases where the SGPR argument list runs out and the remaining arguments are passed in VGPRs, and the copies from those VGPRs are misidentified as divergent. Most of the shufflevector tests also regress: they end up with cleaner MIR, but then get poor regalloc decisions.
1 parent ee34e82 commit 36b58ab

File tree

73 files changed

+27693
-29844
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+27693
-29844
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -726,10 +726,14 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
726726
break;
727727
}
728728

729+
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
729730
assert(VT.getVectorElementType().bitsEq(MVT::i32));
730-
unsigned RegClassID =
731-
SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
732-
SelectBuildVector(N, RegClassID);
731+
const TargetRegisterClass *RegClass =
732+
N->isDivergent()
733+
? TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
734+
: SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32);
735+
736+
SelectBuildVector(N, RegClass->getID());
733737
return;
734738
}
735739
case ISD::VECTOR_SHUFFLE:

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 25415 additions & 26603 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24269,13 +24269,13 @@ define <16 x i8> @bitcast_v8bf16_to_v16i8(<8 x bfloat> %a, i32 %b) {
2426924269
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_hi16
2427024270
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_lo16
2427124271
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_hi16
24272-
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16
24272+
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr22_lo16
2427324273
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4_hi16
2427424274
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5_lo16
2427524275
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7_lo16
2427624276
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8_hi16
2427724277
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9_lo16
24278-
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_hi16
24278+
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20_hi16
2427924279
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_lo16
2428024280
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12_hi16
2428124281
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13_lo16
@@ -24285,22 +24285,22 @@ define <16 x i8> @bitcast_v8bf16_to_v16i8(<8 x bfloat> %a, i32 %b) {
2428524285
; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0
2428624286
; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB108_2
2428724287
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false
24288-
; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[18:19], 24, v[2:3]
2428924288
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 24, v3
2429024289
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 8, v3
2429124290
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 8, v2
2429224291
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 24, v1
2429324292
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v1
2429424293
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 8, v0
24295-
; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[19:20], 24, v[0:1]
24294+
; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[18:19], 24, v[2:3]
24295+
; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[22:23], 24, v[0:1]
2429624296
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v0.l
2429724297
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v0.h
2429824298
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v1.l
2429924299
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v1.h
2430024300
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v2.l
24301-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v2.h
24301+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v2.h
2430224302
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v3.l
24303-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v3.h
24303+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v3.h
2430424304
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
2430524305
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
2430624306
; GFX11-TRUE16-NEXT: .LBB108_2: ; %Flow
@@ -24372,32 +24372,32 @@ define <16 x i8> @bitcast_v8bf16_to_v16i8(<8 x bfloat> %a, i32 %b) {
2437224372
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc_lo
2437324373
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
2437424374
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
24375+
; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[22:23], 24, v[16:17]
2437524376
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 8, v16
24376-
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v22, v7, v9, vcc_lo
24377+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v21, v7, v9, vcc_lo
2437724378
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
24378-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v12.h
24379+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v12.h
2437924380
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 24, v17
24380-
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v21, v1, v3, vcc_lo
24381-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v8.h
24381+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v20, v1, v3, vcc_lo
24382+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v8.h
2438224383
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
24383-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 24, v22
24384-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 8, v22
24385-
; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[18:19], 24, v[21:22]
24386-
; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[19:20], 24, v[16:17]
24387-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 8, v21
24384+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 24, v21
24385+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 8, v21
24386+
; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[18:19], 24, v[20:21]
24387+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 8, v20
2438824388
; GFX11-TRUE16-NEXT: .LBB108_4: ; %end
2438924389
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
2439024390
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.h
2439124391
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v10.l
2439224392
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.h
24393-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v19.l
24393+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v22.l
2439424394
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v4.h
2439524395
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v17.h
2439624396
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v8.h
24397-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v21.h
24397+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v20.h
2439824398
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v18.l
2439924399
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v12.h
24400-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v22.h
24400+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v21.h
2440124401
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
2440224402
;
2440324403
; GFX11-FAKE16-LABEL: bitcast_v8bf16_to_v16i8:

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.160bit.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ define <10 x i16> @bitcast_v5i32_to_v10i16(<5 x i32> %a, i32 %b) {
424424
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
425425
; SI-NEXT: s_setpc_b64 s[30:31]
426426
; SI-NEXT: .LBB4_3: ; %cmp.false
427-
; SI-NEXT: v_alignbit_b32 v9, s4, v8, 16
427+
; SI-NEXT: v_alignbit_b32 v9, v0, v8, 16
428428
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
429429
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
430430
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6
@@ -439,7 +439,7 @@ define <10 x i16> @bitcast_v5i32_to_v10i16(<5 x i32> %a, i32 %b) {
439439
; SI-NEXT: v_add_i32_e32 v4, vcc, 3, v4
440440
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
441441
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
442-
; SI-NEXT: v_alignbit_b32 v9, s4, v8, 16
442+
; SI-NEXT: v_alignbit_b32 v9, v0, v8, 16
443443
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6
444444
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
445445
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
@@ -1634,7 +1634,7 @@ define <10 x i16> @bitcast_v5f32_to_v10i16(<5 x float> %a, i32 %b) {
16341634
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
16351635
; SI-NEXT: s_setpc_b64 s[30:31]
16361636
; SI-NEXT: .LBB12_3: ; %cmp.false
1637-
; SI-NEXT: v_alignbit_b32 v9, s4, v8, 16
1637+
; SI-NEXT: v_alignbit_b32 v9, v0, v8, 16
16381638
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
16391639
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
16401640
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6
@@ -1649,7 +1649,7 @@ define <10 x i16> @bitcast_v5f32_to_v10i16(<5 x float> %a, i32 %b) {
16491649
; SI-NEXT: v_add_f32_e32 v4, 1.0, v4
16501650
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
16511651
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
1652-
; SI-NEXT: v_alignbit_b32 v9, s4, v8, 16
1652+
; SI-NEXT: v_alignbit_b32 v9, v0, v8, 16
16531653
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6
16541654
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
16551655
; SI-NEXT: s_or_b64 exec, exec, s[4:5]

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.224bit.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ define <14 x i16> @bitcast_v7i32_to_v14i16(<7 x i32> %a, i32 %b) {
476476
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
477477
; SI-NEXT: s_setpc_b64 s[30:31]
478478
; SI-NEXT: .LBB4_3: ; %cmp.false
479-
; SI-NEXT: v_alignbit_b32 v13, s4, v12, 16
479+
; SI-NEXT: v_alignbit_b32 v13, v0, v12, 16
480480
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
481481
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
482482
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
@@ -496,7 +496,7 @@ define <14 x i16> @bitcast_v7i32_to_v14i16(<7 x i32> %a, i32 %b) {
496496
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
497497
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
498498
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
499-
; SI-NEXT: v_alignbit_b32 v13, s4, v12, 16
499+
; SI-NEXT: v_alignbit_b32 v13, v0, v12, 16
500500
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v10
501501
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6
502502
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
@@ -1939,7 +1939,7 @@ define <14 x i16> @bitcast_v7f32_to_v14i16(<7 x float> %a, i32 %b) {
19391939
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
19401940
; SI-NEXT: s_setpc_b64 s[30:31]
19411941
; SI-NEXT: .LBB12_3: ; %cmp.false
1942-
; SI-NEXT: v_alignbit_b32 v13, s4, v12, 16
1942+
; SI-NEXT: v_alignbit_b32 v13, v0, v12, 16
19431943
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
19441944
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
19451945
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
@@ -1959,7 +1959,7 @@ define <14 x i16> @bitcast_v7f32_to_v14i16(<7 x float> %a, i32 %b) {
19591959
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
19601960
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
19611961
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
1962-
; SI-NEXT: v_alignbit_b32 v13, s4, v12, 16
1962+
; SI-NEXT: v_alignbit_b32 v13, v0, v12, 16
19631963
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v10
19641964
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6
19651965
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.288bit.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ define <18 x i16> @bitcast_v9i32_to_v18i16(<9 x i32> %a, i32 %b) {
526526
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
527527
; SI-NEXT: s_setpc_b64 s[30:31]
528528
; SI-NEXT: .LBB4_3: ; %cmp.false
529-
; SI-NEXT: v_alignbit_b32 v17, s4, v16, 16
529+
; SI-NEXT: v_alignbit_b32 v17, v0, v16, 16
530530
; SI-NEXT: v_alignbit_b32 v13, v14, v12, 16
531531
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
532532
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
@@ -551,7 +551,7 @@ define <18 x i16> @bitcast_v9i32_to_v18i16(<9 x i32> %a, i32 %b) {
551551
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
552552
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
553553
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
554-
; SI-NEXT: v_alignbit_b32 v17, s4, v16, 16
554+
; SI-NEXT: v_alignbit_b32 v17, v0, v16, 16
555555
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v14
556556
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v10
557557
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6
@@ -2240,7 +2240,7 @@ define <18 x i16> @bitcast_v9f32_to_v18i16(<9 x float> %a, i32 %b) {
22402240
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
22412241
; SI-NEXT: s_setpc_b64 s[30:31]
22422242
; SI-NEXT: .LBB12_3: ; %cmp.false
2243-
; SI-NEXT: v_alignbit_b32 v17, s4, v16, 16
2243+
; SI-NEXT: v_alignbit_b32 v17, v0, v16, 16
22442244
; SI-NEXT: v_alignbit_b32 v13, v14, v12, 16
22452245
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
22462246
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
@@ -2265,7 +2265,7 @@ define <18 x i16> @bitcast_v9f32_to_v18i16(<9 x float> %a, i32 %b) {
22652265
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
22662266
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
22672267
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
2268-
; SI-NEXT: v_alignbit_b32 v17, s4, v16, 16
2268+
; SI-NEXT: v_alignbit_b32 v17, v0, v16, 16
22692269
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v14
22702270
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v10
22712271
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20108,7 +20108,7 @@ define <5 x double> @bitcast_v20i16_to_v5f64(<20 x i16> %a, i32 %b) {
2010820108
; SI-NEXT: v_add_i32_e32 v5, vcc, s6, v5
2010920109
; SI-NEXT: v_add_i32_e32 v6, vcc, s6, v6
2011020110
; SI-NEXT: v_add_i32_e32 v7, vcc, s6, v7
20111-
; SI-NEXT: v_add_i32_e32 v8, vcc, s6, v8
20111+
; SI-NEXT: v_add_i32_e32 v8, vcc, 0x30000, v8
2011220112
; SI-NEXT: v_add_i32_e32 v9, vcc, 0x30000, v9
2011320113
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
2011420114
; SI-NEXT: s_setpc_b64 s[30:31]
@@ -20954,7 +20954,7 @@ define <5 x i64> @bitcast_v20i16_to_v5i64(<20 x i16> %a, i32 %b) {
2095420954
; SI-NEXT: v_add_i32_e32 v5, vcc, s6, v5
2095520955
; SI-NEXT: v_add_i32_e32 v6, vcc, s6, v6
2095620956
; SI-NEXT: v_add_i32_e32 v7, vcc, s6, v7
20957-
; SI-NEXT: v_add_i32_e32 v8, vcc, s6, v8
20957+
; SI-NEXT: v_add_i32_e32 v8, vcc, 0x30000, v8
2095820958
; SI-NEXT: v_add_i32_e32 v9, vcc, 0x30000, v9
2095920959
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
2096020960
; SI-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.352bit.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ define <22 x i16> @bitcast_v11i32_to_v22i16(<11 x i32> %a, i32 %b) {
581581
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
582582
; SI-NEXT: s_setpc_b64 s[30:31]
583583
; SI-NEXT: .LBB4_3: ; %cmp.false
584-
; SI-NEXT: v_alignbit_b32 v21, s4, v20, 16
584+
; SI-NEXT: v_alignbit_b32 v21, v0, v20, 16
585585
; SI-NEXT: v_alignbit_b32 v17, v18, v16, 16
586586
; SI-NEXT: v_alignbit_b32 v13, v14, v12, 16
587587
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
@@ -611,7 +611,7 @@ define <22 x i16> @bitcast_v11i32_to_v22i16(<11 x i32> %a, i32 %b) {
611611
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
612612
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
613613
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
614-
; SI-NEXT: v_alignbit_b32 v21, s4, v20, 16
614+
; SI-NEXT: v_alignbit_b32 v21, v0, v20, 16
615615
; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v18
616616
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v14
617617
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v10
@@ -2541,7 +2541,7 @@ define <22 x i16> @bitcast_v11f32_to_v22i16(<11 x float> %a, i32 %b) {
25412541
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
25422542
; SI-NEXT: s_setpc_b64 s[30:31]
25432543
; SI-NEXT: .LBB12_3: ; %cmp.false
2544-
; SI-NEXT: v_alignbit_b32 v21, s4, v20, 16
2544+
; SI-NEXT: v_alignbit_b32 v21, v0, v20, 16
25452545
; SI-NEXT: v_alignbit_b32 v17, v18, v16, 16
25462546
; SI-NEXT: v_alignbit_b32 v13, v14, v12, 16
25472547
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
@@ -2571,7 +2571,7 @@ define <22 x i16> @bitcast_v11f32_to_v22i16(<11 x float> %a, i32 %b) {
25712571
; SI-NEXT: v_alignbit_b32 v9, v10, v8, 16
25722572
; SI-NEXT: v_alignbit_b32 v5, v6, v4, 16
25732573
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
2574-
; SI-NEXT: v_alignbit_b32 v21, s4, v20, 16
2574+
; SI-NEXT: v_alignbit_b32 v21, v0, v20, 16
25752575
; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v18
25762576
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v14
25772577
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v10

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.384bit.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7980,7 +7980,7 @@ define <6 x double> @bitcast_v24i16_to_v6f64(<24 x i16> %a, i32 %b) {
79807980
; SI-NEXT: v_add_i32_e32 v7, vcc, s6, v7
79817981
; SI-NEXT: v_add_i32_e32 v8, vcc, s6, v8
79827982
; SI-NEXT: v_add_i32_e32 v9, vcc, s6, v9
7983-
; SI-NEXT: v_add_i32_e32 v10, vcc, s6, v10
7983+
; SI-NEXT: v_add_i32_e32 v10, vcc, 0x30000, v10
79847984
; SI-NEXT: v_add_i32_e32 v11, vcc, 0x30000, v11
79857985
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
79867986
; SI-NEXT: s_setpc_b64 s[30:31]
@@ -10034,7 +10034,7 @@ define <6 x i64> @bitcast_v24i16_to_v6i64(<24 x i16> %a, i32 %b) {
1003410034
; SI-NEXT: v_add_i32_e32 v7, vcc, s6, v7
1003510035
; SI-NEXT: v_add_i32_e32 v8, vcc, s6, v8
1003610036
; SI-NEXT: v_add_i32_e32 v9, vcc, s6, v9
10037-
; SI-NEXT: v_add_i32_e32 v10, vcc, s6, v10
10037+
; SI-NEXT: v_add_i32_e32 v10, vcc, 0x30000, v10
1003810038
; SI-NEXT: v_add_i32_e32 v11, vcc, 0x30000, v11
1003910039
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
1004010040
; SI-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.448bit.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,7 +2383,7 @@ define <14 x i32> @bitcast_v28i16_to_v14i32(<28 x i16> %a, i32 %b) {
23832383
; SI-NEXT: v_add_i32_e32 v8, vcc, s6, v8
23842384
; SI-NEXT: v_add_i32_e32 v9, vcc, s6, v9
23852385
; SI-NEXT: v_add_i32_e32 v10, vcc, s6, v10
2386-
; SI-NEXT: v_add_i32_e32 v11, vcc, s6, v11
2386+
; SI-NEXT: v_add_i32_e32 v11, vcc, 0x30000, v11
23872387
; SI-NEXT: v_add_i32_e32 v12, vcc, 0x30000, v12
23882388
; SI-NEXT: v_add_i32_e32 v13, vcc, 0x30000, v13
23892389
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
@@ -5942,7 +5942,7 @@ define <14 x float> @bitcast_v28i16_to_v14f32(<28 x i16> %a, i32 %b) {
59425942
; SI-NEXT: v_add_i32_e32 v8, vcc, s6, v8
59435943
; SI-NEXT: v_add_i32_e32 v9, vcc, s6, v9
59445944
; SI-NEXT: v_add_i32_e32 v10, vcc, s6, v10
5945-
; SI-NEXT: v_add_i32_e32 v11, vcc, s6, v11
5945+
; SI-NEXT: v_add_i32_e32 v11, vcc, 0x30000, v11
59465946
; SI-NEXT: v_add_i32_e32 v12, vcc, 0x30000, v12
59475947
; SI-NEXT: v_add_i32_e32 v13, vcc, 0x30000, v13
59485948
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
@@ -8862,7 +8862,7 @@ define <7 x i64> @bitcast_v28i16_to_v7i64(<28 x i16> %a, i32 %b) {
88628862
; SI-NEXT: v_add_i32_e32 v8, vcc, s6, v8
88638863
; SI-NEXT: v_add_i32_e32 v9, vcc, s6, v9
88648864
; SI-NEXT: v_add_i32_e32 v10, vcc, s6, v10
8865-
; SI-NEXT: v_add_i32_e32 v11, vcc, s6, v11
8865+
; SI-NEXT: v_add_i32_e32 v11, vcc, 0x30000, v11
88668866
; SI-NEXT: v_add_i32_e32 v12, vcc, 0x30000, v12
88678867
; SI-NEXT: v_add_i32_e32 v13, vcc, 0x30000, v13
88688868
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
@@ -11188,7 +11188,7 @@ define <7 x double> @bitcast_v28i16_to_v7f64(<28 x i16> %a, i32 %b) {
1118811188
; SI-NEXT: v_add_i32_e32 v8, vcc, s6, v8
1118911189
; SI-NEXT: v_add_i32_e32 v9, vcc, s6, v9
1119011190
; SI-NEXT: v_add_i32_e32 v10, vcc, s6, v10
11191-
; SI-NEXT: v_add_i32_e32 v11, vcc, s6, v11
11191+
; SI-NEXT: v_add_i32_e32 v11, vcc, 0x30000, v11
1119211192
; SI-NEXT: v_add_i32_e32 v12, vcc, 0x30000, v12
1119311193
; SI-NEXT: v_add_i32_e32 v13, vcc, 0x30000, v13
1119411194
; SI-NEXT: s_or_b64 exec, exec, s[4:5]

0 commit comments

Comments
 (0)