@@ -18735,68 +18735,68 @@ define inreg <8 x i16> @bitcast_v8bf16_to_v8i16_scalar(<8 x bfloat> inreg %a, i3
1873518735; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1873618736; SI-NEXT: s_cmp_lg_u32 s24, 0
1873718737; SI-NEXT: v_mul_f32_e64 v15, 1.0, s16
18738- ; SI-NEXT: v_mul_f32_e64 v14 , 1.0, s17
18739- ; SI-NEXT: v_mul_f32_e64 v2 , 1.0, s18
18740- ; SI-NEXT: v_mul_f32_e64 v10 , 1.0, s19
18741- ; SI-NEXT: v_mul_f32_e64 v13 , 1.0, s20
18742- ; SI-NEXT: v_mul_f32_e64 v12 , 1.0, s21
18743- ; SI-NEXT: v_mul_f32_e64 v6 , 1.0, s22
18744- ; SI-NEXT: v_mul_f32_e64 v11 , 1.0, s23
18738+ ; SI-NEXT: v_mul_f32_e64 v1 , 1.0, s17
18739+ ; SI-NEXT: v_mul_f32_e64 v9 , 1.0, s18
18740+ ; SI-NEXT: v_mul_f32_e64 v3 , 1.0, s19
18741+ ; SI-NEXT: v_mul_f32_e64 v14 , 1.0, s20
18742+ ; SI-NEXT: v_mul_f32_e64 v5 , 1.0, s21
18743+ ; SI-NEXT: v_mul_f32_e64 v7 , 1.0, s22
18744+ ; SI-NEXT: v_mul_f32_e64 v13 , 1.0, s23
1874518745; SI-NEXT: s_cbranch_scc0 .LBB95_4
1874618746; SI-NEXT: ; %bb.1: ; %cmp.false
1874718747; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v15
18748- ; SI-NEXT: v_lshrrev_b32_e32 v1 , 16, v14
18749- ; SI-NEXT: v_lshrrev_b32_e32 v9 , 16, v2
18750- ; SI-NEXT: v_lshrrev_b32_e32 v3 , 16, v10
18751- ; SI-NEXT: v_lshrrev_b32_e32 v4 , 16, v13
18752- ; SI-NEXT: v_lshrrev_b32_e32 v5 , 16, v12
18753- ; SI-NEXT: v_lshrrev_b32_e32 v8 , 16, v6
18754- ; SI-NEXT: v_lshrrev_b32_e32 v7 , 16, v11
18748+ ; SI-NEXT: v_lshrrev_b32_e32 v11 , 16, v1
18749+ ; SI-NEXT: v_lshrrev_b32_e32 v10 , 16, v3
18750+ ; SI-NEXT: v_lshrrev_b32_e32 v4 , 16, v14
18751+ ; SI-NEXT: v_lshrrev_b32_e32 v12 , 16, v5
18752+ ; SI-NEXT: v_lshrrev_b32_e32 v8 , 16, v13
18753+ ; SI-NEXT: v_lshrrev_b32_e32 v2 , 16, v9
18754+ ; SI-NEXT: v_lshrrev_b32_e32 v6 , 16, v7
1875518755; SI-NEXT: s_cbranch_execnz .LBB95_3
1875618756; SI-NEXT: .LBB95_2: ; %cmp.true
18757- ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v14
18757+ ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1875818758; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v15
18759- ; SI-NEXT: v_add_f32_e32 v14 , 0x40c00000, v1
18759+ ; SI-NEXT: v_add_f32_e32 v11 , 0x40c00000, v1
1876018760; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
18761- ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v14
18761+ ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v11
1876218762; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
18763+ ; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v5
18764+ ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v14
18765+ ; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v2
18766+ ; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
18767+ ; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v10
18768+ ; SI-NEXT: v_lshr_b64 v[4:5], v[1:2], 16
18769+ ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v7
18770+ ; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v1
1876318771; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v13
18764- ; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v1
18765- ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v12
1876618772; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
18767- ; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v1
18768- ; SI-NEXT: v_lshr_b64 v[4:5], v[3:4], 16
18769- ; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v6
18770- ; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v3
18771- ; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v11
18772- ; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
18773- ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
18774- ; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
18775- ; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v1
18776- ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v10
18777- ; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v3
18773+ ; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v1
18774+ ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v9
18775+ ; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v1
18776+ ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v3
1877818777; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
18779- ; SI-NEXT: v_lshr_b64 v[8:9], v[6:7], 16
18780- ; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v1
18781- ; SI-NEXT: v_lshr_b64 v[9:10], v[2:3], 16
18782- ; SI-NEXT: v_mov_b32_e32 v6, v8
18783- ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v14
18784- ; SI-NEXT: v_mov_b32_e32 v2, v9
18785- ; SI-NEXT: v_lshr_b64 v[1:2], v[1:2], 16
18786- ; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
18778+ ; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v10
18779+ ; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v1
18780+ ; SI-NEXT: v_lshr_b64 v[2:3], v[9:10], 16
18781+ ; SI-NEXT: v_lshr_b64 v[6:7], v[7:8], 16
18782+ ; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v11
18783+ ; SI-NEXT: v_lshr_b64 v[11:12], v[1:2], 16
18784+ ; SI-NEXT: v_lshr_b64 v[12:13], v[5:6], 16
1878718785; SI-NEXT: .LBB95_3: ; %end
18788- ; SI-NEXT: v_mov_b32_e32 v2, v9
18789- ; SI-NEXT: v_mov_b32_e32 v6, v8
18786+ ; SI-NEXT: v_mov_b32_e32 v1, v11
18787+ ; SI-NEXT: v_mov_b32_e32 v3, v10
18788+ ; SI-NEXT: v_mov_b32_e32 v5, v12
18789+ ; SI-NEXT: v_mov_b32_e32 v7, v8
1879018790; SI-NEXT: s_setpc_b64 s[30:31]
1879118791; SI-NEXT: .LBB95_4:
1879218792; SI-NEXT: ; implicit-def: $vgpr0
18793- ; SI-NEXT: ; implicit-def: $vgpr1
18794- ; SI-NEXT: ; implicit-def: $vgpr9
18795- ; SI-NEXT: ; implicit-def: $vgpr3
18793+ ; SI-NEXT: ; implicit-def: $vgpr11
18794+ ; SI-NEXT: ; implicit-def: $vgpr2
18795+ ; SI-NEXT: ; implicit-def: $vgpr10
1879618796; SI-NEXT: ; implicit-def: $vgpr4
18797- ; SI-NEXT: ; implicit-def: $vgpr5
18797+ ; SI-NEXT: ; implicit-def: $vgpr12
18798+ ; SI-NEXT: ; implicit-def: $vgpr6
1879818799; SI-NEXT: ; implicit-def: $vgpr8
18799- ; SI-NEXT: ; implicit-def: $vgpr7
1880018800; SI-NEXT: s_branch .LBB95_2
1880118801;
1880218802; VI-LABEL: bitcast_v8bf16_to_v8i16_scalar:
@@ -24555,7 +24555,7 @@ define inreg <16 x i8> @bitcast_v8bf16_to_v16i8_scalar(<8 x bfloat> inreg %a, i3
2455524555; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2455624556; SI-NEXT: s_cmp_lg_u32 s24, 0
2455724557; SI-NEXT: v_mul_f32_e64 v28, 1.0, s17
24558- ; SI-NEXT: v_mul_f32_e64 v23 , 1.0, s16
24558+ ; SI-NEXT: v_mul_f32_e64 v8 , 1.0, s16
2455924559; SI-NEXT: v_mul_f32_e64 v27, 1.0, s19
2456024560; SI-NEXT: v_mul_f32_e64 v5, 1.0, s18
2456124561; SI-NEXT: v_mul_f32_e64 v30, 1.0, s21
@@ -24564,87 +24564,84 @@ define inreg <16 x i8> @bitcast_v8bf16_to_v16i8_scalar(<8 x bfloat> inreg %a, i3
2456424564; SI-NEXT: v_mul_f32_e64 v13, 1.0, s22
2456524565; SI-NEXT: s_cbranch_scc0 .LBB109_4
2456624566; SI-NEXT: ; %bb.1: ; %cmp.false
24567+ ; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v28
24568+ ; SI-NEXT: v_lshrrev_b32_e32 v26, 16, v30
24569+ ; SI-NEXT: v_lshr_b64 v[19:20], v[8:9], 16
2456724570; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v27
24568- ; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v28
24569- ; SI-NEXT: v_lshr_b64 v[19:20], v[5:6], 16
24571+ ; SI-NEXT: v_lshr_b64 v[22:23], v[25:26], 16
2457024572; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v29
24571- ; SI-NEXT: v_lshr_b64 v[0:1], v[23:24], 16
24572- ; SI-NEXT: v_lshrrev_b32_e32 v26, 16, v30
24573- ; SI-NEXT: v_lshr_b64 v[21:22], v[13:14], 16
24574- ; SI-NEXT: v_mov_b32_e32 v1, v19
24575- ; SI-NEXT: v_lshr_b64 v[8:9], v[25:26], 16
24576- ; SI-NEXT: v_mov_b32_e32 v9, v21
24577- ; SI-NEXT: v_lshr_b64 v[16:17], v[0:1], 16
24573+ ; SI-NEXT: v_lshr_b64 v[20:21], v[5:6], 16
24574+ ; SI-NEXT: v_lshr_b64 v[23:24], v[13:14], 16
24575+ ; SI-NEXT: v_lshr_b64 v[0:1], v[19:20], 16
24576+ ; SI-NEXT: v_lshr_b64 v[10:11], v[22:23], 16
24577+ ; SI-NEXT: v_lshr_b64 v[3:4], v[19:20], 24
2457824578; SI-NEXT: v_lshrrev_b32_e32 v7, 24, v27
2457924579; SI-NEXT: v_lshrrev_b32_e32 v15, 24, v29
24580- ; SI-NEXT: v_lshrrev_b32_e32 v20, 8, v19
24581- ; SI-NEXT: v_lshrrev_b32_e32 v22, 8, v21
24582- ; SI-NEXT: v_lshr_b64 v[3:4], v[0:1], 24
24583- ; SI-NEXT: v_lshr_b64 v[1:2], v[0:1], 8
24584- ; SI-NEXT: v_lshr_b64 v[11:12], v[8:9], 24
24585- ; SI-NEXT: v_lshr_b64 v[17:18], v[8:9], 16
24586- ; SI-NEXT: v_lshr_b64 v[9:10], v[8:9], 8
24580+ ; SI-NEXT: v_lshrrev_b32_e32 v9, 8, v20
24581+ ; SI-NEXT: v_lshrrev_b32_e32 v16, 8, v23
24582+ ; SI-NEXT: v_lshr_b64 v[1:2], v[19:20], 8
24583+ ; SI-NEXT: v_lshr_b64 v[17:18], v[22:23], 24
24584+ ; SI-NEXT: v_lshr_b64 v[11:12], v[22:23], 8
2458724585; SI-NEXT: s_cbranch_execnz .LBB109_3
2458824586; SI-NEXT: .LBB109_2: ; %cmp.true
24589- ; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v13
2459024587; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v30
24591- ; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v2
24592- ; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v29
2459324588; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v25
2459424589; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
24595- ; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v2
24596- ; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v5
2459724590; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
2459824591; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
24599- ; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v2
24600- ; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v27
24601- ; SI-NEXT: v_lshr_b64 v[8:9], v[0:1], 16
24592+ ; SI-NEXT: v_lshr_b64 v[22:23], v[0:1], 16
24593+ ; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v13
24594+ ; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v0
24595+ ; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v29
2460224596; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v28
24603- ; SI-NEXT: v_add_f32_e32 v7 , 0x40c00000, v2
24604- ; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v23
24597+ ; SI-NEXT: v_add_f32_e32 v12 , 0x40c00000, v0
24598+ ; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v8
2460524599; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
24606- ; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v7
24607- ; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v10
2460824600; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
2460924601; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
24610- ; SI-NEXT: v_lshr_b64 v[19:20], v[5:6], 16
24611- ; SI-NEXT: v_lshr_b64 v[21:22], v[13:14], 16
24612- ; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
24613- ; SI-NEXT: v_mov_b32_e32 v1, v19
24614- ; SI-NEXT: v_mov_b32_e32 v9, v21
24615- ; SI-NEXT: v_lshr_b64 v[16:17], v[0:1], 16
24616- ; SI-NEXT: v_lshr_b64 v[3:4], v[0:1], 24
24617- ; SI-NEXT: v_lshr_b64 v[1:2], v[0:1], 8
24618- ; SI-NEXT: v_lshr_b64 v[11:12], v[8:9], 24
24619- ; SI-NEXT: v_lshrrev_b32_e32 v15, 24, v10
24620- ; SI-NEXT: v_lshr_b64 v[17:18], v[8:9], 16
24621- ; SI-NEXT: v_lshr_b64 v[9:10], v[8:9], 8
24622- ; SI-NEXT: v_lshrrev_b32_e32 v20, 8, v19
24623- ; SI-NEXT: v_lshrrev_b32_e32 v22, 8, v21
24602+ ; SI-NEXT: v_lshr_b64 v[19:20], v[0:1], 16
24603+ ; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v5
24604+ ; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v0
24605+ ; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v27
24606+ ; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v0
24607+ ; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v12
24608+ ; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v7
24609+ ; SI-NEXT: v_lshr_b64 v[20:21], v[5:6], 16
24610+ ; SI-NEXT: v_lshr_b64 v[23:24], v[13:14], 16
24611+ ; SI-NEXT: v_lshr_b64 v[0:1], v[19:20], 16
24612+ ; SI-NEXT: v_lshr_b64 v[10:11], v[22:23], 16
24613+ ; SI-NEXT: v_lshr_b64 v[3:4], v[19:20], 24
24614+ ; SI-NEXT: v_lshr_b64 v[1:2], v[19:20], 8
24615+ ; SI-NEXT: v_lshr_b64 v[17:18], v[22:23], 24
24616+ ; SI-NEXT: v_lshrrev_b32_e32 v15, 24, v12
24617+ ; SI-NEXT: v_lshr_b64 v[11:12], v[22:23], 8
24618+ ; SI-NEXT: v_lshrrev_b32_e32 v9, 8, v20
24619+ ; SI-NEXT: v_lshrrev_b32_e32 v16, 8, v23
2462424620; SI-NEXT: v_lshrrev_b32_e32 v7, 24, v7
2462524621; SI-NEXT: .LBB109_3: ; %end
24626- ; SI-NEXT: v_mov_b32_e32 v2, v16
24627- ; SI-NEXT: v_mov_b32_e32 v4, v19
24628- ; SI-NEXT: v_mov_b32_e32 v5, v20
24629- ; SI-NEXT: v_mov_b32_e32 v10, v17
24630- ; SI-NEXT: v_mov_b32_e32 v12, v21
24631- ; SI-NEXT: v_mov_b32_e32 v13, v22
24622+ ; SI-NEXT: v_mov_b32_e32 v2, v0
24623+ ; SI-NEXT: v_mov_b32_e32 v0, v19
24624+ ; SI-NEXT: v_mov_b32_e32 v4, v20
24625+ ; SI-NEXT: v_mov_b32_e32 v5, v9
24626+ ; SI-NEXT: v_mov_b32_e32 v8, v22
24627+ ; SI-NEXT: v_mov_b32_e32 v9, v11
24628+ ; SI-NEXT: v_mov_b32_e32 v11, v17
24629+ ; SI-NEXT: v_mov_b32_e32 v12, v23
24630+ ; SI-NEXT: v_mov_b32_e32 v13, v16
2463224631; SI-NEXT: s_setpc_b64 s[30:31]
2463324632; SI-NEXT: .LBB109_4:
24634- ; SI-NEXT: ; implicit-def: $vgpr0
24633+ ; SI-NEXT: ; implicit-def: $vgpr19
2463524634; SI-NEXT: ; implicit-def: $vgpr1
24636- ; SI-NEXT: ; implicit-def: $vgpr16
24635+ ; SI-NEXT: ; implicit-def: $vgpr0
2463724636; SI-NEXT: ; implicit-def: $vgpr3
24638- ; SI-NEXT: ; implicit-def: $vgpr19
24639- ; SI-NEXT: ; implicit-def: $vgpr20
24640- ; SI-NEXT: ; implicit-def: $vgpr7
24641- ; SI-NEXT: ; implicit-def: $vgpr8
2464224637; SI-NEXT: ; implicit-def: $vgpr9
24643- ; SI-NEXT: ; implicit-def: $vgpr17
24638+ ; SI-NEXT: ; implicit-def: $vgpr7
2464424639; SI-NEXT: ; implicit-def: $vgpr22
24645- ; SI-NEXT: ; implicit-def: $vgpr15
2464624640; SI-NEXT: ; implicit-def: $vgpr11
24647- ; SI-NEXT: ; implicit-def: $vgpr21
24641+ ; SI-NEXT: ; implicit-def: $vgpr10
24642+ ; SI-NEXT: ; implicit-def: $vgpr16
24643+ ; SI-NEXT: ; implicit-def: $vgpr15
24644+ ; SI-NEXT: ; implicit-def: $vgpr17
2464824645; SI-NEXT: s_branch .LBB109_2
2464924646;
2465024647; VI-LABEL: bitcast_v8bf16_to_v16i8_scalar:
0 commit comments