1111// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E4M3FN>
1212// CHECK: return [[CAST]] : vector<4xf8E4M3FN>
1313func.func @packed_scaled_trunc_f8e4m3_f32 (%v: vector <2 xf32 >, %scale: f32 ) -> vector <4 xf8 E4 M3 FN> {
14- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xf32 > to vector <4 xf8 E4 M3 FN>
14+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xf32 > to vector <4 xf8 E4 M3 FN>
1515 func.return %ret : vector <4 xf8 E4 M3 FN>
1616}
1717
@@ -27,7 +27,7 @@ func.func @packed_scaled_trunc_f8e4m3_f32(%v: vector<2xf32>, %scale: f32) -> vec
2727// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E4M3FN>
2828// CHECK: return [[CAST]] : vector<4xf8E4M3FN>
2929func.func @packed_scaled_trunc_into_f8e4m3_f32 (%v: vector <2 xf32 >, %existing: vector <4 xf8 E4 M3 FN>, %scale: f32 ) -> vector <4 xf8 E4 M3 FN> {
30- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xf32 > to vector <4 xf8 E4 M3 FN> into vector <4 xf8 E4 M3 FN>
30+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xf32 > to vector <4 xf8 E4 M3 FN> into vector <4 xf8 E4 M3 FN>
3131 func.return %ret : vector <4 xf8 E4 M3 FN>
3232}
3333
@@ -38,7 +38,7 @@ func.func @packed_scaled_trunc_into_f8e4m3_f32(%v: vector<2xf32>, %existing: vec
3838// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E4M3FN>
3939// CHECK: return [[CAST]] : vector<4xf8E4M3FN>
4040func.func @packed_scaled_trunc_f8e4m3_f16 (%v: vector <2 xf16 >, %scale: f32 ) -> vector <4 xf8 E4 M3 FN> {
41- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xf16 > to vector <4 xf8 E4 M3 FN>
41+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xf16 > to vector <4 xf8 E4 M3 FN>
4242 func.return %ret : vector <4 xf8 E4 M3 FN>
4343}
4444
@@ -50,7 +50,7 @@ func.func @packed_scaled_trunc_f8e4m3_f16(%v: vector<2xf16>, %scale: f32) -> vec
5050// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E4M3FN>
5151// CHECK: return [[CAST]] : vector<4xf8E4M3FN>
5252func.func @packed_scaled_trunc_into_f8e4m3_f16 (%v: vector <2 xf16 >, %existing: vector <4 xf8 E4 M3 FN>, %scale: f32 ) -> vector <4 xf8 E4 M3 FN> {
53- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xf16 > to vector <4 xf8 E4 M3 FN> into vector <4 xf8 E4 M3 FN>
53+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xf16 > to vector <4 xf8 E4 M3 FN> into vector <4 xf8 E4 M3 FN>
5454 func.return %ret : vector <4 xf8 E4 M3 FN>
5555}
5656
@@ -61,7 +61,7 @@ func.func @packed_scaled_trunc_into_f8e4m3_f16(%v: vector<2xf16>, %existing: vec
6161// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E4M3FN>
6262// CHECK: return [[CAST]] : vector<4xf8E4M3FN>
6363func.func @packed_scaled_trunc_f8e4m3_bf16 (%v: vector <2 xbf16 >, %scale: f32 ) -> vector <4 xf8 E4 M3 FN> {
64- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xbf16 > to vector <4 xf8 E4 M3 FN>
64+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xbf16 > to vector <4 xf8 E4 M3 FN>
6565 func.return %ret : vector <4 xf8 E4 M3 FN>
6666}
6767
@@ -73,7 +73,7 @@ func.func @packed_scaled_trunc_f8e4m3_bf16(%v: vector<2xbf16>, %scale: f32) -> v
7373// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E4M3FN>
7474// CHECK: return [[CAST]] : vector<4xf8E4M3FN>
7575func.func @packed_scaled_trunc_into_f8e4m3_bf16 (%v: vector <2 xbf16 >, %existing: vector <4 xf8 E4 M3 FN>, %scale: f32 ) -> vector <4 xf8 E4 M3 FN> {
76- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xbf16 > to vector <4 xf8 E4 M3 FN> into vector <4 xf8 E4 M3 FN>
76+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xbf16 > to vector <4 xf8 E4 M3 FN> into vector <4 xf8 E4 M3 FN>
7777 func.return %ret : vector <4 xf8 E4 M3 FN>
7878}
7979
@@ -88,7 +88,7 @@ func.func @packed_scaled_trunc_into_f8e4m3_bf16(%v: vector<2xbf16>, %existing: v
8888// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E5M2>
8989// CHECK: return [[CAST]] : vector<4xf8E5M2>
9090func.func @packed_scaled_trunc_f8e5m2_f32 (%v: vector <2 xf32 >, %scale: f32 ) -> vector <4 xf8 E5 M2 > {
91- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xf32 > to vector <4 xf8 E5 M2 >
91+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xf32 > to vector <4 xf8 E5 M2 >
9292 func.return %ret : vector <4 xf8 E5 M2 >
9393}
9494
@@ -104,7 +104,7 @@ func.func @packed_scaled_trunc_f8e5m2_f32(%v: vector<2xf32>, %scale: f32) -> vec
104104// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E5M2>
105105// CHECK: return [[CAST]] : vector<4xf8E5M2>
106106func.func @packed_scaled_trunc_into_f8e5m2_f32 (%v: vector <2 xf32 >, %existing: vector <4 xf8 E5 M2 >, %scale: f32 ) -> vector <4 xf8 E5 M2 > {
107- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xf32 > to vector <4 xf8 E5 M2 > into vector <4 xf8 E5 M2 >
107+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xf32 > to vector <4 xf8 E5 M2 > into vector <4 xf8 E5 M2 >
108108 func.return %ret : vector <4 xf8 E5 M2 >
109109}
110110
@@ -115,7 +115,7 @@ func.func @packed_scaled_trunc_into_f8e5m2_f32(%v: vector<2xf32>, %existing: vec
115115// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E5M2>
116116// CHECK: return [[CAST]] : vector<4xf8E5M2>
117117func.func @packed_scaled_trunc_f8e5m2_f16 (%v: vector <2 xf16 >, %scale: f32 ) -> vector <4 xf8 E5 M2 > {
118- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xf16 > to vector <4 xf8 E5 M2 >
118+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xf16 > to vector <4 xf8 E5 M2 >
119119 func.return %ret : vector <4 xf8 E5 M2 >
120120}
121121
@@ -127,7 +127,7 @@ func.func @packed_scaled_trunc_f8e5m2_f16(%v: vector<2xf16>, %scale: f32) -> vec
127127// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E5M2>
128128// CHECK: return [[CAST]] : vector<4xf8E5M2>
129129func.func @packed_scaled_trunc_into_f8e5m2_f16 (%v: vector <2 xf16 >, %existing: vector <4 xf8 E5 M2 >, %scale: f32 ) -> vector <4 xf8 E5 M2 > {
130- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xf16 > to vector <4 xf8 E5 M2 > into vector <4 xf8 E5 M2 >
130+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xf16 > to vector <4 xf8 E5 M2 > into vector <4 xf8 E5 M2 >
131131 func.return %ret : vector <4 xf8 E5 M2 >
132132}
133133
@@ -138,7 +138,7 @@ func.func @packed_scaled_trunc_into_f8e5m2_f16(%v: vector<2xf16>, %existing: vec
138138// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E5M2>
139139// CHECK: return [[CAST]] : vector<4xf8E5M2>
140140func.func @packed_scaled_trunc_f8e5m2_bf16 (%v: vector <2 xbf16 >, %scale: f32 ) -> vector <4 xf8 E5 M2 > {
141- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xbf16 > to vector <4 xf8 E5 M2 >
141+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xbf16 > to vector <4 xf8 E5 M2 >
142142 func.return %ret : vector <4 xf8 E5 M2 >
143143}
144144
@@ -150,7 +150,7 @@ func.func @packed_scaled_trunc_f8e5m2_bf16(%v: vector<2xbf16>, %scale: f32) -> v
150150// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<4xi8> to vector<4xf8E5M2>
151151// CHECK: return [[CAST]] : vector<4xf8E5M2>
152152func.func @packed_scaled_trunc_into_f8e5m2_bf16 (%v: vector <2 xbf16 >, %existing: vector <4 xf8 E5 M2 >, %scale: f32 ) -> vector <4 xf8 E5 M2 > {
153- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xbf16 > to vector <4 xf8 E5 M2 > into vector <4 xf8 E5 M2 >
153+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xbf16 > to vector <4 xf8 E5 M2 > into vector <4 xf8 E5 M2 >
154154 func.return %ret : vector <4 xf8 E5 M2 >
155155}
156156
@@ -165,7 +165,7 @@ func.func @packed_scaled_trunc_into_f8e5m2_bf16(%v: vector<2xbf16>, %existing: v
165165// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<8xi4> to vector<8xf4E2M1FN>
166166// CHECK: return [[CAST]] : vector<8xf4E2M1FN>
167167func.func @packed_scaled_trunc_f4e2m1_f32 (%v: vector <2 xf32 >, %scale: f32 ) -> vector <8 xf4 E2 M1 FN> {
168- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xf32 > to vector <8 xf4 E2 M1 FN>
168+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xf32 > to vector <8 xf4 E2 M1 FN>
169169 func.return %ret : vector <8 xf4 E2 M1 FN>
170170}
171171
@@ -181,7 +181,7 @@ func.func @packed_scaled_trunc_f4e2m1_f32(%v: vector<2xf32>, %scale: f32) -> vec
181181// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<8xi4> to vector<8xf4E2M1FN>
182182// CHECK: return [[CAST]] : vector<8xf4E2M1FN>
183183func.func @packed_scaled_trunc_into_f4e2m1_f32 (%v: vector <2 xf32 >, %existing: vector <8 xf4 E2 M1 FN>, %scale: f32 ) -> vector <8 xf4 E2 M1 FN> {
184- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xf32 > to vector <8 xf4 E2 M1 FN> into vector <8 xf4 E2 M1 FN>
184+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xf32 > to vector <8 xf4 E2 M1 FN> into vector <8 xf4 E2 M1 FN>
185185 func.return %ret : vector <8 xf4 E2 M1 FN>
186186}
187187
@@ -192,7 +192,7 @@ func.func @packed_scaled_trunc_into_f4e2m1_f32(%v: vector<2xf32>, %existing: vec
192192// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<8xi4> to vector<8xf4E2M1FN>
193193// CHECK: return [[CAST]] : vector<8xf4E2M1FN>
194194func.func @packed_scaled_trunc_f4e2m1_f16 (%v: vector <2 xf16 >, %scale: f32 ) -> vector <8 xf4 E2 M1 FN> {
195- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xf16 > to vector <8 xf4 E2 M1 FN>
195+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xf16 > to vector <8 xf4 E2 M1 FN>
196196 func.return %ret : vector <8 xf4 E2 M1 FN>
197197}
198198
@@ -204,7 +204,7 @@ func.func @packed_scaled_trunc_f4e2m1_f16(%v: vector<2xf16>, %scale: f32) -> vec
204204// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<8xi4> to vector<8xf4E2M1FN>
205205// CHECK: return [[CAST]] : vector<8xf4E2M1FN>
206206func.func @packed_scaled_trunc_into_f4e2m1_f16 (%v: vector <2 xf16 >, %existing: vector <8 xf4 E2 M1 FN>, %scale: f32 ) -> vector <8 xf4 E2 M1 FN> {
207- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xf16 > to vector <8 xf4 E2 M1 FN> into vector <8 xf4 E2 M1 FN>
207+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xf16 > to vector <8 xf4 E2 M1 FN> into vector <8 xf4 E2 M1 FN>
208208 func.return %ret : vector <8 xf4 E2 M1 FN>
209209}
210210
@@ -215,7 +215,7 @@ func.func @packed_scaled_trunc_into_f4e2m1_f16(%v: vector<2xf16>, %existing: vec
215215// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<8xi4> to vector<8xf4E2M1FN>
216216// CHECK: return [[CAST]] : vector<8xf4E2M1FN>
217217func.func @packed_scaled_trunc_f4e2m1_bf16 (%v: vector <2 xbf16 >, %scale: f32 ) -> vector <8 xf4 E2 M1 FN> {
218- %ret = amdgpu.packed_scaled_trunc %v into undef [index 0 ], %scale : vector <2 xbf16 > to vector <8 xf4 E2 M1 FN>
218+ %ret = amdgpu.packed_scaled_trunc %v into undef [0 ], %scale : vector <2 xbf16 > to vector <8 xf4 E2 M1 FN>
219219 func.return %ret : vector <8 xf4 E2 M1 FN>
220220}
221221
@@ -227,6 +227,6 @@ func.func @packed_scaled_trunc_f4e2m1_bf16(%v: vector<2xbf16>, %scale: f32) -> v
227227// CHECK: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[BITCAST]] : vector<8xi4> to vector<8xf4E2M1FN>
228228// CHECK: return [[CAST]] : vector<8xf4E2M1FN>
229229func.func @packed_scaled_trunc_into_f4e2m1_bf16 (%v: vector <2 xbf16 >, %existing: vector <8 xf4 E2 M1 FN>, %scale: f32 ) -> vector <8 xf4 E2 M1 FN> {
230- %ret = amdgpu.packed_scaled_trunc %v into %existing [index 0 ], %scale : vector <2 xbf16 > to vector <8 xf4 E2 M1 FN> into vector <8 xf4 E2 M1 FN>
230+ %ret = amdgpu.packed_scaled_trunc %v into %existing [0 ], %scale : vector <2 xbf16 > to vector <8 xf4 E2 M1 FN> into vector <8 xf4 E2 M1 FN>
231231 func.return %ret : vector <8 xf4 E2 M1 FN>
232232}
0 commit comments