@@ -196,7 +196,7 @@ func.func @negative_vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{
196196// CHECK-LABEL: func @negative_vector_fma_3d
197197// CHECK-NOT: vector.extract_strided_slice
198198// CHECK: %[[R0:.*]] = vector.fma %{{.+}} : vector<3x2x2xf32>
199- // CHECK: return
199+ // CHECK: return
200200
201201func.func @vector_multi_reduction (%v : vector <4 x6 xf32 >, %acc: vector <4 xf32 >) -> vector <4 xf32 > {
202202 %0 = vector.multi_reduction #vector.kind <add >, %v , %acc [1 ] : vector <4 x6 xf32 > to vector <4 xf32 >
@@ -311,3 +311,70 @@ func.func @vector_contract_batched(%lhs: vector<8x8x4xf32>, %rhs: vector<8x8x4xf
311311// BATCHED-COUNT-16: vector.contract
312312// BATCHED-NOT: vector.contract
313313// BATCHED: return
314+
315+
316+ func.func @vector_broadcast (%v: vector <4 xf32 >) -> vector <4 x4 xf32 > {
317+ %0 = vector.broadcast %v : vector <4 xf32 > to vector <4 x4 xf32 >
318+ return %0 : vector <4 x4 xf32 >
319+ }
320+
321+ // CHECK-LABEL: func @vector_broadcast
322+ // CHECK-SAME: [[arg0:%.+]]: vector<4xf32>
323+ // CHECK: [[c:%.+]] = arith.constant dense<0.000000e+00> : vector<4x4xf32>
324+ // CHECK: [[s0:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [0], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
325+ // CHECK: [[b0:%.+]] = vector.broadcast [[s0]] : vector<2xf32> to vector<2x2xf32>
326+ // CHECK: [[r0:%.+]] = vector.insert_strided_slice [[b0]], [[c]] {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
327+ // CHECK: [[s1:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
328+ // CHECK: [[b1:%.+]] = vector.broadcast [[s1]] : vector<2xf32> to vector<2x2xf32>
329+ // CHECK: [[r1:%.+]] = vector.insert_strided_slice [[b1]], [[r0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
330+ // CHECK: [[s2:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [0], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
331+ // CHECK: [[b2:%.+]] = vector.broadcast [[s2]] : vector<2xf32> to vector<2x2xf32>
332+ // CHECK: [[r2:%.+]] = vector.insert_strided_slice [[b2]], [[r1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
333+ // CHECK: [[s3:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
334+ // CHECK: [[b3:%.+]] = vector.broadcast [[s3]] : vector<2xf32> to vector<2x2xf32>
335+ // CHECK: [[r3:%.+]] = vector.insert_strided_slice [[b3]], [[r2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
336+ // CHECK: return [[r3]] : vector<4x4xf32>
337+
338+ func.func @vector_broadcast_with_leading_unit_dim (%v: vector <1 x4 xf32 >) -> vector <4 x4 xf32 > {
339+ %0 = vector.broadcast %v : vector <1 x4 xf32 > to vector <4 x4 xf32 >
340+ return %0 : vector <4 x4 xf32 >
341+ }
342+
343+ // CHECK-LABEL: func.func @vector_broadcast_with_leading_unit_dim
344+ // CHECK-SAME: ([[arg0:%.+]]: vector<1x4xf32>) -> vector<4x4xf32> {
345+ // CHECK: [[c:%.+]] = arith.constant dense<0.000000e+00> : vector<4x4xf32>
346+ // CHECK: [[s0:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [0, 0], sizes = [1, 2], strides = [1, 1]} : vector<1x4xf32> to vector<1x2xf32>
347+ // CHECK: [[b0:%.+]] = vector.broadcast [[s0]] : vector<1x2xf32> to vector<2x2xf32>
348+ // CHECK: [[r0:%.+]] = vector.insert_strided_slice [[b0]], [[c]] {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
349+ // CHECK: [[s1:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [0, 2], sizes = [1, 2], strides = [1, 1]} : vector<1x4xf32> to vector<1x2xf32>
350+ // CHECK: [[b1:%.+]] = vector.broadcast [[s1]] : vector<1x2xf32> to vector<2x2xf32>
351+ // CHECK: [[r1:%.+]] = vector.insert_strided_slice [[b1]], [[r0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
352+ // CHECK: [[s2:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [0, 0], sizes = [1, 2], strides = [1, 1]} : vector<1x4xf32> to vector<1x2xf32>
353+ // CHECK: [[b2:%.+]] = vector.broadcast [[s2]] : vector<1x2xf32> to vector<2x2xf32>
354+ // CHECK: [[r2:%.+]] = vector.insert_strided_slice [[b2]], [[r1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
355+ // CHECK: [[s3:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [0, 2], sizes = [1, 2], strides = [1, 1]} : vector<1x4xf32> to vector<1x2xf32>
356+ // CHECK: [[b3:%.+]] = vector.broadcast [[s3]] : vector<1x2xf32> to vector<2x2xf32>
357+ // CHECK: [[r3:%.+]] = vector.insert_strided_slice [[b3]], [[r2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
358+ // CHECK: return [[r3]] : vector<4x4xf32>
359+
360+ func.func @vector_broadcast_with_tailing_unit_dim (%v: vector <4 x1 xf32 >) -> vector <4 x4 xf32 > {
361+ %0 = vector.broadcast %v : vector <4 x1 xf32 > to vector <4 x4 xf32 >
362+ return %0 : vector <4 x4 xf32 >
363+ }
364+
365+ // CHECK-LABEL: func.func @vector_broadcast_with_tailing_unit_dim
366+ // CHECK-SAME: ([[arg0:%.+]]: vector<4x1xf32>) -> vector<4x4xf32> {
367+ // CHECK: [[c:%.+]] = arith.constant dense<0.000000e+00> : vector<4x4xf32>
368+ // CHECK: [[s0:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [0, 0], sizes = [2, 1], strides = [1, 1]} : vector<4x1xf32> to vector<2x1xf32>
369+ // CHECK: [[b0:%.+]] = vector.broadcast [[s0]] : vector<2x1xf32> to vector<2x2xf32>
370+ // CHECK: [[r0:%.+]] = vector.insert_strided_slice [[b0]], [[c]] {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
371+ // CHECK: [[s1:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [0, 0], sizes = [2, 1], strides = [1, 1]} : vector<4x1xf32> to vector<2x1xf32>
372+ // CHECK: [[b1:%.+]] = vector.broadcast [[s1]] : vector<2x1xf32> to vector<2x2xf32>
373+ // CHECK: [[r1:%.+]] = vector.insert_strided_slice [[b1]], [[r0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
374+ // CHECK: [[s2:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [2, 0], sizes = [2, 1], strides = [1, 1]} : vector<4x1xf32> to vector<2x1xf32>
375+ // CHECK: [[b2:%.+]] = vector.broadcast [[s2]] : vector<2x1xf32> to vector<2x2xf32>
376+ // CHECK: [[r2:%.+]] = vector.insert_strided_slice [[b2]], [[r1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
377+ // CHECK: [[s3:%.+]] = vector.extract_strided_slice [[arg0]] {offsets = [2, 0], sizes = [2, 1], strides = [1, 1]} : vector<4x1xf32> to vector<2x1xf32>
378+ // CHECK: [[b3:%.+]] = vector.broadcast [[s3]] : vector<2x1xf32> to vector<2x2xf32>
379+ // CHECK: [[r3:%.+]] = vector.insert_strided_slice [[b3]], [[r2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
380+ // CHECK: return [[r3]] : vector<4x4xf32>
0 commit comments