;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2022 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
;
; RUN: igc_opt --igc-scalarize -S < %s | FileCheck %s
; ------------------------------------------------
; ScalarizeFunction
; ------------------------------------------------

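; test_unary: a <2 x float> fneg is split into per-lane fneg instructions whose
; results are reassembled with insertelement before being stored.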
define spir_kernel void @test_unary(<2 x float> %src1) {
; CHECK-LABEL: @test_unary(
; CHECK: [[SRC1_SCALAR:%.*]] = extractelement <2 x float> [[SRC1:%.*]], i32 0
; CHECK: [[SRC1_SCALAR1:%.*]] = extractelement <2 x float> [[SRC1]], i32 1
; CHECK: [[TMP1:%.*]] = alloca <2 x float>, align 4
; CHECK: [[TMP2:%.*]] = fneg float [[SRC1_SCALAR]]
; CHECK: [[TMP3:%.*]] = fneg float [[SRC1_SCALAR1]]
; CHECK: [[ASSEMBLED_VECT:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
; CHECK: [[ASSEMBLED_VECT2:%.*]] = insertelement <2 x float> [[ASSEMBLED_VECT]], float [[TMP3]], i32 1
; CHECK: store <2 x float> [[ASSEMBLED_VECT2]], <2 x float>* [[TMP1]], align 8
; CHECK: ret void
;
  %1 = alloca <2 x float>, align 4
  %2 = fneg <2 x float> %src1
  store <2 x float> %2, <2 x float>* %1, align 8
  ret void
}

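; test_binary: a <2 x i32> add is scalarized into per-lane adds of the extracted
; elements of both operands.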
define spir_kernel void @test_binary(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-LABEL: @test_binary(
; CHECK: [[SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2:%.*]], i32 0
; CHECK: [[SCALAR3:%.*]] = extractelement <2 x i32> [[SRC2]], i32 1
; CHECK: [[SCALAR:%.*]] = extractelement <2 x i32> [[SRC1:%.*]], i32 0
; CHECK: [[SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK: [[TMP1:%.*]] = alloca <2 x i32>, align 4
; CHECK: [[TMP2:%.*]] = add i32 [[SCALAR]], [[SCALAR2]]
; CHECK: [[TMP3:%.*]] = add i32 [[SCALAR1]], [[SCALAR3]]
; CHECK: [[ASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK: [[ASSEMBLED_VECT4:%.*]] = insertelement <2 x i32> [[ASSEMBLED_VECT]], i32 [[TMP3]], i32 1
; CHECK: store <2 x i32> [[ASSEMBLED_VECT4]], <2 x i32>* [[TMP1]], align 8
; CHECK: ret void
;
  %1 = alloca <2 x i32>, align 4
  %2 = add <2 x i32> %src1, %src2
  store <2 x i32> %2, <2 x i32>* %1, align 8
  ret void
}

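; test_cast: the sext is scalarized per lane, while the bitcast to a vector with
; a different element count stays a whole-vector operation.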
define spir_kernel void @test_cast(<2 x i32> %src1) {
; CHECK-LABEL: @test_cast(
; CHECK: [[SCALAR:%.*]] = extractelement <2 x i32> [[SRC1:%.*]], i32 0
; CHECK: [[SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK: [[TMP1:%.*]] = alloca <2 x i64>, align 4
; CHECK: [[TMP2:%.*]] = alloca <4 x i16>, align 4
; CHECK: [[TMP3:%.*]] = sext i32 [[SCALAR]] to i64
; CHECK: [[TMP4:%.*]] = sext i32 [[SCALAR1]] to i64
; CHECK: [[ASSEMBLED_VECT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i32 0
; CHECK: [[ASSEMBLED_VECT2:%.*]] = insertelement <2 x i64> [[ASSEMBLED_VECT]], i64 [[TMP4]], i32 1
; CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[SRC1]] to <4 x i16>
; CHECK: store <2 x i64> [[ASSEMBLED_VECT2]], <2 x i64>* [[TMP1]], align 16
; CHECK: store <4 x i16> [[TMP5]], <4 x i16>* [[TMP2]], align 8
; CHECK: ret void
;
  %1 = alloca <2 x i64>, align 4
  %2 = alloca <4 x i16>, align 4
  %3 = sext <2 x i32> %src1 to <2 x i64>
  %4 = bitcast <2 x i32> %src1 to <4 x i16>
  store <2 x i64> %3, <2 x i64>* %1, align 16
  store <4 x i16> %4, <4 x i16>* %2, align 8
  ret void
}

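; test_cmp: a vector icmp is scalarized into per-lane compares whose i1 results
; are reassembled into a <2 x i1> before the store.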
define spir_kernel void @test_cmp(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-LABEL: @test_cmp(
; CHECK: [[SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2:%.*]], i32 0
; CHECK: [[SCALAR3:%.*]] = extractelement <2 x i32> [[SRC2]], i32 1
; CHECK: [[SCALAR:%.*]] = extractelement <2 x i32> [[SRC1:%.*]], i32 0
; CHECK: [[SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK: [[TMP1:%.*]] = alloca <2 x i1>, align 4
; CHECK: [[TMP2:%.*]] = icmp eq i32 [[SCALAR]], [[SCALAR2]]
; CHECK: [[TMP3:%.*]] = icmp eq i32 [[SCALAR1]], [[SCALAR3]]
; CHECK: [[ASSEMBLED_VECT:%.*]] = insertelement <2 x i1> undef, i1 [[TMP2]], i32 0
; CHECK: [[ASSEMBLED_VECT4:%.*]] = insertelement <2 x i1> [[ASSEMBLED_VECT]], i1 [[TMP3]], i32 1
; CHECK: store <2 x i1> [[ASSEMBLED_VECT4]], <2 x i1>* [[TMP1]], align 1
; CHECK: ret void
;
  %1 = alloca <2 x i1>, align 4
  %2 = icmp eq <2 x i32> %src1, %src2
  store <2 x i1> %2, <2 x i1>* %1, align 1
  ret void
}

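; test_select: selects with a uniform (scalar) condition and with a per-lane
; (vector) condition are both scalarized; for the select whose two operands are
; identical, the checks only expect the reassembled operand feeding the final bitcast.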
define spir_kernel void @test_select(<2 x i32> %src1, <4 x i16> %src2, i1 %cond, <4 x i1> %vcond) {
; CHECK-LABEL: @test_select(
; CHECK: [[SCALAR6:%.*]] = extractelement <4 x i1> [[VCOND:%.*]], i32 0
; CHECK: [[SCALAR7:%.*]] = extractelement <4 x i1> [[VCOND]], i32 1
; CHECK: [[SCALAR8:%.*]] = extractelement <4 x i1> [[VCOND]], i32 2
; CHECK: [[SCALAR9:%.*]] = extractelement <4 x i1> [[VCOND]], i32 3
; CHECK: [[SCALAR2:%.*]] = extractelement <4 x i16> [[SRC2:%.*]], i32 0
; CHECK: [[SCALAR3:%.*]] = extractelement <4 x i16> [[SRC2]], i32 1
; CHECK: [[SCALAR4:%.*]] = extractelement <4 x i16> [[SRC2]], i32 2
; CHECK: [[SCALAR5:%.*]] = extractelement <4 x i16> [[SRC2]], i32 3
; CHECK: [[SCALAR:%.*]] = extractelement <2 x i32> [[SRC1:%.*]], i32 0
; CHECK: [[SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK: [[TMP1:%.*]] = alloca <2 x i32>, align 4
; CHECK: [[TMP2:%.*]] = alloca <4 x i16>, align 4
; CHECK: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[SCALAR]], i32 42
; CHECK: [[TMP4:%.*]] = select i1 [[COND]], i32 [[SCALAR1]], i32 13
; CHECK: [[ASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP3]], i32 0
; CHECK: [[ASSEMBLED_VECT10:%.*]] = insertelement <2 x i32> [[ASSEMBLED_VECT]], i32 [[TMP4]], i32 1
; CHECK: [[TMP5:%.*]] = select i1 [[SCALAR6]], i16 [[SCALAR2]], i16 1
; CHECK: [[TMP6:%.*]] = select i1 [[SCALAR7]], i16 [[SCALAR3]], i16 2
; CHECK: [[TMP7:%.*]] = select i1 [[SCALAR8]], i16 [[SCALAR4]], i16 3
; CHECK: [[TMP8:%.*]] = select i1 [[SCALAR9]], i16 [[SCALAR5]], i16 4
; CHECK: [[ASSEMBLED_VECT11:%.*]] = insertelement <4 x i16> undef, i16 [[TMP5]], i32 0
; CHECK: [[ASSEMBLED_VECT12:%.*]] = insertelement <4 x i16> [[ASSEMBLED_VECT11]], i16 [[TMP6]], i32 1
; CHECK: [[ASSEMBLED_VECT13:%.*]] = insertelement <4 x i16> [[ASSEMBLED_VECT12]], i16 [[TMP7]], i32 2
; CHECK: [[ASSEMBLED_VECT14:%.*]] = insertelement <4 x i16> [[ASSEMBLED_VECT13]], i16 [[TMP8]], i32 3
; CHECK: [[ASSEMBLED_VECT15:%.*]] = insertelement <4 x i16> undef, i16 [[SCALAR2]], i32 0
; CHECK: [[ASSEMBLED_VECT16:%.*]] = insertelement <4 x i16> [[ASSEMBLED_VECT15]], i16 [[SCALAR3]], i32 1
; CHECK: [[ASSEMBLED_VECT17:%.*]] = insertelement <4 x i16> [[ASSEMBLED_VECT16]], i16 [[SCALAR4]], i32 2
; CHECK: [[ASSEMBLED_VECT18:%.*]] = insertelement <4 x i16> [[ASSEMBLED_VECT17]], i16 [[SCALAR5]], i32 3
; CHECK: store <2 x i32> [[ASSEMBLED_VECT10]], <2 x i32>* [[TMP1]], align 8
; CHECK: store <4 x i16> [[ASSEMBLED_VECT14]], <4 x i16>* [[TMP2]], align 8
; CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[ASSEMBLED_VECT18]] to i64
; CHECK: ret void
;
  %1 = alloca <2 x i32>, align 4
  %2 = alloca <4 x i16>, align 4
  %3 = select i1 %cond, <2 x i32> %src1, <2 x i32> <i32 42, i32 13>
  %4 = select <4 x i1> %vcond, <4 x i16> %src2, <4 x i16> <i16 1, i16 2, i16 3, i16 4>
  %5 = select i1 %cond, <4 x i16> %src2, <4 x i16> %src2
  store <2 x i32> %3, <2 x i32>* %1, align 8
  store <4 x i16> %4, <4 x i16>* %2, align 8
  %6 = bitcast <4 x i16> %5 to i64
  ret void
}