|
| 1 | +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir |
| 2 | +// RUN: FileCheck --input-file=%t.cir %s |
| 3 | + |
| 4 | +// Test that __builtin_ia32_pshufd and __builtin_ia32_vpermilp generates correct CIR vec.shuffle operations |
| 5 | +// This verifies the fix for SIMD intrinsic support that was previously NYI |
| 6 | + |
| 7 | +typedef int __v4si __attribute__((__vector_size__(16))); |
| 8 | +typedef float __v4sf __attribute__((__vector_size__(16))); |
| 9 | +typedef double __v2df __attribute__((__vector_size__(16))); |
| 10 | +typedef float __v8sf __attribute__((__vector_size__(32))); |
| 11 | +typedef double __v4df __attribute__((__vector_size__(32))); |
| 12 | +typedef float __v16sf __attribute__((__vector_size__(64))); |
| 13 | +typedef double __v8df __attribute__((__vector_size__(64))); |
| 14 | + |
| 15 | +typedef __v4si __m128i; |
| 16 | +typedef __v4sf __m128; |
| 17 | +typedef __v2df __m128d; |
| 18 | +typedef __v8sf __m256; |
| 19 | +typedef __v4df __m256d; |
| 20 | +typedef __v16sf __m512; |
| 21 | +typedef __v8df __m512d; |
| 22 | + |
| 23 | +// CHECK-LABEL: @_Z11test_pshufdv |
| 24 | +void test_pshufd() { |
| 25 | + __m128i vec = {1, 2, 3, 4}; |
| 26 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 4> |
| 27 | + __m128i result = __builtin_ia32_pshufd(vec, 0x4E); |
| 28 | +} |
| 29 | + |
| 30 | +// CHECK-LABEL: @_Z19test_different_maskv |
| 31 | +void test_different_mask() { |
| 32 | + __m128i vec = {10, 20, 30, 40}; |
| 33 | + // Test different immediate value: 0x1B = 00011011 = [3,2,1,0] reversed |
| 34 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s32i x 4> |
| 35 | + __m128i result = __builtin_ia32_pshufd(vec, 0x1B); |
| 36 | +} |
| 37 | + |
| 38 | +// CHECK-LABEL: @_Z9test_casev |
| 39 | +void test_case() { |
| 40 | + __m128i p0 = {1, 2, 3, 4}; |
| 41 | + |
| 42 | + // This reproduces the exact pattern from stb_image.h:2685 that was failing: |
| 43 | + // _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); |
| 44 | + // Which expands to: __builtin_ia32_pshufd(p0, 0x4e) |
| 45 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 4> |
| 46 | + __m128i out_vec = __builtin_ia32_pshufd(p0, 0x4e); |
| 47 | +} |
| 48 | + |
| 49 | +// CHECK-LABEL: @_Z15test_vpermilps4v |
| 50 | +void test_vpermilps4() { |
| 51 | + __m128 vec = {1.0f, 2.0f, 3.0f, 4.0f}; |
| 52 | + // vpermilps with immediate 0x4E = 01001110 = [1,3,2,0] for 4 elements |
| 53 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 4> |
| 54 | + __m128 result = __builtin_ia32_vpermilps(vec, 0x4E); |
| 55 | +} |
| 56 | + |
| 57 | +// CHECK-LABEL: @_Z15test_vpermilpd2v |
| 58 | +void test_vpermilpd2() { |
| 59 | + __m128d vec = {1.0, 2.0}; |
| 60 | + // vpermilpd with immediate 0x1 = 01 = [1,0] for 2 elements |
| 61 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.double x 2> |
| 62 | + __m128d result = __builtin_ia32_vpermilpd(vec, 0x1); |
| 63 | +} |
| 64 | + |
| 65 | +// CHECK-LABEL: @_Z17test_vpermilps256v |
| 66 | +void test_vpermilps256() { |
| 67 | + __m256 vec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; |
| 68 | + // vpermilps256 with immediate 0x1B = 00011011 = [3,2,1,0] for each 128-bit lane |
| 69 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i] : !cir.vector<!cir.float x 8> |
| 70 | + __m256 result = __builtin_ia32_vpermilps256(vec, 0x1B); |
| 71 | +} |
| 72 | + |
| 73 | +// CHECK-LABEL: @_Z17test_vpermilpd256v |
| 74 | +void test_vpermilpd256() { |
| 75 | + __m256d vec = {1.0, 2.0, 3.0, 4.0}; |
| 76 | + // vpermilpd256 with immediate 0x5 = 0101 = [1,0,1,0] for 4 elements |
| 77 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.double x 4> |
| 78 | + __m256d result = __builtin_ia32_vpermilpd256(vec, 0x5); |
| 79 | +} |
| 80 | + |
| 81 | +// CHECK-LABEL: @_Z17test_vpermilps512v |
| 82 | +void test_vpermilps512() { |
| 83 | + __m512 vec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, |
| 84 | + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; |
| 85 | + // vpermilps512 with immediate 0x4E = 01001110 = [1,3,2,0] for each 128-bit lane |
| 86 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 16>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i] : !cir.vector<!cir.float x 16> |
| 87 | + __m512 result = __builtin_ia32_vpermilps512(vec, 0x4E); |
| 88 | +} |
| 89 | + |
| 90 | +// CHECK-LABEL: @_Z17test_vpermilpd512v |
| 91 | +void test_vpermilpd512() { |
| 92 | + __m512d vec = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; |
| 93 | + // vpermilpd512 with immediate 0x55 = 01010101 = [1,0,1,0,1,0,1,0] for 8 elements |
| 94 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 8>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i] : !cir.vector<!cir.double x 8> |
| 95 | + __m512d result = __builtin_ia32_vpermilpd512(vec, 0x55); |
| 96 | +} |
| 97 | + |
| 98 | +// Test different immediate values |
| 99 | +// CHECK-LABEL: @_Z24test_vpermilps_differentv |
| 100 | +void test_vpermilps_different() { |
| 101 | + __m128 vec = {10.0f, 20.0f, 30.0f, 40.0f}; |
| 102 | + // Test different immediate value: 0x1B = 00011011 = [3,2,1,0] reversed |
| 103 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.float x 4> |
| 104 | + __m128 result = __builtin_ia32_vpermilps(vec, 0x1B); |
| 105 | +} |
| 106 | + |
| 107 | +// CHECK-LABEL: @_Z24test_vpermilpd_differentv |
| 108 | +void test_vpermilpd_different() { |
| 109 | + __m128d vec = {100.0, 200.0}; |
| 110 | + // Test immediate 0x0 = 00 = [0,0] - duplicate first element |
| 111 | + // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.double x 2> |
| 112 | + __m128d result = __builtin_ia32_vpermilpd(vec, 0x0); |
| 113 | +} |
0 commit comments