Skip to content

Commit 15b665b

Browse files
authored
[Headers][X86] Add constexpr support for some AVX512 int to f16 intrinsics. (#159231)
Added constexpr to the remaining intrinsics: `_mm256_cvtepu16_ph`, `_mm256_mask_cvtepu16_ph`, `_mm256_maskz_cvtepu16_ph`, `_mm256_cvtepi32_ph`, `_mm256_mask_cvtepi32_ph`, `_mm256_maskz_cvtepi32_ph`, `_mm256_cvtepu32_ph`, `_mm256_mask_cvtepu32_ph`, and `_mm256_maskz_cvtepu32_ph`. This is the last part of the fix for #155798.
1 parent: 3defab3 · commit: 15b665b

File tree

2 files changed

+27
-9
lines changed

2 files changed

+27
-9
lines changed

clang/lib/Headers/avx512vlfp16intrin.h

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -931,18 +931,18 @@ _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
931931
(__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
932932
}
933933

934-
static __inline__ __m256h __DEFAULT_FN_ATTRS256
934+
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
935935
_mm256_cvtepu16_ph(__m256i __A) {
936936
return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
937937
}
938938

939-
static __inline__ __m256h __DEFAULT_FN_ATTRS256
939+
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
940940
_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
941941
return (__m256h)__builtin_ia32_selectph_256(
942942
(__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
943943
}
944944

945-
static __inline__ __m256h __DEFAULT_FN_ATTRS256
945+
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
946946
_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
947947
return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
948948
(__v16hf)_mm256_cvtepu16_ph(__A),
@@ -1036,18 +1036,18 @@ _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
10361036
(__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
10371037
}
10381038

1039-
static __inline__ __m128h __DEFAULT_FN_ATTRS256
1039+
static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
10401040
_mm256_cvtepi32_ph(__m256i __A) {
10411041
return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
10421042
}
10431043

1044-
static __inline__ __m128h __DEFAULT_FN_ATTRS256
1044+
static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
10451045
_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
10461046
return (__m128h)__builtin_ia32_selectph_128(
10471047
(__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
10481048
}
10491049

1050-
static __inline__ __m128h __DEFAULT_FN_ATTRS256
1050+
static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
10511051
_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
10521052
return (__m128h)__builtin_ia32_selectph_128(
10531053
(__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
@@ -1070,18 +1070,18 @@ _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
10701070
(__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
10711071
}
10721072

1073-
static __inline__ __m128h __DEFAULT_FN_ATTRS256
1073+
static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
10741074
_mm256_cvtepu32_ph(__m256i __A) {
10751075
return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
10761076
}
10771077

1078-
static __inline__ __m128h __DEFAULT_FN_ATTRS256
1078+
static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
10791079
_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
10801080
return (__m128h)__builtin_ia32_selectph_128(
10811081
(__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
10821082
}
10831083

1084-
static __inline__ __m128h __DEFAULT_FN_ATTRS256
1084+
static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
10851085
_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
10861086
return (__m128h)__builtin_ia32_selectph_128(
10871087
(__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());

clang/test/CodeGen/X86/avx512vlfp16-builtins.c

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1947,18 +1947,24 @@ __m256h test_mm256_cvtepu16_ph(__m256i A) {
19471947
return _mm256_cvtepu16_ph(A);
19481948
}
19491949

1950+
TEST_CONSTEXPR(match_m256h(_mm256_cvtepu16_ph((__m256i)(__v16hu){1, 1, 2, 2, 4, 4, 8, 8, 16, 16, 32, 32, 64, 64, 128, 128}),1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0, 16.0, 16.0, 32.0, 32.0, 64.0, 64.0, 128.0, 128.0));
1951+
19501952
__m256h test_mm256_mask_cvtepu16_ph(__m256h A, __mmask16 B, __m256i C) {
19511953
// CHECK-LABEL: test_mm256_mask_cvtepu16_ph
19521954
// CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
19531955
return _mm256_mask_cvtepu16_ph(A, B, C);
19541956
}
19551957

1958+
TEST_CONSTEXPR(match_m256h(_mm256_mask_cvtepu16_ph(_mm256_set1_ph(-999.0),/*1100 1001 1110 1000=*/0xc9e8,(__m256i)(__v16hu){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), -999.0, -999.0, -999.0, 4.0, -999.0, 6.0, 7.0, 8.0, 9.0, -999.0, -999.0, 12.0, -999.0, -999.0, 15.0, 16.0));
1959+
19561960
__m256h test_mm256_maskz_cvtepu16_ph(__mmask16 A, __m256i B) {
19571961
// CHECK-LABEL: test_mm256_maskz_cvtepu16_ph
19581962
// CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
19591963
return _mm256_maskz_cvtepu16_ph(A, B);
19601964
}
19611965

1966+
TEST_CONSTEXPR(match_m256h(_mm256_maskz_cvtepu16_ph(/*1100 1001 1110 1000=*/0xc9e8,(__m256i)(__v16hu){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 0.0, 0.0, 0.0, 4.0, 0.0, 6.0, 7.0, 8.0, 9.0, 0.0, 0.0, 12.0, 0.0, 0.0, 15.0, 16.0));
1967+
19621968
__m128i test_mm_cvtph_epi32(__m128h A) {
19631969
// CHECK-LABEL: test_mm_cvtph_epi32
19641970
// CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
@@ -2055,18 +2061,24 @@ __m128h test_mm256_cvtepi32_ph(__m256i A) {
20552061
return _mm256_cvtepi32_ph(A);
20562062
}
20572063

2064+
TEST_CONSTEXPR(match_m128h(_mm256_cvtepi32_ph((__m256i)(__v8si){-1, -1, 2, 2, -4, -4, 6, 6}), -1.0, -1.0, 2.0, 2.0, -4.0, -4.0, 6.0, 6.0));
2065+
20582066
__m128h test_mm256_mask_cvtepi32_ph(__m128h A, __mmask8 B, __m256i C) {
20592067
// CHECK-LABEL: test_mm256_mask_cvtepi32_ph
20602068
// CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
20612069
return _mm256_mask_cvtepi32_ph(A, B, C);
20622070
}
20632071

2072+
TEST_CONSTEXPR(match_m128h(_mm256_mask_cvtepi32_ph(_mm_set1_ph(-999.0),/*1001 0011=*/0x93,(__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 1.0, 2.0, -999.0, -999.0, 5.0, -999.0, -999.0, 8.0));
2073+
20642074
__m128h test_mm256_maskz_cvtepi32_ph(__mmask8 A, __m256i B) {
20652075
// CHECK-LABEL: test_mm256_maskz_cvtepi32_ph
20662076
// CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
20672077
return _mm256_maskz_cvtepi32_ph(A, B);
20682078
}
20692079

2080+
TEST_CONSTEXPR(match_m128h(_mm256_maskz_cvtepi32_ph(/*1001 0011=*/0x93, (__m256i)(__v8si){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 8.0));
2081+
20702082
__m128h test_mm_cvtepu32_ph(__m128i A) {
20712083
// CHECK-LABEL: test_mm_cvtepu32_ph
20722084
// CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
@@ -2091,18 +2103,24 @@ __m128h test_mm256_cvtepu32_ph(__m256i A) {
20912103
return _mm256_cvtepu32_ph(A);
20922104
}
20932105

2106+
TEST_CONSTEXPR(match_m128h( _mm256_cvtepu32_ph((__m256i)(__v8su){1, 2, 3, 4, 5, 6, 7, 8}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0));
2107+
20942108
__m128h test_mm256_mask_cvtepu32_ph(__m128h A, __mmask8 B, __m256i C) {
20952109
// CHECK-LABEL: test_mm256_mask_cvtepu32_ph
20962110
// CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
20972111
return _mm256_mask_cvtepu32_ph(A, B, C);
20982112
}
20992113

2114+
TEST_CONSTEXPR(match_m128h( _mm256_mask_cvtepu32_ph(_mm_set1_ph(-999.0),/*1001 0011=*/0x93,(__m256i)(__v8su){1, 2, 3, 4, 5, 6, 7, 8}), 1.0, 2.0, -999.0, -999.0, 5.0, -999.0, -999.0, 8.0));
2115+
21002116
__m128h test_mm256_maskz_cvtepu32_ph(__mmask8 A, __m256i B) {
21012117
// CHECK-LABEL: test_mm256_maskz_cvtepu32_ph
21022118
// CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
21032119
return _mm256_maskz_cvtepu32_ph(A, B);
21042120
}
21052121

2122+
TEST_CONSTEXPR(match_m128h(_mm256_maskz_cvtepu32_ph(/*1001 0011=*/0x93, (__m256i)(__v8su){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 8.0));
2123+
21062124
__m128i test_mm_cvttph_epi32(__m128h A) {
21072125
// CHECK-LABEL: test_mm_cvttph_epi32
21082126
// CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128

0 commit comments

Comments
 (0)