Skip to content

[Headers][X86] Allow AVX512 reduction intrinsics to be used in constexpr #152363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 20 additions & 16 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -9337,19 +9337,23 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
* This takes log2(n) steps where n is the number of elements in the vector.
*/

static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_add_epi64(__m512i __W) {
return __builtin_reduce_add((__v8di)__W);
}

static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_mul_epi64(__m512i __W) {
return __builtin_reduce_mul((__v8di)__W);
}

static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_and_epi64(__m512i __W) {
return __builtin_reduce_and((__v8di)__W);
}

static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_or_epi64(__m512i __W) {
return __builtin_reduce_or((__v8di)__W);
}

Expand Down Expand Up @@ -9400,22 +9404,22 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}

static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_add_epi32(__m512i __W) {
return __builtin_reduce_add((__v16si)__W);
}

static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_mul_epi32(__m512i __W) {
return __builtin_reduce_mul((__v16si)__W);
}

static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_and_epi32(__m512i __W) {
return __builtin_reduce_and((__v16si)__W);
}

static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_or_epi32(__m512i __W) {
return __builtin_reduce_or((__v16si)__W);
}
Expand Down Expand Up @@ -9466,22 +9470,22 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
}

static __inline__ long long __DEFAULT_FN_ATTRS512
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_max_epi64(__m512i __V) {
return __builtin_reduce_max((__v8di)__V);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_max_epu64(__m512i __V) {
return __builtin_reduce_max((__v8du)__V);
}

static __inline__ long long __DEFAULT_FN_ATTRS512
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_min_epi64(__m512i __V) {
return __builtin_reduce_min((__v8di)__V);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_min_epu64(__m512i __V) {
return __builtin_reduce_min((__v8du)__V);
}
Expand Down Expand Up @@ -9509,22 +9513,22 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
__V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
return __builtin_reduce_min((__v8du)__V);
}
static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_max_epi32(__m512i __V) {
return __builtin_reduce_max((__v16si)__V);
}

static __inline__ unsigned int __DEFAULT_FN_ATTRS512
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_max_epu32(__m512i __V) {
return __builtin_reduce_max((__v16su)__V);
}

static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_min_epi32(__m512i __V) {
return __builtin_reduce_min((__v16si)__V);
}

static __inline__ unsigned int __DEFAULT_FN_ATTRS512
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_reduce_min_epu32(__m512i __V) {
return __builtin_reduce_min((__v16su)__V);
}
Expand Down
9 changes: 9 additions & 0 deletions clang/test/CodeGen/X86/avx512-reduceIntrin.c
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>
#include "builtin_test_helpers.h"

long long test_mm512_reduce_add_epi64(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_add_epi64(
// CHECK: call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_add_epi64(__W);
}
TEST_CONSTEXPR(_mm512_reduce_add_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == -4);

long long test_mm512_reduce_mul_epi64(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_mul_epi64(
// CHECK: call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_mul_epi64(__W);
}
TEST_CONSTEXPR(_mm512_reduce_mul_epi64((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}) == 40320);

long long test_mm512_reduce_or_epi64(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_or_epi64(
// CHECK: call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_or_epi64(__W);
}
TEST_CONSTEXPR(_mm512_reduce_or_epi64((__m512i)(__v8di){0x100, 0x200, 0x400, 0x800, 0, 0, 0, 0}) == 0xF00);

long long test_mm512_reduce_and_epi64(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_and_epi64(
// CHECK: call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_and_epi64(__W);
}
TEST_CONSTEXPR(_mm512_reduce_and_epi64((__m512i)(__v8di){0xFFFF, 0xFF00, 0x00FF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFF00, 0x00FF}) == 0x0000);

long long test_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W){
// CHECK-LABEL: @test_mm512_mask_reduce_add_epi64(
Expand Down Expand Up @@ -59,23 +64,27 @@ int test_mm512_reduce_add_epi32(__m512i __W){
// CHECK: call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_add_epi32(__W);
}
TEST_CONSTEXPR(_mm512_reduce_add_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == -8);

int test_mm512_reduce_mul_epi32(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_mul_epi32(
// CHECK: call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_mul_epi32(__W);
}
TEST_CONSTEXPR(_mm512_reduce_mul_epi32((__m512i)(__v16si){1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 1, 1, -3, 1, 1}) == -36);

int test_mm512_reduce_or_epi32(__m512i __W){
// CHECK: call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_or_epi32(__W);
}
TEST_CONSTEXPR(_mm512_reduce_or_epi32((__m512i)(__v16si){0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0, 0, 0, 0, 0, 0, 0, 0}) == 0xFF);

int test_mm512_reduce_and_epi32(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_and_epi32(
// CHECK: call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_and_epi32(__W);
}
TEST_CONSTEXPR(_mm512_reduce_and_epi32((__m512i)(__v16si){0xFF, 0xF0, 0x0F, 0xFF, 0xFF, 0xFF, 0xF0, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0, 0xF0, 0x0F, 0x0F}) == 0x00);

int test_mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W){
// CHECK-LABEL: @test_mm512_mask_reduce_add_epi32(
Expand Down
9 changes: 9 additions & 0 deletions clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>
#include "builtin_test_helpers.h"

long long test_mm512_reduce_max_epi64(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_max_epi64(
// CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_max_epi64(__W);
}
TEST_CONSTEXPR(_mm512_reduce_max_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == 3);

unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_max_epu64(
// CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_max_epu64(__W);
}
TEST_CONSTEXPR(_mm512_reduce_max_epu64((__m512i)(__v8du){0, 1, 2, 3, 4, 5, 6, 7}) == 7);

double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
// CHECK-LABEL: @test_mm512_reduce_max_pd(
Expand All @@ -27,12 +30,14 @@ long long test_mm512_reduce_min_epi64(__m512i __W){
// CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_min_epi64(__W);
}
TEST_CONSTEXPR(_mm512_reduce_min_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == -4);

unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_min_epu64(
// CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_min_epu64(__W);
}
TEST_CONSTEXPR(_mm512_reduce_min_epu64((__m512i)(__v8du){0, 1, 2, 3, 4, 5, 6, 7}) == 0);

double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
// CHECK-LABEL: @test_mm512_reduce_min_pd(
Expand Down Expand Up @@ -89,12 +94,14 @@ int test_mm512_reduce_max_epi32(__m512i __W){
// CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_max_epi32(__W);
}
TEST_CONSTEXPR(_mm512_reduce_max_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == 7);

unsigned int test_mm512_reduce_max_epu32(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_max_epu32(
// CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_max_epu32(__W);
}
TEST_CONSTEXPR(_mm512_reduce_max_epu32((__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) == 15);

float test_mm512_reduce_max_ps(__m512 __W){
// CHECK-LABEL: @test_mm512_reduce_max_ps(
Expand All @@ -107,12 +114,14 @@ int test_mm512_reduce_min_epi32(__m512i __W){
// CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_min_epi32(__W);
}
TEST_CONSTEXPR(_mm512_reduce_min_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == -8);

unsigned int test_mm512_reduce_min_epu32(__m512i __W){
// CHECK-LABEL: @test_mm512_reduce_min_epu32(
// CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_min_epu32(__W);
}
TEST_CONSTEXPR(_mm512_reduce_min_epu32((__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) == 0);

float test_mm512_reduce_min_ps(__m512 __W){
// CHECK-LABEL: @test_mm512_reduce_min_ps(
Expand Down