-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Clang][x86]: allow PCLMULQDQ intrinsics to be used in constexpr #169214
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 12 commits
60fac68
12b605b
2f80098
e3de251
5afa1b1
f5c5f23
0cee75e
974ddac
7a28323
5c7eb8e
3a72489
5c486a9
ec1331c
931bcc5
e433e33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,25 @@ | ||
| // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +pclmul -emit-llvm -o - | FileCheck %s | ||
|
|
||
| // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +pclmul -emit-llvm -o - -std=c++11 | FileCheck %s | ||
| // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +pclmul -emit-llvm -o - -std=c++11 -fexperimental-new-constant-interpreter | FileCheck %s | ||
|
|
||
| #include <wmmintrin.h> | ||
| #include "builtin_test_helpers.h" | ||
|
|
||
| __m128i test_mm_clmulepi64_si128(__m128i a, __m128i b) { | ||
| // CHECK: @llvm.x86.pclmulqdq | ||
| return _mm_clmulepi64_si128(a, b, 0); | ||
| } | ||
|
|
||
| // Test constexpr evaluation for _mm_clmulepi64_si128 | ||
| // imm8=0x00: lower 64 bits of both operands | ||
| // Test case: 0x1 * 0x3 = 0x3 (carry-less multiplication) | ||
| TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x1ULL, 0x0ULL}), ((__m128i){0x3ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL)); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need tests showing results in the upper 64 bits as well: |
||
|
|
||
| // imm8=0x01: upper 64 bits of first operand, lower 64 bits of second | ||
| TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, 0x1ULL}), ((__m128i){0x3ULL, 0x0ULL}), 0x01), 0x3ULL, 0x0ULL)); | ||
|
|
||
| // imm8=0x10: lower 64 bits of first operand, upper 64 bits of second | ||
| TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x1ULL, 0x0ULL}), ((__m128i){0x0ULL, 0x3ULL}), 0x10), 0x3ULL, 0x0ULL)); | ||
|
|
||
| // imm8=0x11: upper 64 bits of both operands | ||
| TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, 0x1ULL}), ((__m128i){0x0ULL, 0x3ULL}), 0x11), 0x3ULL, 0x0ULL)); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,17 +1,30 @@ | ||
| // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - | FileCheck %s --check-prefix AVX | ||
| // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512 | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - -std=c++11 | FileCheck %s --check-prefix AVX | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - -std=c++11 | FileCheck %s --check-prefixes AVX,AVX512 | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - -std=c++11 -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix AVX | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - -std=c++11 -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes AVX,AVX512 | ||
|
|
||
| #include <immintrin.h> | ||
| #include "builtin_test_helpers.h" | ||
|
|
||
| __m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) { | ||
| // AVX: @llvm.x86.pclmulqdq.256 | ||
| return _mm256_clmulepi64_epi128(A, B, 0); | ||
| } | ||
|
|
||
| // Test constexpr evaluation for _mm256_clmulepi64_epi128 | ||
| // Each 128-bit lane is processed independently | ||
| TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL}), ((__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL)); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd prefer to see some complex values, not just simple cases; we need to be certain that the implementation is complete. Have you done any fuzz testing comparing constexpr vs. runtime?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hey @RKSimon, I've updated with more complex test cases. For fuzzing, I'm still trying to get something working using |
||
|
|
||
| #ifdef __AVX512F__ | ||
| __m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) { | ||
| // AVX512: @llvm.x86.pclmulqdq.512 | ||
| return _mm512_clmulepi64_epi128(A, B, 0); | ||
| } | ||
|
|
||
| // Test constexpr evaluation for _mm512_clmulepi64_epi128 | ||
| // Each 128-bit lane is processed independently | ||
| TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}), ((__m512i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL)); | ||
| #endif | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.