-
Notifications
You must be signed in to change notification settings - Fork 15k
[X86][Clang] Allow constexpr evaluation of F16C CVTPS2PH intrinsics #162295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,6 +46,37 @@ __m128 test_mm_cvtph_ps(__m128i a) { | |
| return _mm_cvtph_ps(a); | ||
| } | ||
|
|
||
| __m128i test_mm_cvtps_ph(__m128 a) { | ||
| // CHECK-LABEL: test_mm_cvtps_ph | ||
| // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0) | ||
| return _mm_cvtps_ph(a, 0); | ||
| } | ||
|
|
||
| // A value exactly halfway between 1.0 and the next representable FP16 number. | ||
| // In binary, its significand ends in ...000, followed by a tie-bit 1. | ||
| #define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case | ||
|
|
||
| // | ||
| // _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded) | ||
| // | ||
| // Test values: -2.5f, 1.123f, POS_HALFWAY | ||
| TEST_CONSTEXPR(match_v8hi( | ||
| _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT), | ||
| 0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0 | ||
| )); | ||
| TEST_CONSTEXPR(match_v8hi( | ||
| _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF), | ||
| 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0 | ||
| )); | ||
| TEST_CONSTEXPR(match_v8hi( | ||
| _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF), | ||
| 0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0 | ||
| )); | ||
| TEST_CONSTEXPR(match_v8hi( | ||
| _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO), | ||
| 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0 | ||
| )); | ||
|
|
||
| __m256 test_mm256_cvtph_ps(__m128i a) { | ||
| // CHECK-LABEL: test_mm256_cvtph_ps | ||
| // CHECK: fpext <8 x half> %{{.*}} to <8 x float> | ||
|
|
@@ -56,14 +87,40 @@ TEST_CONSTEXPR(match_m256( | |
| 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f | ||
| )); | ||
|
|
||
| __m128i test_mm_cvtps_ph(__m128 a) { | ||
| // CHECK-LABEL: test_mm_cvtps_ph | ||
| // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0) | ||
| return _mm_cvtps_ph(a, 0); | ||
| } | ||
| // | ||
| // _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts) | ||
| // | ||
| // Test values: -2.5f, 1.123f, POS_HALFWAY | ||
| TEST_CONSTEXPR(match_v8hi( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (style) Please can you position these just below the matching test_mm_cvtps_ph/test_mm256_cvtps_ph functions - it helps if we keep all the tests relevant to a specific intrinsic together. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done |
||
| _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT), | ||
| 0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000 | ||
| )); | ||
| TEST_CONSTEXPR(match_v8hi( | ||
| _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF), | ||
| 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000 | ||
| )); | ||
| TEST_CONSTEXPR(match_v8hi( | ||
| _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF), | ||
| 0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000 | ||
| )); | ||
| TEST_CONSTEXPR(match_v8hi( | ||
| _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO), | ||
| 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000 | ||
| )); | ||
|
|
||
| // | ||
| // Tests for Exact Dynamic Rounding | ||
| // | ||
| // Test that dynamic rounding SUCCEEDS for exactly representable values. | ||
| // We use _MM_FROUND_CUR_DIRECTION (value 4) to specify dynamic rounding. | ||
| // Inputs: -2.5f, 0.125f, -16.0f are all exactly representable in FP16. | ||
| TEST_CONSTEXPR(match_v8hi( | ||
| __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION), | ||
| 0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000 | ||
| )); | ||
|
|
||
| __m128i test_mm256_cvtps_ph(__m256 a) { | ||
| // CHECK-LABEL: test_mm256_cvtps_ph | ||
| // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0) | ||
| return _mm256_cvtps_ph(a, 0); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The input vector is of floats, but the output vector is of integers?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, since this converts single-precision (float32) values to half-precision (float16) values, which are stored in an __m128i.