-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Clang][X86] Add constexpr support for permute4x64_pd and permute4x64_epi64 #170442
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 8 commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
89e3954
feat: Add constexpr support for permdi256 and permdf256
ahmednoursphinx b949e99
feat: add tests
ahmednoursphinx 962ed40
chore: format files
ahmednoursphinx 4ec125a
refactor: Fix tests and revert changes in intrinsics header file
ahmednoursphinx 10510e2
chore: Fix formatting
ahmednoursphinx 86b263b
chore: revert formatting changes
ahmednoursphinx 358fb5f
refactor: move to avx2 existing container
ahmednoursphinx 7d93e7f
chore: Update formatiing
ahmednoursphinx b14e20f
refactor: use brace initialization
ahmednoursphinx 4e53bd0
Merge branch 'main' into issue_169304
ahmednoursphinx cd963f1
Merge branch 'main' into issue_169304
RKSimon File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1111,12 +1111,34 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) { | |
| // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 2, i32 0> | ||
| return _mm256_permute4x64_epi64(a, 35); | ||
| } | ||
| // Control value 0x00: [0,0,0,0] -> broadcast element 0 | ||
| TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x00), 10LL, 10LL, 10LL, 10LL)); | ||
| // Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A] | ||
| TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x1B), 40LL, 30LL, 20LL, 10LL)); | ||
| // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] | ||
| TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x39), 20LL, 30LL, 40LL, 10LL)); | ||
| // Control value 0x12: [2,0,1,0] -> [C,A,B,A] | ||
| TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 10LL, 20LL, 10LL)); | ||
| // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] | ||
| TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0xE4), 10LL, 20LL, 30LL, 40LL)); | ||
| // Test with negative values | ||
| TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(-40LL, -30LL, -20LL, -10LL), 0x1B), -40LL, -30LL, -20LL, -10LL)); | ||
|
|
||
| __m256d test_mm256_permute4x64_pd(__m256d a) { | ||
| // CHECK-LABEL: test_mm256_permute4x64_pd | ||
| // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 1, i32 0> | ||
| /* Immediate 25 == 0b00011001: its 2-bit fields, read from the low bits upward, are 1, 2, 1, 0 -- the same indices as the shuffle mask checked above. */ return _mm256_permute4x64_pd(a, 25); | ||
| } | ||
| // Control value 0x00: [0,0,0,0] -> broadcast element 0 | ||
| TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x00), 1.0, 1.0, 1.0, 1.0)); | ||
| // Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A] | ||
| TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x1B), 4.0, 3.0, 2.0, 1.0)); | ||
| // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] | ||
| TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x39), 2.0, 3.0, 4.0, 1.0)); | ||
| // Control value 0x12: [2,0,1,0] -> [C,A,B,A] | ||
| TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 1.0, 2.0, 1.0)); | ||
| // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] | ||
| TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0xE4), 1.0, 2.0, 3.0, 4.0)); | ||
|
||
|
|
||
| __m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) { | ||
| // CHECK-LABEL: test_mm256_permutevar8x32_epi32 | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't use _mm256_set_epi64x - use brace initialisation