-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] EmitX86BuiltinExpr - attempt to convert SSE41/AVX1 roundps/d/ss/sd builtins to regular rounding modes #171227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
62647bf
56f72b8
aab58a9
156d2aa
7ab45f8
62a18f9
c4eff0d
9b2cda2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,25 +75,29 @@ TEST_CONSTEXPR(match_m128(_mm_blendv_ps((__m128)(__v4sf){0.0f, 1.0f, 2.0f, 3.0f} | |
|
|
||
| __m128d test_mm_ceil_pd(__m128d x) { | ||
| // CHECK-LABEL: test_mm_ceil_pd | ||
| // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 2) | ||
| // CHECK: %{{.*}} = call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{.*}}) | ||
| return _mm_ceil_pd(x); | ||
| } | ||
|
|
||
| __m128 test_mm_ceil_ps(__m128 x) { | ||
| // CHECK-LABEL: test_mm_ceil_ps | ||
| // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 2) | ||
| // CHECK: %{{.*}} = call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{.*}}) | ||
| return _mm_ceil_ps(x); | ||
| } | ||
|
|
||
| __m128d test_mm_ceil_sd(__m128d x, __m128d y) { | ||
| // CHECK-LABEL: test_mm_ceil_sd | ||
| // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2) | ||
| // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0 | ||
| // CHECK: %[[B:.*]] = call double @llvm.ceil.f64(double %[[A:.*]]) | ||
| // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0 | ||
|
Comment on lines
+90
to
+92
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder if the backend combines them into the same instruction. Did you check that we get the same assembly?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're right, we do not get the same instruction. We get:
`roundss xmm1, xmm1, 10`
`blendps xmm0, xmm1, 1`
I can either try to implement a pattern to convert this to a single …
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think pattern matching is worth a try. It can be done independently.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. BTW, did you build it with -O0? It's not a problem if the output only differs at -O0.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I also tried -O1 and -O3; the output is different at both of those. |
||
| return _mm_ceil_sd(x, y); | ||
| } | ||
|
|
||
| __m128 test_mm_ceil_ss(__m128 x, __m128 y) { | ||
| // CHECK-LABEL: test_mm_ceil_ss | ||
| // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2) | ||
| // CHECK: %[[A:.*]] = extractelement <4 x float> %{{.*}}, i32 0 | ||
| // CHECK: %[[B:.*]] = call float @llvm.ceil.f32(float %[[A:.*]]) | ||
| // CHECK: %{{.*}} = insertelement <4 x float> %0, float %[[B:.*]], i32 0 | ||
| return _mm_ceil_ss(x, y); | ||
| } | ||
|
|
||
|
|
@@ -256,25 +260,29 @@ TEST_CONSTEXPR(_mm_extract_ps(((__m128){1.25f, 2.5f, 3.75f, 5.0f}), 6) == __buil | |
|
|
||
| __m128d test_mm_floor_pd(__m128d x) { | ||
| // CHECK-LABEL: test_mm_floor_pd | ||
| // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 1) | ||
| // CHECK: %{{.*}} = call <2 x double> @llvm.floor.v2f64(<2 x double> %{{.*}}) | ||
| return _mm_floor_pd(x); | ||
| } | ||
|
|
||
| __m128 test_mm_floor_ps(__m128 x) { | ||
| // CHECK-LABEL: test_mm_floor_ps | ||
| // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 1) | ||
| // CHECK: %{{.*}} = call <4 x float> @llvm.floor.v4f32(<4 x float> %{{.*}}) | ||
| return _mm_floor_ps(x); | ||
| } | ||
|
|
||
| __m128d test_mm_floor_sd(__m128d x, __m128d y) { | ||
| // CHECK-LABEL: test_mm_floor_sd | ||
| // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1) | ||
| // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0 | ||
| // CHECK: %[[B:.*]] = call double @llvm.floor.f64(double %[[A:.*]]) | ||
| // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0 | ||
| return _mm_floor_sd(x, y); | ||
| } | ||
|
|
||
| __m128 test_mm_floor_ss(__m128 x, __m128 y) { | ||
| // CHECK-LABEL: test_mm_floor_ss | ||
| // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1) | ||
| // CHECK: %[[A:.*]] = extractelement <4 x float> %{{.*}}, i32 0 | ||
| // CHECK: %[[B:.*]] = call float @llvm.floor.f32(float %[[A:.*]]) | ||
| // CHECK: %{{.*}} = insertelement <4 x float> %0, float %[[B:.*]], i32 0 | ||
| return _mm_floor_ss(x, y); | ||
| } | ||
|
|
||
|
|
@@ -430,25 +438,29 @@ TEST_CONSTEXPR(match_v8hi(_mm_packus_epi32((__m128i)(__v4si){40000, -50000, 3276 | |
|
|
||
| __m128d test_mm_round_pd(__m128d x) { | ||
| // CHECK-LABEL: test_mm_round_pd | ||
| // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 4) | ||
| // CHECK: %{{.*}} = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %{{.*}}) | ||
| return _mm_round_pd(x, 4); | ||
| } | ||
|
|
||
| __m128 test_mm_round_ps(__m128 x) { | ||
| // CHECK-LABEL: test_mm_round_ps | ||
| // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 4) | ||
| // CHECK: %{{.*}} = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %{{.*}}) | ||
| return _mm_round_ps(x, 4); | ||
| } | ||
|
|
||
| __m128d test_mm_round_sd(__m128d x, __m128d y) { | ||
| // CHECK-LABEL: test_mm_round_sd | ||
| // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 4) | ||
| // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0 | ||
| // CHECK: %[[B:.*]] = call double @llvm.roundeven.f64(double %[[A:.*]]) | ||
| // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0 | ||
| return _mm_round_sd(x, y, 4); | ||
| } | ||
|
|
||
| __m128 test_mm_round_ss(__m128 x, __m128 y) { | ||
| // CHECK-LABEL: test_mm_round_ss | ||
| // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 4) | ||
| // CHECK: %[[A:.*]] = extractelement <4 x float> %{{.*}}, i32 0 | ||
| // CHECK: %[[B:.*]] = call float @llvm.roundeven.f32(float %[[A:.*]]) | ||
| // CHECK: %{{.*}} = insertelement <4 x float> %0, float %[[B:.*]], i32 0 | ||
| return _mm_round_ss(x, y, 4); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) capitalize first letter of variable (same for others)