Changes from all commits
22 commits
a824ded
[X86][SelectionDAG] - Add support for llvm.canonicalize intrinsic
pawan-nirpal-031 Aug 28, 2024
5ebc6b4
Merge branch 'llvm:main' into main
pawan-nirpal-031 Aug 28, 2024
34d5244
Move combine operations to DAG combiner over from legalizer, address …
pawan-nirpal-031 Sep 6, 2024
3c961a8
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 6, 2024
74ae03e
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 6, 2024
d405230
addressing review comments, simplify conditions
pawan-nirpal-031 Sep 6, 2024
96f7c43
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 6, 2024
317dd6f
Removed constant folding for another patch, moving undef canonicalize…
pawan-nirpal-031 Sep 10, 2024
06f09f4
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 10, 2024
cbe7d0b
fix run lines to reuse checks
pawan-nirpal-031 Sep 11, 2024
d48773a
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 11, 2024
9e37e86
fix lit failure for sse2 mode
pawan-nirpal-031 Sep 11, 2024
26ee8a9
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 11, 2024
b9d2cf8
minor refactors
pawan-nirpal-031 Sep 12, 2024
7a77677
handling vector inputs and moving to lowering
pawan-nirpal-031 Sep 18, 2024
9970720
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 18, 2024
a71759d
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 18, 2024
fa04409
remove the rogue comment
pawan-nirpal-031 Sep 18, 2024
ad86002
remove the rogue comment
pawan-nirpal-031 Sep 18, 2024
e6a6646
Merge branch 'llvm:main' into main
pawan-nirpal-031 Sep 23, 2024
f303f37
Merge branch 'llvm:main' into main
pawan-nirpal-031 Feb 28, 2025
00faf91
[X86][CodeGen] - Use shift operators instead of built-ins for SSE emu…
pawan-nirpal-031 Feb 28, 2025
20 changes: 10 additions & 10 deletions clang/lib/Headers/mmintrin.h
@@ -880,11 +880,11 @@ _mm_sll_si64(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector containing the left-shifted value. If
/// \a __count is greater or equal to 64, the result is set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_si64(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m,
+                                                              int __count) {
+  if (__builtin_constant_p(__count))
+    return (__m64)((__count > 63) ? 0 : ((long long)__m << __count));
Contributor
Should limit it to 64-bit only?

Contributor Author
You mean also make a similar change for _mm_slli_si32? If so, we may not have an example at hand where that is needed. I will add it as needed/requested.

Contributor
No, I mean long long is less efficient on 32-bit: https://godbolt.org/z/4KeY4hrhq
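
A minimal sketch of the 32-bit concern above, assuming an i386 target; the helper names are illustrative and not part of the patch. The godbolt link above shows the actual codegen comparison.

```c
#include <mmintrin.h>

// Illustrative helpers only (not from the patch), assuming an i386 target.
unsigned long long shift_scalar(unsigned long long v, int n) {
  // A 64-bit scalar shift by a runtime count has no single-instruction form
  // on i386; it is expanded into a shift/shld sequence with a branch, or a
  // libcall, which is the "less efficient" case the reviewer points at.
  return v << n;
}

__m64 shift_vector(__m64 v, int n) {
  // The intrinsic keeps the 64-bit value in an MMX/XMM register, where the
  // full-width shift is a single instruction regardless of pointer width.
  return _mm_slli_si64(v, n);
}
```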

+  return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m), __count));
Comment on lines +885 to +887
Contributor
Why not just do this unconditionally? The fold can always be done in the backend

Contributor Author
The non-constant shifts become worse

https://godbolt.org/z/9xv5hxjv7

I think accommodating both conditions at the DAG level might not be trivial.

Contributor
-1 on the whole concept of doing this in the header

Contributor
@arsenm do we have options? Solving it in CodeGen is too late, as the middle end could already have optimized the shifts.

}
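
To make the trade-off in the thread above concrete, here is a small usage sketch (the function name and the SSE2-enabled build are assumptions, not from the patch): with the __builtin_constant_p guard, a literal count takes the plain C shift that the middle end can fold, while a runtime count still reaches the SSE2 builtin.

```c
#include <mmintrin.h>

// Illustrative usage only; assumes the updated header and an SSE2-enabled build.
__m64 slli_usage_demo(__m64 v, int n) {
  // Literal count: the __builtin_constant_p path applies, and the generated
  // IR contains a plain 64-bit shl (see the updated CHECK lines in
  // clang/test/CodeGen/X86/mmx-builtins.c below).
  __m64 by_const = _mm_slli_si64(v, 3);
  // Runtime count: the guard is false and the call falls through to
  // __builtin_ia32_psllqi128, which the author keeps because the plain C
  // shift generates worse code for non-constant counts.
  __m64 by_var = _mm_slli_si64(v, n);
  return _mm_or_si64(by_const, by_var);
}
```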

/// Right-shifts each 16-bit integer element of the first parameter,
@@ -1115,11 +1115,11 @@ _mm_srl_si64(__m64 __m, __m64 __count)
/// \param __count
/// A 32-bit integer value.
/// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_si64(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m,
+                                                              int __count) {
+  if (__builtin_constant_p(__count))
+    return (__m64)((__count > 63) ? 0 : ((long long)__m >> __count));
+  return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m), __count));
Contributor
@e-kud Feb 28, 2025
I'd like to note that this changes the behavior for negative immediates. Before this change we returned zero; now the result is whatever the shift produces, because a shift by a negative count is UB. The intrinsic description doesn't specify what should happen for a negative __count.

}
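
A minimal sketch of the corner case @e-kud raises; the function name is illustrative and the literal -1 is deliberately out of range.

```c
#include <mmintrin.h>

// Illustrative only -- exercises the negative-immediate corner case above.
__m64 srli_negative_count(__m64 v) {
  // Pre-patch: the call lowered to psrlq, which produces zero for any count
  // outside 0..63, so this returned an all-zero vector.
  // Post-patch: the constant path evaluates (long long)v >> -1 in C, which is
  // undefined behavior, so zero is no longer guaranteed (and the intrinsic
  // documentation does not say what a negative count should do).
  return _mm_srli_si64(v, -1);
}
```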

/// Performs a bitwise AND of two 64-bit integer vectors.
16 changes: 14 additions & 2 deletions clang/test/CodeGen/X86/mmx-builtins.c
@@ -563,7 +563,13 @@ __m64 test_mm_slli_pi32(__m64 a) {

__m64 test_mm_slli_si64(__m64 a) {
// CHECK-LABEL: test_mm_slli_si64
-  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(
+  // CHECK: %__m.addr.i = alloca <1 x i64>, align 8
+  // CHECK: %__count.addr.i = alloca i32, align 4
+  // CHECK: %5 = load <1 x i64>, ptr %__m.addr.i, align 8
+  // CHECK: %6 = bitcast <1 x i64> %5 to i64
+  // CHECK: %7 = load i32, ptr %__count.addr.i, align 4
+  // CHECK: %sh_prom.i = zext i32 %7 to i64
+  // CHECK: %shl.i = shl i64 %6, %sh_prom.i
return _mm_slli_si64(a, 3);
}

@@ -623,7 +629,13 @@ __m64 test_mm_srli_pi32(__m64 a) {

__m64 test_mm_srli_si64(__m64 a) {
// CHECK-LABEL: test_mm_srli_si64
-  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(
+  // CHECK: %__m.addr.i = alloca <1 x i64>, align 8
+  // CHECK: %__count.addr.i = alloca i32, align 4
+  // CHECK: %5 = load <1 x i64>, ptr %__m.addr.i, align 8
+  // CHECK: %6 = bitcast <1 x i64> %5 to i64
+  // CHECK: %7 = load i32, ptr %__count.addr.i, align 4
+  // CHECK: %sh_prom.i = zext i32 %7 to i64
+  // CHECK: %shr.i = ashr i64 %6, %sh_prom.i
return _mm_srli_si64(a, 3);
}
