-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[X86] Truncate i64 sub to i32 when upper 33 bits are zeros #145850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=SSE | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX1 | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 | ||
|
|
||
| ; Loads a <2 x i1> vector from %ptr and sign-extends it to <2 x i64>. | ||
| ; Every RUN configuration expects each lane to be negated with a 32-bit negl | ||
| ; and then moved into a vector register with movd/vmovd before the unpack. | ||
| ; NOTE(review): sext to i64 needs all 64 bits of a true lane set, but negl plus | ||
| ; movd only produces a 32-bit value - presumably the upper half of each lane | ||
| ; ends up zero. Confirm this codegen is intended before landing. | ||
| define <2 x i64> @test1(ptr%ptr) { | ||
| ; SSE-LABEL: test1: | ||
| ; SSE: # %bb.0: # %entry | ||
| ; SSE-NEXT: movzbl (%rdi), %eax | ||
| ; SSE-NEXT: movzbl %al, %ecx | ||
| ; SSE-NEXT: shrb %al | ||
| ; SSE-NEXT: movzbl %al, %eax | ||
| ; SSE-NEXT: negl %eax | ||
| ; SSE-NEXT: movd %eax, %xmm1 | ||
| ; SSE-NEXT: andl $1, %ecx | ||
| ; SSE-NEXT: negl %ecx | ||
| ; SSE-NEXT: movd %ecx, %xmm0 | ||
| ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] | ||
| ; SSE-NEXT: retq | ||
| ; | ||
| ; SSE2-LABEL: test1: | ||
| ; SSE2: # %bb.0: # %entry | ||
| ; SSE2-NEXT: movzbl (%rdi), %eax | ||
| ; SSE2-NEXT: movzbl %al, %ecx | ||
| ; SSE2-NEXT: shrb %al | ||
| ; SSE2-NEXT: movzbl %al, %eax | ||
| ; SSE2-NEXT: negl %eax | ||
| ; SSE2-NEXT: movd %eax, %xmm1 | ||
| ; SSE2-NEXT: andl $1, %ecx | ||
| ; SSE2-NEXT: negl %ecx | ||
| ; SSE2-NEXT: movd %ecx, %xmm0 | ||
| ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] | ||
| ; SSE2-NEXT: retq | ||
| ; | ||
| ; SSSE3-LABEL: test1: | ||
| ; SSSE3: # %bb.0: # %entry | ||
| ; SSSE3-NEXT: movzbl (%rdi), %eax | ||
| ; SSSE3-NEXT: movzbl %al, %ecx | ||
| ; SSSE3-NEXT: shrb %al | ||
| ; SSSE3-NEXT: movzbl %al, %eax | ||
| ; SSSE3-NEXT: negl %eax | ||
| ; SSSE3-NEXT: movd %eax, %xmm1 | ||
| ; SSSE3-NEXT: andl $1, %ecx | ||
| ; SSSE3-NEXT: negl %ecx | ||
| ; SSSE3-NEXT: movd %ecx, %xmm0 | ||
| ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] | ||
| ; SSSE3-NEXT: retq | ||
| ; | ||
| ; SSE41-LABEL: test1: | ||
| ; SSE41: # %bb.0: # %entry | ||
| ; SSE41-NEXT: movzbl (%rdi), %eax | ||
| ; SSE41-NEXT: movzbl %al, %ecx | ||
| ; SSE41-NEXT: shrb %al | ||
| ; SSE41-NEXT: movzbl %al, %eax | ||
| ; SSE41-NEXT: negl %eax | ||
| ; SSE41-NEXT: movd %eax, %xmm1 | ||
| ; SSE41-NEXT: andl $1, %ecx | ||
| ; SSE41-NEXT: negl %ecx | ||
| ; SSE41-NEXT: movd %ecx, %xmm0 | ||
| ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] | ||
| ; SSE41-NEXT: retq | ||
| ; | ||
| ; AVX1-LABEL: test1: | ||
| ; AVX1: # %bb.0: # %entry | ||
| ; AVX1-NEXT: movzbl (%rdi), %eax | ||
| ; AVX1-NEXT: movzbl %al, %ecx | ||
| ; AVX1-NEXT: shrb %al | ||
| ; AVX1-NEXT: movzbl %al, %eax | ||
| ; AVX1-NEXT: negl %eax | ||
| ; AVX1-NEXT: vmovd %eax, %xmm0 | ||
| ; AVX1-NEXT: andl $1, %ecx | ||
| ; AVX1-NEXT: negl %ecx | ||
| ; AVX1-NEXT: vmovd %ecx, %xmm1 | ||
| ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] | ||
| ; AVX1-NEXT: retq | ||
| ; | ||
| ; AVX2-LABEL: test1: | ||
| ; AVX2: # %bb.0: # %entry | ||
| ; AVX2-NEXT: movzbl (%rdi), %eax | ||
| ; AVX2-NEXT: movzbl %al, %ecx | ||
| ; AVX2-NEXT: shrb %al | ||
| ; AVX2-NEXT: movzbl %al, %eax | ||
| ; AVX2-NEXT: negl %eax | ||
| ; AVX2-NEXT: vmovd %eax, %xmm0 | ||
| ; AVX2-NEXT: andl $1, %ecx | ||
| ; AVX2-NEXT: negl %ecx | ||
| ; AVX2-NEXT: vmovd %ecx, %xmm1 | ||
| ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] | ||
| ; AVX2-NEXT: retq | ||
| entry: | ||
| %X = load <2 x i1>, ptr %ptr | ||
| %Y = sext <2 x i1> %X to <2 x i64> | ||
| ret <2 x i64> %Y | ||
| } | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
| ; RUN: llc < %s -mtriple=i686-unknown-unknown -disable-cgp-branch-opts | FileCheck %s --check-prefix=X86 | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-cgp-branch-opts | FileCheck %s --check-prefix=X64 | ||
|
|
||
| ; Truncate to 32 bit subtraction since the upper 48 bits of both zero-extended operands are zeros | ||
| ; The i686 checks still build the high half in edx from the borrow (xorl + sbbl), | ||
| ; while the X64 checks return the result of a single 32-bit subl. | ||
| ; NOTE(review): when %b > %a the i64 difference is negative; a 32-bit register | ||
| ; write presumably leaves the upper half of rax zero rather than sign bits - | ||
| ; confirm the X64 sequence is not a miscompile. | ||
| define i64 @test1(i16 %a, i16 %b) nounwind { | ||
| ; X86-LABEL: test1: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: xorl %edx, %edx | ||
| ; X86-NEXT: subl %ecx, %eax | ||
| ; X86-NEXT: sbbl %edx, %edx | ||
|
||
| ; X86-NEXT: retl | ||
| ; | ||
| ; X64-LABEL: test1: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: movzwl %si, %ecx | ||
| ; X64-NEXT: movzwl %di, %eax | ||
| ; X64-NEXT: subl %ecx, %eax | ||
| ; X64-NEXT: retq | ||
| %zext_a = zext i16 %a to i64 | ||
| %zext_b = zext i16 %b to i64 | ||
| %sub = sub i64 %zext_a, %zext_b | ||
| ret i64 %sub | ||
| } | ||
|
|
||
| ; Do not truncate to 32 bit subtraction if bit 32 (0x100000000) is set in an operand | ||
| ; %or_a forces bit 32 of the minuend to one, so the X64 checks keep the 64-bit | ||
| ; orq/subq sequence instead of narrowing to subl. | ||
| define i64 @test2(i16 %a, i16 %b) nounwind { | ||
| ; X86-LABEL: test2: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: subl %ecx, %eax | ||
| ; X86-NEXT: movl $1, %edx | ||
| ; X86-NEXT: sbbl $0, %edx | ||
| ; X86-NEXT: retl | ||
| ; | ||
| ; X64-LABEL: test2: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: movzwl %di, %ecx | ||
| ; X64-NEXT: movzwl %si, %edx | ||
| ; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 | ||
| ; X64-NEXT: orq %rcx, %rax | ||
| ; X64-NEXT: subq %rdx, %rax | ||
| ; X64-NEXT: retq | ||
| %zext_a = zext i16 %a to i64 | ||
| %zext_b = zext i16 %b to i64 | ||
| %or_a = or i64 %zext_a, 4294967296 | ||
| %sub = sub i64 %or_a, %zext_b | ||
| ret i64 %sub | ||
| } | ||
|
|
||
| ; Do not truncate to 32 bit subtraction in case of sign extension | ||
| ; %a is sign-extended (only %b is zero-extended), so the X64 checks keep | ||
| ; movswq followed by a 64-bit subq. | ||
| define i64 @test3(i16 %a, i16 %b) nounwind { | ||
| ; X86-LABEL: test3: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl %eax, %edx | ||
| ; X86-NEXT: sarl $31, %edx | ||
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: subl %ecx, %eax | ||
| ; X86-NEXT: sbbl $0, %edx | ||
| ; X86-NEXT: retl | ||
| ; | ||
| ; X64-LABEL: test3: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: # kill: def $edi killed $edi def $rdi | ||
| ; X64-NEXT: movswq %di, %rax | ||
| ; X64-NEXT: movzwl %si, %ecx | ||
| ; X64-NEXT: subq %rcx, %rax | ||
| ; X64-NEXT: retq | ||
| %sext_a = sext i16 %a to i64 | ||
| %zext_b = zext i16 %b to i64 | ||
| %sub = sub i64 %sext_a, %zext_b | ||
| ret i64 %sub | ||
| } | ||
|
|
||
| ; Negation of a zero-extended i16 (sub i64 0, zext): the X64 checks expect a | ||
| ; 32-bit negl, while the i686 checks derive the high half in edx via xorl + sbbl. | ||
| ; NOTE(review): for any non-zero %x the i64 result is negative; a 32-bit negl | ||
| ; presumably leaves the upper half of rax zero rather than sign bits - confirm | ||
| ; the X64 sequence is not a miscompile. | ||
| define i64 @test4(i16 %x) nounwind { | ||
| ; X86-LABEL: test4: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: xorl %edx, %edx | ||
| ; X86-NEXT: negl %eax | ||
| ; X86-NEXT: sbbl %edx, %edx | ||
| ; X86-NEXT: retl | ||
| ; | ||
| ; X64-LABEL: test4: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: movzwl %di, %eax | ||
| ; X64-NEXT: negl %eax | ||
| ; X64-NEXT: retq | ||
| %zext_x = zext i16 %x to i64 | ||
| %sub = sub i64 0, %zext_x | ||
| ret i64 %sub | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could the `xor %edx, %edx` be considered redundant here? Since we’re truncating to a 32-bit subtraction and the high bits are known to be zero, and if we remove the redundant `sbb`, is there still a reason to explicitly clear edx?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's necessary to stop a dependency on the old value of EDX stalling the SBB instruction - some CPUs recognise the dependency break (search X86ScheduleBtVer2.td for IsDepBreakingFunction).