-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86][GlobalIsel] Support G_IS_FPCLASS #160850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Mahesh-Attarde (mahesh-attarde) Changes: Some of the dependency opcodes for G_IS_FPCLASS are now supported. This patch adds lowering for G_IS_FPCLASS. Patch is 28.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160850.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index ee9760f881ae9..807a2a7d1542b 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -410,6 +410,9 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+ getActionDefinitionsBuilder(G_IS_FPCLASS)
+ .lower();
+
// fp constants
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64})
diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll
index 960bbf53a6451..d033d7cb66bc9 100644
--- a/llvm/test/CodeGen/X86/isel-fpclass.ll
+++ b/llvm/test/CodeGen/X86/isel-fpclass.ll
@@ -3,8 +3,8 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL
; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL
; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL
-
-; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included.
+; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X86-GISEL
+; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-GISEL
define i1 @isnone_f(float %x) {
; X86-SDAGISEL-LABEL: isnone_f:
@@ -23,6 +23,11 @@ define i1 @isnone_f(float %x) {
; X86-FASTISEL-NEXT: fstp %st(0)
; X86-FASTISEL-NEXT: xorl %eax, %eax
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: isnone_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: xorl %eax, %eax
+; X86-GISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0)
ret i1 %0
@@ -45,6 +50,11 @@ define i1 @isany_f(float %x) {
; X86-FASTISEL-NEXT: fstp %st(0)
; X86-FASTISEL-NEXT: movb $1, %al
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: isany_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movb $1, %al
+; X86-GISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023)
ret i1 %0
@@ -62,16 +72,16 @@ define i1 @issignaling_f(float %x) {
; X86-SDAGISEL-NEXT: andb %cl, %al
; X86-SDAGISEL-NEXT: retl
;
-; X64-LABEL: issignaling_f:
-; X64: # %bb.0:
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X64-NEXT: setl %cl
-; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X64-NEXT: setge %al
-; X64-NEXT: andb %cl, %al
-; X64-NEXT: retq
+; X64-SDAGISEL-LABEL: issignaling_f:
+; X64-SDAGISEL: # %bb.0:
+; X64-SDAGISEL-NEXT: movd %xmm0, %eax
+; X64-SDAGISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-SDAGISEL-NEXT: setl %cl
+; X64-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-SDAGISEL-NEXT: setge %al
+; X64-SDAGISEL-NEXT: andb %cl, %al
+; X64-SDAGISEL-NEXT: retq
;
; X86-FASTISEL-LABEL: issignaling_f:
; X86-FASTISEL: # %bb.0:
@@ -89,6 +99,43 @@ define i1 @issignaling_f(float %x) {
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: issignaling_f:
+; X64-FASTISEL: # %bb.0:
+; X64-FASTISEL-NEXT: movd %xmm0, %eax
+; X64-FASTISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-FASTISEL-NEXT: setl %cl
+; X64-FASTISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-FASTISEL-NEXT: setge %al
+; X64-FASTISEL-NEXT: andb %cl, %al
+; X64-FASTISEL-NEXT: retq
+;
+; X86-GISEL-LABEL: issignaling_f:
+; X86-GISEL: # %bb.0:
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %dl
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setb %al
+; X86-GISEL-NEXT: andb %dl, %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: issignaling_f:
+; X64-GISEL: # %bb.0:
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: seta %dl
+; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-GISEL-NEXT: setb %al
+; X64-GISEL-NEXT: andb %dl, %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
%a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
ret i1 %a0
}
@@ -102,13 +149,13 @@ define i1 @issignaling_f(float %x) {
; X86-SDAGISEL-NEXT: setge %al
; X86-SDAGISEL-NEXT: retl
;
-; X64-LABEL: isquiet_f:
-; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X64-NEXT: setge %al
-; X64-NEXT: retq
+; X64-SDAGISEL-LABEL: isquiet_f:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movd %xmm0, %eax
+; X64-SDAGISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-SDAGISEL-NEXT: setge %al
+; X64-SDAGISEL-NEXT: retq
;
; X86-FASTISEL-LABEL: isquiet_f:
; X86-FASTISEL: # %bb.0: # %entry
@@ -123,6 +170,34 @@ define i1 @issignaling_f(float %x) {
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: isquiet_f:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: movd %xmm0, %eax
+; X64-FASTISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-FASTISEL-NEXT: setge %al
+; X64-FASTISEL-NEXT: retq
+;
+; X86-GISEL-LABEL: isquiet_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setae %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isquiet_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-GISEL-NEXT: setae %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
ret i1 %0
@@ -137,13 +212,13 @@ define i1 @not_isquiet_f(float %x) {
; X86-SDAGISEL-NEXT: setl %al
; X86-SDAGISEL-NEXT: retl
;
-; X64-LABEL: not_isquiet_f:
-; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X64-NEXT: setl %al
-; X64-NEXT: retq
+; X64-SDAGISEL-LABEL: not_isquiet_f:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movd %xmm0, %eax
+; X64-SDAGISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-SDAGISEL-NEXT: setl %al
+; X64-SDAGISEL-NEXT: retq
;
; X86-FASTISEL-LABEL: not_isquiet_f:
; X86-FASTISEL: # %bb.0: # %entry
@@ -158,6 +233,52 @@ define i1 @not_isquiet_f(float %x) {
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: not_isquiet_f:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: movd %xmm0, %eax
+; X64-FASTISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-FASTISEL-NEXT: setl %al
+; X64-FASTISEL-NEXT: retq
+;
+; X86-GISEL-LABEL: not_isquiet_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setb %dl
+; X86-GISEL-NEXT: orb %cl, %dl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %cl
+; X86-GISEL-NEXT: orb %dl, %cl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %dl
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setb %al
+; X86-GISEL-NEXT: andb %dl, %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: not_isquiet_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: setb %dl
+; X64-GISEL-NEXT: orb %cl, %dl
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: sete %cl
+; X64-GISEL-NEXT: orb %dl, %cl
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: seta %dl
+; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-GISEL-NEXT: setb %al
+; X64-GISEL-NEXT: andb %dl, %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
ret i1 %0
@@ -172,13 +293,13 @@ define i1 @isinf_f(float %x) {
; X86-SDAGISEL-NEXT: sete %al
; X86-SDAGISEL-NEXT: retl
;
-; X64-LABEL: isinf_f:
-; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: sete %al
-; X64-NEXT: retq
+; X64-SDAGISEL-LABEL: isinf_f:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movd %xmm0, %eax
+; X64-SDAGISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-SDAGISEL-NEXT: sete %al
+; X64-SDAGISEL-NEXT: retq
;
; X86-FASTISEL-LABEL: isinf_f:
; X86-FASTISEL: # %bb.0: # %entry
@@ -193,6 +314,34 @@ define i1 @isinf_f(float %x) {
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: isinf_f:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: movd %xmm0, %eax
+; X64-FASTISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-FASTISEL-NEXT: sete %al
+; X64-FASTISEL-NEXT: retq
+;
+; X86-GISEL-LABEL: isinf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isinf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: sete %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
ret i1 %0
@@ -207,13 +356,13 @@ define i1 @not_isinf_f(float %x) {
; X86-SDAGISEL-NEXT: setne %al
; X86-SDAGISEL-NEXT: retl
;
-; X64-LABEL: not_isinf_f:
-; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setne %al
-; X64-NEXT: retq
+; X64-SDAGISEL-LABEL: not_isinf_f:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movd %xmm0, %eax
+; X64-SDAGISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-SDAGISEL-NEXT: setne %al
+; X64-SDAGISEL-NEXT: retq
;
; X86-FASTISEL-LABEL: not_isinf_f:
; X86-FASTISEL: # %bb.0: # %entry
@@ -228,6 +377,40 @@ define i1 @not_isinf_f(float %x) {
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: not_isinf_f:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: movd %xmm0, %eax
+; X64-FASTISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-FASTISEL-NEXT: setne %al
+; X64-FASTISEL-NEXT: retq
+;
+; X86-GISEL-LABEL: not_isinf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setb %dl
+; X86-GISEL-NEXT: orb %cl, %dl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %al
+; X86-GISEL-NEXT: orb %dl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: not_isinf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: setb %dl
+; X64-GISEL-NEXT: orb %cl, %dl
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: seta %al
+; X64-GISEL-NEXT: orb %dl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
ret i1 %0
@@ -240,12 +423,12 @@ define i1 @is_plus_inf_f(float %x) {
; X86-SDAGISEL-NEXT: sete %al
; X86-SDAGISEL-NEXT: retl
;
-; X64-LABEL: is_plus_inf_f:
-; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: sete %al
-; X64-NEXT: retq
+; X64-SDAGISEL-LABEL: is_plus_inf_f:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movd %xmm0, %eax
+; X64-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-SDAGISEL-NEXT: sete %al
+; X64-SDAGISEL-NEXT: retq
;
; X86-FASTISEL-LABEL: is_plus_inf_f:
; X86-FASTISEL: # %bb.0: # %entry
@@ -258,6 +441,30 @@ define i1 @is_plus_inf_f(float %x) {
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: is_plus_inf_f:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: movd %xmm0, %eax
+; X64-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-FASTISEL-NEXT: sete %al
+; X64-FASTISEL-NEXT: retq
+;
+; X86-GISEL-LABEL: is_plus_inf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: is_plus_inf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: sete %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
ret i1 %0
@@ -270,12 +477,12 @@ define i1 @is_minus_inf_f(float %x) {
; X86-SDAGISEL-NEXT: sete %al
; X86-SDAGISEL-NEXT: retl
;
-; X64-LABEL: is_minus_inf_f:
-; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X64-NEXT: sete %al
-; X64-NEXT: retq
+; X64-SDAGISEL-LABEL: is_minus_inf_f:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movd %xmm0, %eax
+; X64-SDAGISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-SDAGISEL-NEXT: sete %al
+; X64-SDAGISEL-NEXT: retq
;
; X86-FASTISEL-LABEL: is_minus_inf_f:
; X86-FASTISEL: # %bb.0: # %entry
@@ -288,6 +495,30 @@ define i1 @is_minus_inf_f(float %x) {
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: is_minus_inf_f:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: movd %xmm0, %eax
+; X64-FASTISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-FASTISEL-NEXT: sete %al
+; X64-FASTISEL-NEXT: retq
+;
+; X86-GISEL-LABEL: is_minus_inf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: is_minus_inf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-GISEL-NEXT: sete %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
ret i1 %0
@@ -300,12 +531,12 @@ define i1 @not_is_minus_inf_f(float %x) {
; X86-SDAGISEL-NEXT: setne %al
; X86-SDAGISEL-NEXT: retl
;
-; X64-LABEL: not_is_minus_inf_f:
-; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X64-NEXT: setne %al
-; X64-NEXT: retq
+; X64-SDAGISEL-LABEL: not_is_minus_inf_f:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movd %xmm0, %eax
+; X64-SDAGISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-SDAGISEL-NEXT: setne %al
+; X64-SDAGISEL-NEXT: retq
;
; X86-FASTISEL-LABEL: not_is_minus_inf_f:
; X86-FASTISEL: # %bb.0: # %entry
@@ -318,6 +549,52 @@ define i1 @not_is_minus_inf_f(float %x) {
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: not_is_minus_inf_f:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: movd %xmm0, %eax
+; X64-FASTISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-FASTISEL-NEXT: setne %al
+; X64-FASTISEL-NEXT: retq
+;
+; X86-GISEL-LABEL: not_is_minus_inf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: pushl %ebx
+; X86-GISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-GISEL-NEXT: .cfi_offset %ebx, -8
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: movl %eax, %ecx
+; X86-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %edx, %edx
+; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
+; X86-GISEL-NEXT: setb %bl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %ah
+; X86-GISEL-NEXT: orb %dl, %ah
+; X86-GISEL-NEXT: orb %bl, %ah
+; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %al
+; X86-GISEL-NEXT: orb %ah, %al
+; X86-GISEL-NEXT: popl %ebx
+; X86-GISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: not_is_minus_inf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: movl %eax, %ecx
+; X64-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %edx, %edx
+; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
+; X64-GISEL-NEXT: setb %sil
+; X64-GISEL-NEXT: orb %dl, %sil
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: set...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
…vm#161726) Just like with private, the lowering for these bounds is all pretty trivial. This patch enables them for reduction, which has everything in common except the init pattern, but that is handled/managed by Sema. This also adds sufficient testing to spot-check the allocation/initialization/destruction/etc.
Primarily targeted simplification case of substring being a singleton by inlining a search loop (with an exception where runtime function performs better). Few trivial simplifications also covered. This is a reapply of llvm#157883 with additional fix to avoid generation of new ops during analysis that mess up greedy rewriter if we end up bailing out without any simplification but just leaving few stranded new ops. For technical reasons this patch comes as a new PR.
These insertion points were added in llvm#146551 and llvm#146908 to support the one-shot dialect conversion driver which performs changes to the IR immediately and would otherwise invalidate previous insertion points. Since then, the insertion point has been made resilient against op erasure (llvm#146955) making the changes now redundant.
…allow element extraction/insertion intrinsics to be used in constexpr llvm#159753 (llvm#161302) FIXES: llvm#159753 Enable constexpr evaluation for X86 vector element extract/insert builtins and add corresponding tests. The index is masked with `(Idx & (NumElts - 1))`, matching existing CodeGen.
…mask` instead of hard code zero (llvm#161771) There is no test change at this moment because we don't have a target that has this feature by default yet.
This patch introduces support for the jobserver protocol to control parallelism for device offloading tasks. When running a parallel build with a modern build system like `make -jN` or `ninja -jN`, each Clang process might also be configured to use multiple threads for its own tasks (e.g., via `--offload-jobs=4`). This can lead to an explosion of threads (N * 4), causing heavy system load, CPU contention, and ultimately slowing down the entire build. This patch allows Clang to act as a cooperative client of the build system's jobserver. It extends the `--offload-jobs` option to accept the value 'jobserver'. With the recent addition of jobserver support to the Ninja build system, this functionality now benefits users of both Make and Ninja. When `--offload-jobs=jobserver` is specified, Clang's thread pool will: 1. Parse the MAKEFLAGS environment variable to find the jobserver details. 2. Before dispatching a task, acquire a job slot from the jobserver. If none are available, the worker thread will block. 3. Release the job slot once the task is complete. This ensures that the total number of active offload tasks across all Clang processes does not exceed the limit defined by the parent build system, leading to more efficient and controlled parallel builds. Implementation: - A new library, `llvm/Support/Jobserver`, is added to provide a platform-agnostic client for the jobserver protocol, with backends for Unix (FIFO) and Windows (semaphores). - `llvm/Support/ThreadPool` and `llvm/Support/Parallel` are updated with a `jobserver_concurrency` strategy to integrate this logic. - The Clang driver and linker-wrapper are modified to recognize the 'jobserver' argument and enable the new thread pool strategy. - New unit and integration tests are added to validate the feature.
The Ada front end can emit somewhat complicated DWARF expressions for the offset of a field. While working in this area I found that I needed DW_OP_rot (to implement a branch-free computation -- it looked more difficult to add support for branching); and DW_OP_neg and DW_OP_abs (just basic functionality).
When using information from dereferenceable assumptions, we need to make sure that the memory is not freed between the assume and the specified context instruction. Instead of just checking canBeFreed, check if there are any calls that may free between the assume and the context instruction. This patch introduces a willNotFreeBetween to check for calls that may free between an assume and a context instruction, to also be used in llvm#161255. PR: llvm#161725
Add support for the standalone OpenMP tile construct: ```f90 !$omp tile sizes(...) DO i = 1, 100 ... ``` This is complementary to llvm#143715 which added support for the tile construct as part of another loop-associated construct such as worksharing-loop, distribute, etc.
Now that llvm#161007 will attempt to fold this back to ADD(x,x) in X86FixupInstTunings, we can more aggressively create X86ISD::VSHLI nodes to avoid missed optimisations due to oneuse limits, avoids unnecessary freezes and allows AVX512 to fold to mi memory folding variants. I've currently limited SSE targets to cases where ADD is the only user of x to prevent extra moves - AVX shift patterns benefit from breaking the ADD+ADD+ADD chains into shifts, but it's not so beneficial on SSE with the extra moves.
…9507) The transformation pattern is identical to the uint_to_fp conversion from v32i1 to v32f32.
…lvm#161666) This flag is really convenient in most cases. It's easy to figure out what value to pass for most cases. However, it can sometimes match too many times, like for template functions that have non-deduced (aka. explicitly specified) template parameters - because they don't appear in the parameter list, thus they are not accounted for in the current logic. It would be nice to improve `getFunctionName` but I'd say to just settle on using USRs. So this PR enables passing USRs to the flag, while keeping previous behavior.
Also remove the gratuitous spaces after "," that break strict CSV compliance. As the debug function name does not uniquely identify an entry point, add the main-TU name and the USR values for each entry point snapshot to reduce the likelihood of collisions between declarations across large projects. While adding a filename to each row increases the file size substantially, the difference in size for the compressed file is acceptable. I evaluated it on our set of 200+ open source C and C++ projects with 3M entry points, and got the following results when adding these two columns: - Raw CSV file increased from 530MB to 1.1GB - Compressed file (XZ) increased from 54 MB to 78 MB -- CPP-7098 --------- Co-authored-by: Balazs Benics <[email protected]>
…checkers (llvm#161664) Previously, when using `-analyze-function` to target a specific function, the analyzer would incorrectly report "Every top-level function was skipped" even when the function was successfully analyzed by syntax-only checkers. This happened because `NumFunctionsAnalyzed` only counted path-sensitive analysis, not syntax-only analysis. The misuse detection logic would see 0 functions analyzed and incorrectly conclude the function wasn't found.
revert this patch due to failure in unittests/Support, e.g. https://lab.llvm.org/buildbot/#/builders/33/builds/24178/steps/6/logs/FAIL__LLVM-Unit__SupportTests_61 This reverts commit ffc503e.
…atch legalized types (llvm#161802) When running with `-debug`, print a note when the replacement types (during a `ConversionPatternRewriter::replaceOp`) do not match the legalized types of the current type converter. That's not an API violation, but it could indicate a bug in user code. Example output: ``` [dialect-conversion:1] ** Replace : 'test.multiple_1_to_n_replacement'(0x56b745f99470) [dialect-conversion:1] Note: Replacing op result of type f16 with value(s) of type (f16, f16), but the legalized type(s) is/are (f16) ```
… msvc (llvm#161811) Clang 20 (and early 21 versions; let's hope it can be fixed before the later versions before such versions become relevant for libcxx CI) have got an issue with its intrinsics headers, where they use unreserved names, that users are allowed to override. See llvm#161808 for the issue report. This only crops up in the MSVC build configurations, as recent versions of some MSVC/UCRT headers include `<intrin.h>`, which ends up pulling in most intrinsics headers, exposing this issue in the Clang headers. This should unblock llvm#161736 from being merged.
The i16/i32 shuffle intrinsics (`pshufw`, `pshuflw`, `pshufhw`, `pshufd`) currently cannot be used in constant expressions. This patch adds support in both bytecode interpreter (InterpBuiltin.cpp) and constant evaluator (ExprConstant.cpp) for pshuf intrinsics, enabling their use in constant expressions. ## Intrinsics covered - `_mm_shuffle_pi16` (MMX `pshufw`) - `_mm_shufflelo_epi16` / `_mm_shufflehi_epi16` - `_mm_shuffle_epi32` - Their AVX2/AVX512 vector-width variants - Masked and maskz forms (handled indirectly via `__builtin_ia32_select*`) Fixes llvm#156611
Replace mul and mul_u ops with a neg operation if their second operand is a splat value -1. Apply the optimization also for mul_u ops if their first operand is a splat value -1 due to their commutativity.
``` /home/david.spickett/llvm-project/llvm/lib/Analysis/HashRecognize.cpp:100:54: warning: comparison of integers of different signs: 'typename iterator_traits<ilist_iterator_w_bits<node_options<Instruction, true, false, void, true, BasicBlock>, false, false>>::difference_type' (aka 'int') and 'size_type' (aka 'unsigned int') [-Wsign-compare] 100 | return std::distance(Latch->begin(), Latch->end()) != Visited.size(); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ ``` By using Latch->size() instead.
This should be always on. Fixes SWDEV-555931.
…m#161786) Tolerate setting negative values in tablegen, and store them as a saturated uint8_t value. This will allow naive uses of the copy cost to directly add it as a cost without considering the degenerate negative case. The degenerate negative cases are only used in InstrEmitter / DAG scheduling, so leave the special case processing there. There are also fixmes about this system already there. This is the expedient fix for an out of tree target regression after llvm#160084. Currently targets can set a negative copy cost to mark copies as "impossible". However essentially all the in-tree uses only uses this for non-allocatable condition registers. We probably should replace the InstrEmitter/DAG scheduler uses with a more direct check for a copyable register but that has test changes.
Some of the dependency opcodes for G_IS_FPCLASS are now supported. This patch adds lowering for G_IS_FPCLASS.
The test is updated for the GISEL run in a separate test PR.