From d144da63d0f5fb309ac868bb9e13b4d4ae1e882b Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Thu, 17 Apr 2025 17:37:39 +0000 Subject: [PATCH] [DAGCombiner] Fold and/or of NaN SETCC - tests follow up --- llvm/test/CodeGen/NVPTX/and-or-setcc.ll | 4 +- llvm/test/CodeGen/X86/and-or-setcc.ll | 118 ++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll index 21be9df94d553..6c3514c1ad946 100644 --- a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll +++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %} +; RUN: llc < %s | FileCheck %s +; RUN: %if ptxas %{ llc < %s | %ptxas-verify %} target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll index cb8ecca9348e6..a6a9362908811 100644 --- a/llvm/test/CodeGen/X86/and-or-setcc.ll +++ b/llvm/test/CodeGen/X86/and-or-setcc.ll @@ -55,3 +55,121 @@ define i1 @or_uno(float %a, float %b) { %e = or i1 %c, %d ret i1 %e } + +define <4 x i1> @and_ord_vec(<4 x float> %a, <4 x float> %b) { +; X86-LABEL: and_ord_vec: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fstps (%esp) # 4-byte Folded Spill +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucompp +; X86-NEXT: fnstsw %ax +; X86-NEXT: fucompp +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: fnstsw %ax +; X86-NEXT: fucompp +; X86-NEXT: setnp %dh +; X86-NEXT: shlb $2, %dh +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: fnstsw %ax +; X86-NEXT: flds (%esp) # 4-byte Folded Reload +; X86-NEXT: fxch %st(1) +; X86-NEXT: fucompp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setnp %dl +; X86-NEXT: shlb $3, %dl +; X86-NEXT: orb %dh, %dl +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setnp %dh +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setnp %al +; X86-NEXT: addb %al, %al +; X86-NEXT: orb %dh, %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: movb %al, (%ecx) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %ecx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: and_ord_vec: +; X64: # %bb.0: +; X64-NEXT: cmpordps %xmm1, %xmm0 +; X64-NEXT: retq + %c = fcmp ord <4 x float> %a, %a + %d = fcmp ord <4 x float> %b, %b + %e = and <4 x i1> %c, %d + ret <4 x i1> %e +} + +define <4 x i1> @or_uno_vec(<4 x float> %a, <4 x float> %b) { +; X86-LABEL: or_uno_vec: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fstps (%esp) # 4-byte Folded Spill +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucompp +; X86-NEXT: fnstsw %ax +; X86-NEXT: fucompp +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: fnstsw %ax +; X86-NEXT: fucompp +; X86-NEXT: setp %dh +; X86-NEXT: shlb $2, %dh +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: fnstsw %ax +; X86-NEXT: flds (%esp) # 4-byte Folded Reload +; X86-NEXT: fxch %st(1) +; X86-NEXT: fucompp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setp %dl +; X86-NEXT: shlb $3, %dl +; X86-NEXT: orb %dh, %dl +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %dh +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %al +; X86-NEXT: addb %al, %al +; X86-NEXT: orb %dh, %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: movb %al, (%ecx) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %ecx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: or_uno_vec: +; X64: # %bb.0: +; X64-NEXT: cmpunordps %xmm1, %xmm0 +; X64-NEXT: retq + %c = fcmp uno <4 x float> %a, %a + %d = fcmp uno <4 x float> %b, %b + %e = or <4 x i1> %c, %d + ret <4 x i1> %e +}