Skip to content

Commit f435930

Browse files
authored
[X86] combineVectorSizedSetCCEquality - allow 256/512-bit vector icmp_ne/eq zero comparisons (#163373)
We avoid creating vector movmsk/ptest comparisons with zero if we can just use scalar OR instead, but this doesn't make sense for 256-bit or larger vectors, where the scalar lowering creates a more complex OR chain. This more closely matches what we do for icmp_ne/eq against non-zero values. I'm hoping that we can eventually allow even larger vectors to be handled with OR/AND chains - but for now this just allows us to handle legal 256/512-bit vector widths.
1 parent b2797d9 commit f435930

File tree

2 files changed

+42
-25
lines changed

2 files changed

+42
-25
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22866,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
2286622866
// be generated by the memcmp expansion pass with oversized integer compares
2286722867
// (see PR33325).
2286822868
bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
22869-
if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
22869+
if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero)
2287022870
return SDValue();
2287122871

2287222872
// Don't perform this combine if constructing the vector will be expensive.

llvm/test/CodeGen/X86/setcc-wide-types.ll

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,15 +1493,23 @@ define i1 @allbits_i128_load_arg(ptr %w) {
14931493
}
14941494

14951495
define i1 @anybits_i256_load_arg(ptr %w) {
1496-
; ANY-LABEL: anybits_i256_load_arg:
1497-
; ANY: # %bb.0:
1498-
; ANY-NEXT: movq (%rdi), %rax
1499-
; ANY-NEXT: movq 8(%rdi), %rcx
1500-
; ANY-NEXT: orq 24(%rdi), %rcx
1501-
; ANY-NEXT: orq 16(%rdi), %rax
1502-
; ANY-NEXT: orq %rcx, %rax
1503-
; ANY-NEXT: setne %al
1504-
; ANY-NEXT: retq
1496+
; SSE-LABEL: anybits_i256_load_arg:
1497+
; SSE: # %bb.0:
1498+
; SSE-NEXT: movq (%rdi), %rax
1499+
; SSE-NEXT: movq 8(%rdi), %rcx
1500+
; SSE-NEXT: orq 24(%rdi), %rcx
1501+
; SSE-NEXT: orq 16(%rdi), %rax
1502+
; SSE-NEXT: orq %rcx, %rax
1503+
; SSE-NEXT: setne %al
1504+
; SSE-NEXT: retq
1505+
;
1506+
; AVXANY-LABEL: anybits_i256_load_arg:
1507+
; AVXANY: # %bb.0:
1508+
; AVXANY-NEXT: vmovdqu (%rdi), %ymm0
1509+
; AVXANY-NEXT: vptest %ymm0, %ymm0
1510+
; AVXANY-NEXT: setne %al
1511+
; AVXANY-NEXT: vzeroupper
1512+
; AVXANY-NEXT: retq
15051513
%ld = load i256, ptr %w
15061514
%cmp = icmp ne i256 %ld, 0
15071515
ret i1 %cmp
@@ -1552,21 +1560,30 @@ define i1 @allbits_i256_load_arg(ptr %w) {
15521560
}
15531561

15541562
define i1 @anybits_i512_load_arg(ptr %w) {
1555-
; ANY-LABEL: anybits_i512_load_arg:
1556-
; ANY: # %bb.0:
1557-
; ANY-NEXT: movq 16(%rdi), %rax
1558-
; ANY-NEXT: movq (%rdi), %rcx
1559-
; ANY-NEXT: movq 8(%rdi), %rdx
1560-
; ANY-NEXT: movq 24(%rdi), %rsi
1561-
; ANY-NEXT: orq 56(%rdi), %rsi
1562-
; ANY-NEXT: orq 40(%rdi), %rdx
1563-
; ANY-NEXT: orq %rsi, %rdx
1564-
; ANY-NEXT: orq 48(%rdi), %rax
1565-
; ANY-NEXT: orq 32(%rdi), %rcx
1566-
; ANY-NEXT: orq %rax, %rcx
1567-
; ANY-NEXT: orq %rdx, %rcx
1568-
; ANY-NEXT: setne %al
1569-
; ANY-NEXT: retq
1563+
; NO512-LABEL: anybits_i512_load_arg:
1564+
; NO512: # %bb.0:
1565+
; NO512-NEXT: movq 16(%rdi), %rax
1566+
; NO512-NEXT: movq (%rdi), %rcx
1567+
; NO512-NEXT: movq 8(%rdi), %rdx
1568+
; NO512-NEXT: movq 24(%rdi), %rsi
1569+
; NO512-NEXT: orq 56(%rdi), %rsi
1570+
; NO512-NEXT: orq 40(%rdi), %rdx
1571+
; NO512-NEXT: orq %rsi, %rdx
1572+
; NO512-NEXT: orq 48(%rdi), %rax
1573+
; NO512-NEXT: orq 32(%rdi), %rcx
1574+
; NO512-NEXT: orq %rax, %rcx
1575+
; NO512-NEXT: orq %rdx, %rcx
1576+
; NO512-NEXT: setne %al
1577+
; NO512-NEXT: retq
1578+
;
1579+
; AVX512-LABEL: anybits_i512_load_arg:
1580+
; AVX512: # %bb.0:
1581+
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
1582+
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
1583+
; AVX512-NEXT: kortestw %k0, %k0
1584+
; AVX512-NEXT: setne %al
1585+
; AVX512-NEXT: vzeroupper
1586+
; AVX512-NEXT: retq
15701587
%ld = load i512, ptr %w
15711588
%cmp = icmp ne i512 %ld, 0
15721589
ret i1 %cmp

0 commit comments

Comments
 (0)