[X86] fixup-bw-inst.ll - regenerate test checks to simplify diff for llvm#123787

RKSimon · RKSimon · commit 58be6fd1b4f6 · 2025-01-22T10:47:09.000Z
diff --git a/llvm/test/CodeGen/X86/fixup-bw-inst.ll b/llvm/test/CodeGen/X86/fixup-bw-inst.ll
@@ -1,7 +1,6 @@
-; RUN: llc -fixup-byte-word-insts=1 < %s | \
-; RUN: FileCheck -check-prefix CHECK -check-prefix BWON %s
-; RUN: llc -fixup-byte-word-insts=0 < %s | \
-; RUN: FileCheck -check-prefix CHECK -check-prefix BWOFF %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -fixup-byte-word-insts=1 < %s | FileCheck %s -check-prefix=BWON
+; RUN: llc -fixup-byte-word-insts=0 < %s | FileCheck %s -check-prefix=BWOFF
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -11,22 +10,40 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; This has byte loads interspersed with byte stores, in a single
 ; basic-block loop.  The upper portion should be dead, so the movb loads
 ; should have been changed into movzbl instead.
-; CHECK-LABEL: foo1
-; load:
-; BWON:  movzbl
-; BWOFF: movb
-; store:
-; CHECK: movb
-; load:
-; BWON: movzbl
-; BWOFF: movb
-; store:
-; CHECK: movb
-; CHECK: ret
-define void @foo1(i32 %count,
-                  ptr noalias nocapture %q,
-                  ptr noalias nocapture %p)
-                    nounwind uwtable noinline ssp {
+define void @foo1(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p) nounwind uwtable noinline ssp {
+; BWON-LABEL: foo1:
+; BWON:       ## %bb.0:
+; BWON-NEXT:    testl %edi, %edi
+; BWON-NEXT:    jle LBB0_2
+; BWON-NEXT:    .p2align 4
+; BWON-NEXT:  LBB0_1: ## %a4
+; BWON-NEXT:    ## =>This Inner Loop Header: Depth=1
+; BWON-NEXT:    movzbl (%rsi), %eax
+; BWON-NEXT:    movb %al, (%rdx)
+; BWON-NEXT:    movzbl 1(%rsi), %eax
+; BWON-NEXT:    movb %al, 1(%rdx)
+; BWON-NEXT:    addq $8, %rdx
+; BWON-NEXT:    decl %edi
+; BWON-NEXT:    jne LBB0_1
+; BWON-NEXT:  LBB0_2: ## %._crit_edge
+; BWON-NEXT:    retq
+;
+; BWOFF-LABEL: foo1:
+; BWOFF:       ## %bb.0:
+; BWOFF-NEXT:    testl %edi, %edi
+; BWOFF-NEXT:    jle LBB0_2
+; BWOFF-NEXT:    .p2align 4
+; BWOFF-NEXT:  LBB0_1: ## %a4
+; BWOFF-NEXT:    ## =>This Inner Loop Header: Depth=1
+; BWOFF-NEXT:    movb (%rsi), %al
+; BWOFF-NEXT:    movb %al, (%rdx)
+; BWOFF-NEXT:    movb 1(%rsi), %al
+; BWOFF-NEXT:    movb %al, 1(%rdx)
+; BWOFF-NEXT:    addq $8, %rdx
+; BWOFF-NEXT:    decl %edi
+; BWOFF-NEXT:    jne LBB0_1
+; BWOFF-NEXT:  LBB0_2: ## %._crit_edge
+; BWOFF-NEXT:    retq
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
@@ -56,22 +73,40 @@ a4:                                       ; preds = %3, %.lr.ph
 ; This has word loads interspersed with word stores.
 ; The upper portion should be dead, so the movw loads should have
 ; been changed into movzwl instead.
-; CHECK-LABEL: foo2
-; load:
-; BWON:  movzwl
-; BWOFF: movw
-; store:
-; CHECK: movw
-; load:
-; BWON:  movzwl
-; BWOFF: movw
-; store:
-; CHECK: movw
-; CHECK: ret
-define void @foo2(i32 %count,
-                  ptr noalias nocapture %q,
-                  ptr noalias nocapture %p)
-                    nounwind uwtable noinline ssp {
+define void @foo2(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p) nounwind uwtable noinline ssp {
+; BWON-LABEL: foo2:
+; BWON:       ## %bb.0:
+; BWON-NEXT:    testl %edi, %edi
+; BWON-NEXT:    jle LBB1_2
+; BWON-NEXT:    .p2align 4
+; BWON-NEXT:  LBB1_1: ## %a4
+; BWON-NEXT:    ## =>This Inner Loop Header: Depth=1
+; BWON-NEXT:    movzwl (%rsi), %eax
+; BWON-NEXT:    movw %ax, (%rdx)
+; BWON-NEXT:    movzwl 2(%rsi), %eax
+; BWON-NEXT:    movw %ax, 2(%rdx)
+; BWON-NEXT:    addq $16, %rdx
+; BWON-NEXT:    decl %edi
+; BWON-NEXT:    jne LBB1_1
+; BWON-NEXT:  LBB1_2: ## %._crit_edge
+; BWON-NEXT:    retq
+;
+; BWOFF-LABEL: foo2:
+; BWOFF:       ## %bb.0:
+; BWOFF-NEXT:    testl %edi, %edi
+; BWOFF-NEXT:    jle LBB1_2
+; BWOFF-NEXT:    .p2align 4
+; BWOFF-NEXT:  LBB1_1: ## %a4
+; BWOFF-NEXT:    ## =>This Inner Loop Header: Depth=1
+; BWOFF-NEXT:    movw (%rsi), %ax
+; BWOFF-NEXT:    movw %ax, (%rdx)
+; BWOFF-NEXT:    movw 2(%rsi), %ax
+; BWOFF-NEXT:    movw %ax, 2(%rdx)
+; BWOFF-NEXT:    addq $16, %rdx
+; BWOFF-NEXT:    decl %edi
+; BWOFF-NEXT:    jne LBB1_1
+; BWOFF-NEXT:  LBB1_2: ## %._crit_edge
+; BWOFF-NEXT:    retq
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
@@ -98,11 +133,18 @@ a4:                                       ; preds = %3, %.lr.ph
 
 ; This test contains nothing but a simple byte load and store.
 ; movb encodes smaller, but we use movzbl for the load for better perf.
-; CHECK-LABEL: foo3:
-; BWON:  movzbl
-; BWOFF: movb
-; CHECK: movb
 define void @foo3(ptr%dst, ptr%src) {
+; BWON-LABEL: foo3:
+; BWON:       ## %bb.0:
+; BWON-NEXT:    movzbl (%rsi), %eax
+; BWON-NEXT:    movb %al, (%rdi)
+; BWON-NEXT:    retq
+;
+; BWOFF-LABEL: foo3:
+; BWOFF:       ## %bb.0:
+; BWOFF-NEXT:    movb (%rsi), %al
+; BWOFF-NEXT:    movb %al, (%rdi)
+; BWOFF-NEXT:    retq
   %t0 = load i8, ptr%src, align 1
   store i8 %t0, ptr%dst, align 1
   ret void
@@ -111,11 +153,18 @@ define void @foo3(ptr%dst, ptr%src) {
 ; This test contains nothing but a simple word load and store.  Since
 ; movw and movzwl are the same size, we should always choose to use
 ; movzwl instead.
-; CHECK-LABEL: foo4:
-; BWON:  movzwl
-; BWOFF: movw
-; CHECK: movw
 define void @foo4(ptr%dst, ptr%src) {
+; BWON-LABEL: foo4:
+; BWON:       ## %bb.0:
+; BWON-NEXT:    movzwl (%rsi), %eax
+; BWON-NEXT:    movw %ax, (%rdi)
+; BWON-NEXT:    retq
+;
+; BWOFF-LABEL: foo4:
+; BWOFF:       ## %bb.0:
+; BWOFF-NEXT:    movw (%rsi), %ax
+; BWOFF-NEXT:    movw %ax, (%rdi)
+; BWOFF-NEXT:    retq
   %t0 = load i16, ptr%src, align 2
   store i16 %t0, ptr%dst, align 2
   ret void