4 changes: 2 additions & 2 deletions llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll
@@ -293,7 +293,7 @@ define i32 @store_with_pointer_phi_incoming_phi(ptr %A, ptr %B, ptr %C, i1 %c.0,
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
-; CHECK-EMPTY
+; CHECK-EMPTY:
entry:
br label %loop.header

@@ -376,7 +376,7 @@ define i32 @store_with_pointer_phi_incoming_phi_irreducible_cycle(ptr %A, ptr %B
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
-; CHECK-EMPTY
+; CHECK-EMPTY:
entry:
br label %loop.header

@@ -1,6 +1,6 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

-; CHECK=LABEL: UniformityInfo for function 'basic':
+; CHECK-LABEL: UniformityInfo for function 'basic':
; CHECK: CYCLES ASSSUMED DIVERGENT:
; CHECK: depth=1: entries(P T) Q
define amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) {
@@ -37,7 +37,7 @@ exit:
ret void
}

-; CHECK=LABEL: UniformityInfo for function 'nested':
+; CHECK-LABEL: UniformityInfo for function 'nested':
; CHECK: CYCLES ASSSUMED DIVERGENT:
; CHECK: depth=1: entries(P T) Q A C B
define amdgpu_kernel void @nested(i32 %a, i32 %b, i32 %c) {
@@ -1,6 +1,6 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

-; CHECK=LABEL: UniformityInfo for function 'basic':
+; CHECK-LABEL: UniformityInfo for function 'basic':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=1: entries(P T) Q
@@ -31,7 +31,7 @@
; at P should not be marked divergent.

define amdgpu_kernel void @nested_irreducible(i32 %a, i32 %b, i32 %c) {
-; CHECK=LABEL: UniformityInfo for function 'nested_irreducible':
+; CHECK-LABEL: UniformityInfo for function 'nested_irreducible':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK-DAG: depth=2: entries(P T) R Q
@@ -118,7 +118,7 @@ exit:
; Thus, any PHI at P should not be marked divergent.

define amdgpu_kernel void @header_label_1(i32 %a, i32 %b, i32 %c) {
-; CHECK=LABEL: UniformityInfo for function 'header_label_1':
+; CHECK-LABEL: UniformityInfo for function 'header_label_1':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=1: entries(H) Q P U T R
@@ -169,7 +169,7 @@ X:
br label %G

G:
-; C HECK: DIVERGENT: %div.user =
+; CHECK: DIVERGENT: %div.user =
%div.user = add i32 %uni.inc, 5
br i1 %uni.cond, label %G, label %Y
; CHECK: DIVERGENT: %div.user =
2 changes: 1 addition & 1 deletion llvm/test/Bitcode/convergence-control.ll
@@ -18,7 +18,7 @@ B:
C:
; CHECK-LABEL: C:
; CHECK: [[C:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[B]]) ]
-; CHEC K: call void @f() [ "convergencectrl"(token [[C]]) ]
+; CHECK: call void @f() [ "convergencectrl"(token [[C]]) ]
;
%c = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %b) ]
call void @f() [ "convergencectrl"(token %c) ]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll
@@ -320,8 +320,8 @@ declare { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3lane.
define %struct.bfloat16x8x3_t @test_vld3q_lane_bf16(ptr %ptr, [3 x <8 x bfloat>] %src.coerce) local_unnamed_addr nounwind {
; CHECK-LABEL: test_vld3q_lane_bf16:
; CHECK: // %bb.0: // %entry
-; CHECKT: ld3 { v0.h, v1.h, v2.h }[7], [x0]
-; CHECKT: ret
+; CHECK: ld3 { v0.h, v1.h, v2.h }[7], [x0]
+; CHECK: ret
entry:
%src.coerce.fca.0.extract = extractvalue [3 x <8 x bfloat>] %src.coerce, 0
%src.coerce.fca.1.extract = extractvalue [3 x <8 x bfloat>] %src.coerce, 1
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/aarch64-mulv.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=aarch64 -aarch64-enable-sink-fold=true -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -aarch64-enable-sink-fold=true -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

-; CHECK_GI: warning: Instruction selection used fallback path for mulv_v3i64
+; CHECK-GI: warning: Instruction selection used fallback path for mulv_v3i64

declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.mul.v3i8(<3 x i8>)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -8,7 +8,7 @@ define i32 @fct(i32 %i1, i32 %i2) {
; Sign extension is used more than once, thus it should not be folded.
; CodeGenPrepare is not sharing sext across uses, thus this is folded because
; of that.
-; _CHECK-NOT: , sxtw]
+; CHECK-NOT: , sxtw]
entry:
%idxprom = sext i32 %i1 to i64
%0 = load ptr, ptr @block, align 8
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/cxx-tlscc.ll
@@ -46,7 +46,7 @@ __tls_init.exit:
; CHECK-NOT: stp x20, x19
; FIXME: The splitting logic in the register allocator fails to split along
; control flow here, we used to get this right by accident before...
-; CHECK-NOTXX: stp x14, x13
+; COM: CHECK-NOT: stp x14, x13
; CHECK-NOT: stp x12, x11
; CHECK-NOT: stp x10, x9
; CHECK-NOT: stp x8, x7
Expand All @@ -65,7 +65,7 @@ __tls_init.exit:
; CHECK-NOT: ldp x8, x7
; CHECK-NOT: ldp x10, x9
; CHECK-NOT: ldp x12, x11
-; CHECK-NOTXX: ldp x14, x13
+; COM: CHECK-NOT: ldp x14, x13
; CHECK-NOT: ldp x20, x19
; CHECK-NOT: ldp d1, d0
; CHECK-NOT: ldp d3, d2
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AArch64/fp16-fmla.ll
@@ -84,11 +84,11 @@ entry:

define <4 x half> @test_FMLAv4i16_indexed_OP1(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLAv4i16_indexed_OP1:
-; CHECK-FIXME: Currently LLVM produces inefficient code:
+; FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
-; CHECK-FIXME: It should instead produce the following instruction:
-; CHECK-FIXME: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; FIXME: It should instead produce the following instruction:
+; COM: CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%mul = mul <4 x i16> %c, %b
%m = bitcast <4 x i16> %mul to <4 x half>
Expand All @@ -98,11 +98,11 @@ entry:

define <4 x half> @test_FMLAv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLAv4i16_indexed_OP2:
-; CHECK-FIXME: Currently LLVM produces inefficient code:
+; FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
-; CHECK-FIXME: It should instead produce the following instruction:
-; CHECK-FIXME: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; FIXME: It should instead produce the following instruction:
+; COM: CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%mul = mul <4 x i16> %c, %b
%m = bitcast <4 x i16> %mul to <4 x half>
Expand All @@ -112,11 +112,11 @@ entry:

define <8 x half> @test_FMLAv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLAv8i16_indexed_OP1:
-; CHECK-FIXME: Currently LLVM produces inefficient code:
+; FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
-; CHECK-FIXME: It should instead produce the following instruction:
-; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; FIXME: It should instead produce the following instruction:
+; COM: CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%mul = mul <8 x i16> %c, %b
%m = bitcast <8 x i16> %mul to <8 x half>
Expand All @@ -126,11 +126,11 @@ entry:

define <8 x half> @test_FMLAv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLAv8i16_indexed_OP2:
-; CHECK-FIXME: Currently LLVM produces inefficient code:
+; FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
-; CHECK-FIXME: It should instead produce the following instruction:
-; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; FIXME: It should instead produce the following instruction:
+; COM: CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%mul = mul <8 x i16> %c, %b
%m = bitcast <8 x i16> %mul to <8 x half>
@@ -178,11 +178,11 @@ entry:

define <4 x half> @test_FMLSv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLSv4i16_indexed_OP2:
-; CHECK-FIXME: Currently LLVM produces inefficient code:
+; FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
-; CHECK-FIXME: It should instead produce the following instruction:
-; CHECK-FIXME: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; FIXME: It should instead produce the following instruction:
+; COM: CHECK: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%mul = mul <4 x i16> %c, %b
%m = bitcast <4 x i16> %mul to <4 x half>
Expand All @@ -192,12 +192,12 @@ entry:

define <8 x half> @test_FMLSv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLSv8i16_indexed_OP1:
-; CHECK-FIXME: Currently LLVM produces inefficient code:
+; FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
-; CHECK-FIXME: It should instead produce the following instruction:
-; CHECK-FIXME: fneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; FIXME: It should instead produce the following instruction:
+; COM: CHECK: fneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; COM: CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%mul = mul <8 x i16> %c, %b
%m = bitcast <8 x i16> %mul to <8 x half>
Expand All @@ -207,11 +207,11 @@ entry:

define <8 x half> @test_FMLSv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLSv8i16_indexed_OP2:
-; CHECK-FIXME: Currently LLVM produces inefficient code:
+; FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
-; CHECK-FIXME: It should instead produce the following instruction:
-; CHECK-FIXME: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; FIXME: It should instead produce the following instruction:
+; COM: CHECK: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%mul = mul <8 x i16> %c, %b
%m = bitcast <8 x i16> %mul to <8 x half>
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
@@ -83,11 +83,11 @@ entry:
ret double 0x400921FB54442D18

; CHECK-LABEL: litf:
-; CHECK-DONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
-; CHECK-DONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
-; CHECK-FUSE: mov [[R:x[0-9]+]], #11544
-; CHECK-FUSE: movk [[R]], #21572, lsl #16
-; CHECK-FUSE: movk [[R]], #8699, lsl #32
-; CHECK-FUSE: movk [[R]], #16393, lsl #48
-; CHECK-FUSE: fmov {{d[0-9]+}}, [[R]]
+; CHECKDONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
+; CHECKDONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
+; CHECKFUSE: mov [[R:x[0-9]+]], #11544
+; CHECKFUSE: movk [[R]], #21572, lsl #16
+; CHECKFUSE: movk [[R]], #8699, lsl #32
+; CHECKFUSE: movk [[R]], #16393, lsl #48
+; CHECKFUSE: fmov {{d[0-9]+}}, [[R]]
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/speculation-hardening-sls.ll
@@ -96,7 +96,7 @@ entry:
; CHECK-NEXT: {{^[ \t]+b }}
; CHECK-NEXT: //NO_APP
; For direct branches, no mitigation is needed.
-; ISDDSB-NOT: dsb sy
+; ISBDSB-NOT: dsb sy
; SB-NOT: {{ sb$}}

asm.fallthrough: ; preds = %entry
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/sve-calling-convention.ll
@@ -149,7 +149,7 @@ define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>
}

; Test that a scalable predicate argument in [1 x <vscale x 32 x i1>] type is assigned to two P registers.
-; CHECK-LABLE: name: sve_signature_pred_1xv32i1
+; CHECK-LABEL: name: sve_signature_pred_1xv32i1
; CHECK: [[RES1:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[RES0:%[0-9]+]]:ppr = COPY $p2
; CHECK: $p0 = COPY [[RES0]]
Expand All @@ -160,7 +160,7 @@ define [1 x <vscale x 32 x i1>] @sve_signature_pred_1xv32i1([1 x <vscale x 32 x
}

; Test that a scalable predicate argument in [2 x <vscale x 32 x i1>] type is assigned to four P registers.
-; CHECK-LABLE: name: sve_signature_pred_2xv32i1
+; CHECK-LABEL: name: sve_signature_pred_2xv32i1
; CHECK: [[RES3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[RES2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[RES1:%[0-9]+]]:ppr = COPY $p1
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/swift-error.ll
@@ -10,7 +10,7 @@ entry:
ret void
}

-; CHEECK-LABEL: g
+; CHECK-LABEL: g
; CHECK: str x30, [sp, #-16]!
; CHECK: bl f
; CHECK: ldr x30, [sp], #16
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/and.ll
@@ -198,9 +198,9 @@ define amdgpu_kernel void @s_and_constant_i64(ptr addrspace(1) %out, i64 %a) {
}

; FUNC-LABEL: {{^}}s_and_multi_use_constant_i64:
-; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
-; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
-; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[KLO]]:[[KHI]]]
+; SI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
+; SI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[KLO]]:[[KHI]]]
define amdgpu_kernel void @s_and_multi_use_constant_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%and0 = and i64 %a, 549756338176
%and1 = and i64 %b, 549756338176
@@ -398,7 +398,7 @@ define amdgpu_kernel void @s_and_inline_imm_1_i64(ptr addrspace(1) %out, ptr add
}

; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64
-; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0

; SI: s_load_dword
; SI: s_load_dwordx2
Expand All @@ -413,7 +413,7 @@ define amdgpu_kernel void @s_and_inline_imm_1.0_i64(ptr addrspace(1) %out, ptr a
}

; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64
-; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0

; SI: s_load_dword
; SI: s_load_dwordx2
Expand All @@ -428,7 +428,7 @@ define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(ptr addrspace(1) %out, p
}

; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64
-; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5

; SI: s_load_dword
; SI: s_load_dwordx2
Expand All @@ -443,7 +443,7 @@ define amdgpu_kernel void @s_and_inline_imm_0.5_i64(ptr addrspace(1) %out, ptr a
}

; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64:
-; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5

; SI: s_load_dword
; SI: s_load_dwordx2
@@ -484,7 +484,7 @@ define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(ptr addrspace(1) %out, p
}

; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64:
-; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0

; SI: s_load_dword
; SI: s_load_dwordx2
Expand All @@ -499,7 +499,7 @@ define amdgpu_kernel void @s_and_inline_imm_4.0_i64(ptr addrspace(1) %out, ptr a
}

; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64:
-; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0

; SI: s_load_dword
; SI: s_load_dwordx2
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
@@ -49,7 +49,7 @@ define amdgpu_kernel void @max_10_sgprs() #0 {
; features when the number of registers is frozen), this ends up using
; more than expected.

-; XALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs:
+; ALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs:
; XTOSGPR: SGPRBlocks: 1
; XTOSGPR: NumSGPRsForWavesPerEU: 16

@@ -87,13 +87,13 @@ define amdgpu_kernel void @max_10_sgprs() #0 {
;}

; The following test is commented out for now; http://llvm.org/PR31230
-; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
+; COM: ALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
; ; Make sure copies for input buffer are not clobbered. This requires
; ; swapping the order the registers are copied from what normally
; ; happens.

-; XALL: SGPRBlocks: 2
-; XALL: NumSGPRsForWavesPerEU: 18
+; COM: ALL: SGPRBlocks: 2
+; COM: ALL: NumSGPRsForWavesPerEU: 18
;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(ptr addrspace(1) %out1,
; ptr addrspace(1) %out2,
; ptr addrspace(1) %out3,