1- ; RUN: llc -fixup-byte-word-insts=1 < %s | \
2- ; RUN: FileCheck -check-prefix CHECK -check-prefix BWON %s
3- ; RUN: llc -fixup-byte-word-insts=0 < %s | \
4- ; RUN: FileCheck -check-prefix CHECK -check-prefix BWOFF %s
1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+ ; RUN: llc -fixup-byte-word-insts=1 < %s | FileCheck %s -check-prefix=BWON
3+ ; RUN: llc -fixup-byte-word-insts=0 < %s | FileCheck %s -check-prefix=BWOFF
54
65target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
76target triple = "x86_64-apple-macosx10.8.0"
@@ -11,22 +10,40 @@ target triple = "x86_64-apple-macosx10.8.0"
1110; This has byte loads interspersed with byte stores, in a single
1211; basic-block loop. The upper portion should be dead, so with
1312; -fixup-byte-word-insts=1 the movb loads should be changed into movzbl
1312; (with -fixup-byte-word-insts=0 they remain movb).
14- ; CHECK-LABEL: foo1
15- ; load:
16- ; BWON: movzbl
17- ; BWOFF: movb
18- ; store:
19- ; CHECK: movb
20- ; load:
21- ; BWON: movzbl
22- ; BWOFF: movb
23- ; store:
24- ; CHECK: movb
25- ; CHECK: ret
26- define void @foo1 (i32 %count ,
27- ptr noalias nocapture %q ,
28- ptr noalias nocapture %p )
29- nounwind uwtable noinline ssp {
13+ define void @foo1 (i32 %count , ptr noalias nocapture %q , ptr noalias nocapture %p ) nounwind uwtable noinline ssp {
14+ ; BWON-LABEL: foo1:
15+ ; BWON: ## %bb.0:
16+ ; BWON-NEXT: testl %edi, %edi
17+ ; BWON-NEXT: jle LBB0_2
18+ ; BWON-NEXT: .p2align 4
19+ ; BWON-NEXT: LBB0_1: ## %a4
20+ ; BWON-NEXT: ## =>This Inner Loop Header: Depth=1
21+ ; BWON-NEXT: movzbl (%rsi), %eax
22+ ; BWON-NEXT: movb %al, (%rdx)
23+ ; BWON-NEXT: movzbl 1(%rsi), %eax
24+ ; BWON-NEXT: movb %al, 1(%rdx)
25+ ; BWON-NEXT: addq $8, %rdx
26+ ; BWON-NEXT: decl %edi
27+ ; BWON-NEXT: jne LBB0_1
28+ ; BWON-NEXT: LBB0_2: ## %._crit_edge
29+ ; BWON-NEXT: retq
30+ ;
31+ ; BWOFF-LABEL: foo1:
32+ ; BWOFF: ## %bb.0:
33+ ; BWOFF-NEXT: testl %edi, %edi
34+ ; BWOFF-NEXT: jle LBB0_2
35+ ; BWOFF-NEXT: .p2align 4
36+ ; BWOFF-NEXT: LBB0_1: ## %a4
37+ ; BWOFF-NEXT: ## =>This Inner Loop Header: Depth=1
38+ ; BWOFF-NEXT: movb (%rsi), %al
39+ ; BWOFF-NEXT: movb %al, (%rdx)
40+ ; BWOFF-NEXT: movb 1(%rsi), %al
41+ ; BWOFF-NEXT: movb %al, 1(%rdx)
42+ ; BWOFF-NEXT: addq $8, %rdx
43+ ; BWOFF-NEXT: decl %edi
44+ ; BWOFF-NEXT: jne LBB0_1
45+ ; BWOFF-NEXT: LBB0_2: ## %._crit_edge
46+ ; BWOFF-NEXT: retq
3047 %1 = icmp sgt i32 %count , 0
3148 br i1 %1 , label %.lr.ph , label %._crit_edge
3249
@@ -56,22 +73,40 @@ a4: ; preds = %3, %.lr.ph
5673; This has word loads interspersed with word stores.
5774; The upper portion should be dead, so with -fixup-byte-word-insts=1 the
5875; movw loads should be changed into movzwl (with =0 they remain movw).
59- ; CHECK-LABEL: foo2
60- ; load:
61- ; BWON: movzwl
62- ; BWOFF: movw
63- ; store:
64- ; CHECK: movw
65- ; load:
66- ; BWON: movzwl
67- ; BWOFF: movw
68- ; store:
69- ; CHECK: movw
70- ; CHECK: ret
71- define void @foo2 (i32 %count ,
72- ptr noalias nocapture %q ,
73- ptr noalias nocapture %p )
74- nounwind uwtable noinline ssp {
76+ define void @foo2 (i32 %count , ptr noalias nocapture %q , ptr noalias nocapture %p ) nounwind uwtable noinline ssp {
77+ ; BWON-LABEL: foo2:
78+ ; BWON: ## %bb.0:
79+ ; BWON-NEXT: testl %edi, %edi
80+ ; BWON-NEXT: jle LBB1_2
81+ ; BWON-NEXT: .p2align 4
82+ ; BWON-NEXT: LBB1_1: ## %a4
83+ ; BWON-NEXT: ## =>This Inner Loop Header: Depth=1
84+ ; BWON-NEXT: movzwl (%rsi), %eax
85+ ; BWON-NEXT: movw %ax, (%rdx)
86+ ; BWON-NEXT: movzwl 2(%rsi), %eax
87+ ; BWON-NEXT: movw %ax, 2(%rdx)
88+ ; BWON-NEXT: addq $16, %rdx
89+ ; BWON-NEXT: decl %edi
90+ ; BWON-NEXT: jne LBB1_1
91+ ; BWON-NEXT: LBB1_2: ## %._crit_edge
92+ ; BWON-NEXT: retq
93+ ;
94+ ; BWOFF-LABEL: foo2:
95+ ; BWOFF: ## %bb.0:
96+ ; BWOFF-NEXT: testl %edi, %edi
97+ ; BWOFF-NEXT: jle LBB1_2
98+ ; BWOFF-NEXT: .p2align 4
99+ ; BWOFF-NEXT: LBB1_1: ## %a4
100+ ; BWOFF-NEXT: ## =>This Inner Loop Header: Depth=1
101+ ; BWOFF-NEXT: movw (%rsi), %ax
102+ ; BWOFF-NEXT: movw %ax, (%rdx)
103+ ; BWOFF-NEXT: movw 2(%rsi), %ax
104+ ; BWOFF-NEXT: movw %ax, 2(%rdx)
105+ ; BWOFF-NEXT: addq $16, %rdx
106+ ; BWOFF-NEXT: decl %edi
107+ ; BWOFF-NEXT: jne LBB1_1
108+ ; BWOFF-NEXT: LBB1_2: ## %._crit_edge
109+ ; BWOFF-NEXT: retq
75110 %1 = icmp sgt i32 %count , 0
76111 br i1 %1 , label %.lr.ph , label %._crit_edge
77112
@@ -98,11 +133,18 @@ a4: ; preds = %3, %.lr.ph
98133
99134; This test contains nothing but a simple byte load and store.
100135; movb has a smaller encoding, but with -fixup-byte-word-insts=1 we use
100135; movzbl for the load for better performance.
101- ; CHECK-LABEL: foo3:
102- ; BWON: movzbl
103- ; BWOFF: movb
104- ; CHECK: movb
105136define void @foo3 (ptr %dst , ptr %src ) {
137+ ; BWON-LABEL: foo3:
138+ ; BWON: ## %bb.0:
139+ ; BWON-NEXT: movzbl (%rsi), %eax
140+ ; BWON-NEXT: movb %al, (%rdi)
141+ ; BWON-NEXT: retq
142+ ;
143+ ; BWOFF-LABEL: foo3:
144+ ; BWOFF: ## %bb.0:
145+ ; BWOFF-NEXT: movb (%rsi), %al
146+ ; BWOFF-NEXT: movb %al, (%rdi)
147+ ; BWOFF-NEXT: retq
106148 %t0 = load i8 , ptr %src , align 1
107149 store i8 %t0 , ptr %dst , align 1
108150 ret void
@@ -111,11 +153,18 @@ define void @foo3(ptr%dst, ptr%src) {
111153; This test contains nothing but a simple word load and store. Since
112154; movw and movzwl are the same size, we should always choose movzwl for
113155; the load when -fixup-byte-word-insts=1 (with =0 the load stays movw).
114- ; CHECK-LABEL: foo4:
115- ; BWON: movzwl
116- ; BWOFF: movw
117- ; CHECK: movw
118156define void @foo4 (ptr %dst , ptr %src ) {
157+ ; BWON-LABEL: foo4:
158+ ; BWON: ## %bb.0:
159+ ; BWON-NEXT: movzwl (%rsi), %eax
160+ ; BWON-NEXT: movw %ax, (%rdi)
161+ ; BWON-NEXT: retq
162+ ;
163+ ; BWOFF-LABEL: foo4:
164+ ; BWOFF: ## %bb.0:
165+ ; BWOFF-NEXT: movw (%rsi), %ax
166+ ; BWOFF-NEXT: movw %ax, (%rdi)
167+ ; BWOFF-NEXT: retq
119168 %t0 = load i16 , ptr %src , align 2
120169 store i16 %t0 , ptr %dst , align 2
121170 ret void
0 commit comments