Skip to content

Commit 28fdd74

Browse files
SC llvm team
authored and committed
Merge llvm/main into amd-debug
2 parents 164db3a + 43a9ec2 commit 28fdd74

File tree

5 files changed

+168
-34
lines changed

5 files changed

+168
-34
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2723,6 +2723,16 @@ static std::optional<Instruction *> instCombineSVEUxt(InstCombiner &IC,
27232723
return std::nullopt;
27242724
}
27252725

2726+
/// Fold the intrinsic call \p II (dispatched here for
/// Intrinsic::aarch64_sme_in_streaming_mode) to a constant when the SME
/// attributes of the enclosing function already decide its value.
///
/// \param IC  InstCombiner used to replace all uses of the call.
/// \param II  The intrinsic call being simplified.
/// \returns the result of IC.replaceInstUsesWith() when the call was folded to
///          a constant, or std::nullopt when the call is left untouched.
static std::optional<Instruction *>
2727+
instCombineInStreamingMode(InstCombiner &IC, IntrinsicInst &II) {
2728+
// Query the SME attributes of the function that contains the call.
SMEAttrs FnSMEAttrs(*II.getFunction());
2729+
bool IsStreaming = FnSMEAttrs.hasStreamingInterfaceOrBody();
2730+
// The value is known in two cases: the function has a streaming interface or
// body (fold to true), or it does not have a streaming-compatible interface
// either (fold to false). A streaming-compatible function without a streaming
// body falls through and keeps the call.
if (IsStreaming || !FnSMEAttrs.hasStreamingCompatibleInterface())
2731+
return IC.replaceInstUsesWith(
2732+
II, ConstantInt::getBool(II.getType(), IsStreaming));
2733+
return std::nullopt;
2734+
}
2735+
27262736
std::optional<Instruction *>
27272737
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
27282738
IntrinsicInst &II) const {
@@ -2828,6 +2838,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
28282838
return instCombineSVEUxt(IC, II, 16);
28292839
case Intrinsic::aarch64_sve_uxtw:
28302840
return instCombineSVEUxt(IC, II, 32);
2841+
case Intrinsic::aarch64_sme_in_streaming_mode:
2842+
return instCombineInStreamingMode(IC, II);
28312843
}
28322844

28332845
return std::nullopt;
Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sme -S -o - < %s | FileCheck %s
3+
4+
; Function marked only "aarch64_pstate_sm_compatible": the assertions below
; expect the in.streaming.mode call to be kept and its result returned.
define i1 @test_in_streaming_mode_streaming_compatible() "aarch64_pstate_sm_compatible" {
5+
; CHECK-LABEL: define i1 @test_in_streaming_mode_streaming_compatible(
6+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[SM:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
8+
; CHECK-NEXT: ret i1 [[SM]]
9+
;
10+
%sm = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
11+
ret i1 %sm
12+
}
13+
14+
; Function marked "aarch64_pstate_sm_enabled": the assertions below expect the
; in.streaming.mode call to be folded to the constant true.
define i1 @test_in_streaming_mode_streaming() "aarch64_pstate_sm_enabled" {
15+
; CHECK-LABEL: define i1 @test_in_streaming_mode_streaming(
16+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
17+
; CHECK-NEXT: ret i1 true
18+
;
19+
%sm = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
20+
ret i1 %sm
21+
}
22+
23+
; Function marked both "aarch64_pstate_sm_compatible" and
; "aarch64_pstate_sm_body": the assertions below expect the call to be folded
; to the constant true.
define i1 @test_in_streaming_mode_streaming_compatible_streaming_body() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
24+
; CHECK-LABEL: define i1 @test_in_streaming_mode_streaming_compatible_streaming_body(
25+
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
26+
; CHECK-NEXT: ret i1 true
27+
;
28+
%sm = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
29+
ret i1 %sm
30+
}
31+
32+
; Function marked only "aarch64_pstate_sm_body": the assertions below expect
; the in.streaming.mode call to be folded to the constant true.
define i1 @test_in_streaming_mode_streaming_body() "aarch64_pstate_sm_body" {
33+
; CHECK-LABEL: define i1 @test_in_streaming_mode_streaming_body(
34+
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
35+
; CHECK-NEXT: ret i1 true
36+
;
37+
%sm = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
38+
ret i1 %sm
39+
}
40+
41+
; Function with no streaming-related attributes: the assertions below expect
; the in.streaming.mode call to be folded to the constant false.
define i1 @test_in_streaming_mode_non_streaming() {
42+
; CHECK-LABEL: define i1 @test_in_streaming_mode_non_streaming(
43+
; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
44+
; CHECK-NEXT: ret i1 false
45+
;
46+
%sm = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
47+
ret i1 %sm
48+
}

llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
22
; RUN: opt -p loop-vectorize -mtriple=arm64-apple-macosx -S %s | FileCheck %s
33

4-
define float @fmax_ugt_with_select(ptr %src, i64 %n) {
5-
; CHECK-LABEL: define float @fmax_ugt_with_select(
4+
define float @fmax_ogt_with_select(ptr %src, i64 %n) {
5+
; CHECK-LABEL: define float @fmax_ogt_with_select(
66
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*]]:
88
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -11,7 +11,7 @@ define float @fmax_ugt_with_select(ptr %src, i64 %n) {
1111
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
1212
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
1313
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
14-
; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
14+
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[L]], [[MAX]]
1515
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
1616
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1717
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
@@ -28,7 +28,7 @@ loop:
2828
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
2929
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
3030
%l = load float, ptr %gep.src, align 4
31-
%cmp = fcmp ugt float %l, %max
31+
%cmp = fcmp ogt float %l, %max
3232
%max.next = select i1 %cmp, float %l, float %max
3333
%iv.next = add nuw nsw i64 %iv, 1
3434
%ec = icmp eq i64 %iv.next, %n

llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
22
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
33

4-
define float @fmax_ugt_with_select(ptr %src, i64 %n) {
5-
; CHECK-LABEL: define float @fmax_ugt_with_select(
4+
define float @fmax_ogt_with_select(ptr %src, i64 %n) {
5+
; CHECK-LABEL: define float @fmax_ogt_with_select(
66
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*]]:
88
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -11,7 +11,7 @@ define float @fmax_ugt_with_select(ptr %src, i64 %n) {
1111
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
1212
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
1313
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
14-
; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
14+
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[L]], [[MAX]]
1515
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
1616
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1717
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
@@ -28,7 +28,7 @@ loop:
2828
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
2929
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
3030
%l = load float, ptr %gep.src, align 4
31-
%cmp = fcmp ugt float %l, %max
31+
%cmp = fcmp ogt float %l, %max
3232
%max.next = select i1 %cmp, float %l, float %max
3333
%iv.next = add nuw nsw i64 %iv, 1
3434
%ec = icmp eq i64 %iv.next, %n

llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll

Lines changed: 100 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
22
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
33

4-
define float @fmax_ugt_with_select_1(ptr %src, i64 %n) {
5-
; CHECK-LABEL: define float @fmax_ugt_with_select_1(
4+
define float @fmax_ogt_with_select_1(ptr %src, i64 %n) {
5+
; CHECK-LABEL: define float @fmax_ogt_with_select_1(
66
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*]]:
88
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -11,7 +11,7 @@ define float @fmax_ugt_with_select_1(ptr %src, i64 %n) {
1111
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
1212
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
1313
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
14-
; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
14+
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[L]], [[MAX]]
1515
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
1616
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1717
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
@@ -28,7 +28,7 @@ loop:
2828
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
2929
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
3030
%l = load float, ptr %gep.src, align 4
31-
%cmp = fcmp ugt float %l, %max
31+
%cmp = fcmp ogt float %l, %max
3232
%max.next = select i1 %cmp, float %l, float %max
3333
%iv.next = add nuw nsw i64 %iv, 1
3434
%ec = icmp eq i64 %iv.next, %n
@@ -38,8 +38,8 @@ exit:
3838
ret float %max.next
3939
}
4040

41-
define float @fmax_ugt_with_select_2(ptr %src, i64 %n) {
42-
; CHECK-LABEL: define float @fmax_ugt_with_select_2(
41+
define float @fmax_ogt_with_select_2(ptr %src, i64 %n) {
42+
; CHECK-LABEL: define float @fmax_ogt_with_select_2(
4343
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
4444
; CHECK-NEXT: [[ENTRY:.*]]:
4545
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -48,7 +48,7 @@ define float @fmax_ugt_with_select_2(ptr %src, i64 %n) {
4848
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
4949
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
5050
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
51-
; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[MAX]], [[L]]
51+
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[MAX]], [[L]]
5252
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[MAX]], float [[L]]
5353
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
5454
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
@@ -65,7 +65,7 @@ loop:
6565
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
6666
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
6767
%l = load float, ptr %gep.src, align 4
68-
%cmp = fcmp ugt float %max, %l
68+
%cmp = fcmp ogt float %max, %l
6969
%max.next = select i1 %cmp, float %max, float %l
7070
%iv.next = add nuw nsw i64 %iv, 1
7171
%ec = icmp eq i64 %iv.next, %n
@@ -75,8 +75,8 @@ exit:
7575
ret float %max.next
7676
}
7777

78-
define float @fmax_ogt_with_select_1(ptr %src, i64 %n) {
79-
; CHECK-LABEL: define float @fmax_ogt_with_select_1(
78+
define float @fmax_olt_with_select_1(ptr %src, i64 %n) {
79+
; CHECK-LABEL: define float @fmax_olt_with_select_1(
8080
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
8181
; CHECK-NEXT: [[ENTRY:.*]]:
8282
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -85,8 +85,8 @@ define float @fmax_ogt_with_select_1(ptr %src, i64 %n) {
8585
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
8686
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
8787
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
88-
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[L]], [[MAX]]
89-
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
88+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[L]], [[MAX]]
89+
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[MAX]], float [[L]]
9090
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
9191
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
9292
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
@@ -102,8 +102,8 @@ loop:
102102
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
103103
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
104104
%l = load float, ptr %gep.src, align 4
105-
%cmp = fcmp ogt float %l, %max
106-
%max.next = select i1 %cmp, float %l, float %max
105+
%cmp = fcmp olt float %l, %max
106+
%max.next = select i1 %cmp, float %max, float %l
107107
%iv.next = add nuw nsw i64 %iv, 1
108108
%ec = icmp eq i64 %iv.next, %n
109109
br i1 %ec, label %exit, label %loop
@@ -112,8 +112,8 @@ exit:
112112
ret float %max.next
113113
}
114114

115-
define float @fmax_ogt_with_select_2(ptr %src, i64 %n) {
116-
; CHECK-LABEL: define float @fmax_ogt_with_select_2(
115+
define float @fmax_olt_with_select_2(ptr %src, i64 %n) {
116+
; CHECK-LABEL: define float @fmax_olt_with_select_2(
117117
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
118118
; CHECK-NEXT: [[ENTRY:.*]]:
119119
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -122,8 +122,8 @@ define float @fmax_ogt_with_select_2(ptr %src, i64 %n) {
122122
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
123123
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
124124
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
125-
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[MAX]], [[L]]
126-
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[MAX]], float [[L]]
125+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[MAX]], [[L]]
126+
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
127127
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
128128
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
129129
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
@@ -139,8 +139,8 @@ loop:
139139
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
140140
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
141141
%l = load float, ptr %gep.src, align 4
142-
%cmp = fcmp ogt float %max, %l
143-
%max.next = select i1 %cmp, float %max, float %l
142+
%cmp = fcmp olt float %max, %l
143+
%max.next = select i1 %cmp, float %l, float %max
144144
%iv.next = add nuw nsw i64 %iv, 1
145145
%ec = icmp eq i64 %iv.next, %n
146146
br i1 %ec, label %exit, label %loop
@@ -149,8 +149,8 @@ exit:
149149
ret float %max.next
150150
}
151151

152-
define float @fmax_ugt_with_select_store_result(ptr %src, ptr %dst, i64 %n) {
153-
; CHECK-LABEL: define float @fmax_ugt_with_select_store_result(
152+
define float @fmax_ogt_with_select_store_result(ptr %src, ptr %dst, i64 %n) {
153+
; CHECK-LABEL: define float @fmax_ogt_with_select_store_result(
154154
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
155155
; CHECK-NEXT: [[ENTRY:.*]]:
156156
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -159,7 +159,7 @@ define float @fmax_ugt_with_select_store_result(ptr %src, ptr %dst, i64 %n) {
159159
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
160160
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
161161
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
162-
; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
162+
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[L]], [[MAX]]
163163
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
164164
; CHECK-NEXT: store float [[MAX_NEXT]], ptr [[DST]], align 8
165165
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -177,7 +177,7 @@ loop:
177177
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
178178
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
179179
%l = load float, ptr %gep.src, align 4
180-
%cmp = fcmp ugt float %l, %max
180+
%cmp = fcmp ogt float %l, %max
181181
%max.next = select i1 %cmp, float %l, float %max
182182
store float %max.next, ptr %dst, align 8
183183
%iv.next = add nuw nsw i64 %iv, 1
@@ -323,7 +323,7 @@ define float @fmax_with_select_and_load_store(ptr %src, ptr noalias %dst, i64 %n
323323
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
324324
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
325325
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
326-
; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
326+
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[L]], [[MAX]]
327327
; CHECK-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1
328328
; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV_1]]
329329
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_DST_1]], align 4
@@ -345,7 +345,7 @@ loop:
345345
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
346346
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
347347
%l = load float, ptr %gep.src, align 4
348-
%cmp = fcmp ugt float %l, %max
348+
%cmp = fcmp ogt float %l, %max
349349
%iv.1 = add i64 %iv, 1
350350
%gep.dst.1 = getelementptr inbounds i32, ptr %dst, i64 %iv.1
351351
%l.2 = load i32, ptr %gep.dst.1
@@ -359,3 +359,77 @@ loop:
359359
exit:
360360
ret float %max.next
361361
}
362+
363+
; Max-style reduction written as `fcmp ugt` + `select` with no fast-math flags
; on the compare. The autogenerated assertions below expect the same scalar
; loop (phi / load / fcmp ugt / select) as the input, followed by an LCSSA phi
; in the exit block.
define float @fmax_ugt_with_select_1(ptr %src, i64 %n) {
364+
; CHECK-LABEL: define float @fmax_ugt_with_select_1(
365+
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
366+
; CHECK-NEXT: [[ENTRY:.*]]:
367+
; CHECK-NEXT: br label %[[LOOP:.*]]
368+
; CHECK: [[LOOP]]:
369+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
370+
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
371+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
372+
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
373+
; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
374+
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
375+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
376+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
377+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
378+
; CHECK: [[EXIT]]:
379+
; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
380+
; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
381+
;
382+
entry:
383+
br label %loop
384+
385+
loop:
386+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
387+
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
388+
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
389+
%l = load float, ptr %gep.src, align 4
390+
%cmp = fcmp ugt float %l, %max
391+
%max.next = select i1 %cmp, float %l, float %max
392+
%iv.next = add nuw nsw i64 %iv, 1
393+
%ec = icmp eq i64 %iv.next, %n
394+
br i1 %ec, label %exit, label %loop
395+
396+
exit:
397+
ret float %max.next
398+
}
399+
400+
; Max-style reduction written as `fcmp oge` + `select` with no fast-math flags
; on the compare. The autogenerated assertions below expect the same scalar
; loop (phi / load / fcmp oge / select) as the input, followed by an LCSSA phi
; in the exit block.
define float @fmax_oge_with_select_1(ptr %src, i64 %n) {
401+
; CHECK-LABEL: define float @fmax_oge_with_select_1(
402+
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
403+
; CHECK-NEXT: [[ENTRY:.*]]:
404+
; CHECK-NEXT: br label %[[LOOP:.*]]
405+
; CHECK: [[LOOP]]:
406+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
407+
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
408+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
409+
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
410+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[L]], [[MAX]]
411+
; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
412+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
413+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
414+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
415+
; CHECK: [[EXIT]]:
416+
; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
417+
; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
418+
;
419+
entry:
420+
br label %loop
421+
422+
loop:
423+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
424+
%max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ]
425+
%gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
426+
%l = load float, ptr %gep.src, align 4
427+
%cmp = fcmp oge float %l, %max
428+
%max.next = select i1 %cmp, float %l, float %max
429+
%iv.next = add nuw nsw i64 %iv, 1
430+
%ec = icmp eq i64 %iv.next, %n
431+
br i1 %ec, label %exit, label %loop
432+
433+
exit:
434+
ret float %max.next
435+
}

0 commit comments

Comments
 (0)