Skip to content

Commit 6cd424a

Browse files
committed
add checks for second var as an operand
1 parent deeed94 commit 6cd424a

File tree

1 file changed

+67
-48
lines changed

1 file changed

+67
-48
lines changed
Lines changed: 67 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,93 @@
11
;; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b %s -o - | FileCheck %s
22

3-
define dso_local void @store_isnan_f32(ptr %a, ptr %isnan_a) local_unnamed_addr {
3+
define dso_local void @store_isnan_f32(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr {
44
entry:
5-
%arrayidx = getelementptr inbounds nuw float, ptr %a, i32 0
6-
%0 = load <32 x float>, ptr %arrayidx, align 4
7-
%.vectorized = fcmp uno <32 x float> %0, zeroinitializer
5+
%arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0
6+
%arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0
7+
%0 = load <32 x float>, ptr %arrayidx_a, align 4
8+
%1 = load <32 x float>, ptr %arrayidx_b, align 4
9+
%.vectorized = fcmp uno <32 x float> %0, %1
810
%.LS.instance = zext <32 x i1> %.vectorized to <32 x i32>
9-
%arrayidx1 = getelementptr inbounds nuw i32, ptr %isnan_a, i32 0
11+
%arrayidx1 = getelementptr inbounds nuw i32, ptr %isnan_cmp, i32 0
1012
store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4
1113
ret void
1214
}
13-
; CHECK: store_isnan_f32
14-
; CHECK: [[VZERO32:v[0-9]+]] = vxor([[VZERO32]],[[VZERO32]])
15-
; CHECK: [[VLOAD32:v[0-9]+]] = vmemu(r0+#0)
16-
; CHECK: [[VONES32:v[0-9]+]] = vsplat([[RONE32:r[0-9]+]])
17-
; CHECK: {{q[0-9]+}} = vcmp.eq([[VLOAD32]].w,[[VLOAD32]].w)
18-
; CHECK: [[VOUT32:v[0-9]+]] = vmux({{q[0-9]+}},[[VZERO32]],[[VONES32]])
19-
; CHECK: vmemu(r1+#0) = [[VOUT32]]
2015

16+
; CHECK-LABEL: store_isnan_f32
17+
; CHECK: [[RONE32:r[0-9]+]] = #1
18+
; CHECK: [[VOP2_F32:v[0-9]+]] = vxor([[VOP2_F32]],[[VOP2_F32]])
19+
; CHECK: [[VOP1_F32:v[0-9]+]] = vmemu(r0+#0)
20+
; CHECK: [[VONES32:v[0-9]+]] = vsplat([[RONE32]])
21+
; CHECK: [[Q1_F32:q[0-9]+]] = vcmp.eq([[VOP1_F32]].w,[[VOP1_F32]].w)
22+
; CHECK: [[VOP3_F32:v[0-9]+]] = vmemu(r1+#0)
23+
; CHECK: [[Q1_F32]] &= vcmp.eq([[VOP3_F32]].w,[[VOP3_F32]].w)
24+
; CHECK: [[VOUT_F32:v[0-9]+]] = vmux([[Q1_F32]],[[VOP2_F32]],[[VONES32]])
25+
; CHECK: vmemu(r2+#0) = [[VOUT_F32]]
2126

22-
define dso_local void @store_isnan_f16(ptr %a, ptr %isnan_a) local_unnamed_addr {
27+
define dso_local void @store_isnan_f16(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr {
2328
entry:
24-
%arrayidx = getelementptr inbounds nuw half, ptr %a, i32 0
25-
%0 = load <64 x half>, ptr %arrayidx, align 2
26-
%.vectorized = fcmp uno <64 x half> %0, zeroinitializer
29+
%arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0
30+
%arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0
31+
%0 = load <64 x half>, ptr %arrayidx_a, align 2
32+
%1 = load <64 x half>, ptr %arrayidx_b, align 2
33+
%.vectorized = fcmp uno <64 x half> %0, %1
2734
%conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16>
28-
%arrayidx1 = getelementptr inbounds nuw i16, ptr %isnan_a, i32 0
35+
%arrayidx1 = getelementptr inbounds nuw i16, ptr %isnan_cmp, i32 0
2936
store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2
3037
ret void
3138
}
32-
; CHECK: store_isnan_f16
33-
; CHECK: [[VZERO16:v[0-9]+]] = vxor([[VZERO16]],[[VZERO16]])
34-
; CHECK: [[VLOAD16:v[0-9]+]] = vmemu(r0+#0)
35-
; CHECK: [[VONES16:v[0-9]+]].h = vsplat([[RONE16:r[0-9]+]])
36-
; CHECK: {{q[0-9]+}} = vcmp.eq([[VLOAD16]].h,[[VLOAD16]].h)
37-
; CHECK: [[VOUT16:v[0-9]+]] = vmux({{q[0-9]+}},[[VZERO16]],[[VONES16]])
38-
; CHECK: vmemu(r1+#0) = [[VOUT16]]
39+
; CHECK-LABEL: store_isnan_f16
40+
; CHECK: [[RONE16:r[0-9]+]] = #1
41+
; CHECK: [[VOP2_F16:v[0-9]+]] = vxor([[VOP2_F16]],[[VOP2_F16]])
42+
; CHECK: [[VOP1_F16:v[0-9]+]] = vmemu(r0+#0)
43+
; CHECK: [[VONES16:v[0-9]+]].h = vsplat([[RONE16]])
44+
; CHECK: [[Q1_F16:q[0-9]+]] = vcmp.eq([[VOP1_F16]].h,[[VOP1_F16]].h)
45+
; CHECK: [[VOP3_F16:v[0-9]+]] = vmemu(r1+#0)
46+
; CHECK: [[Q1_F16]] &= vcmp.eq([[VOP3_F16]].h,[[VOP3_F16]].h)
47+
; CHECK: [[VOUT_F16:v[0-9]+]] = vmux([[Q1_F16]],[[VOP2_F16]],[[VONES16]])
48+
; CHECK: vmemu(r2+#0) = [[VOUT_F16]]
3949

40-
define dso_local void @store_isordered_f32(ptr %a, ptr %isordered_a) local_unnamed_addr {
50+
define dso_local void @store_isordered_f32(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr {
4151
entry:
42-
%arrayidx = getelementptr inbounds nuw float, ptr %a, i32 0
43-
%0 = load <32 x float>, ptr %arrayidx, align 4
44-
%.vectorized = fcmp ord <32 x float> %0, zeroinitializer
52+
%arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0
53+
%arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0
54+
%0 = load <32 x float>, ptr %arrayidx_a, align 4
55+
%1 = load <32 x float>, ptr %arrayidx_b, align 4
56+
%.vectorized = fcmp ord <32 x float> %0, %1
4557
%.LS.instance = zext <32 x i1> %.vectorized to <32 x i32>
46-
%arrayidx1 = getelementptr inbounds nuw i32, ptr %isordered_a, i32 0
58+
%arrayidx1 = getelementptr inbounds nuw i32, ptr %isordered_cmp, i32 0
4759
store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4
4860
ret void
4961
}
50-
; CHECK: store_isordered_f32
51-
; CHECK: [[V_ZERO32:v[0-9]+]] = vxor([[V_ZERO32]],[[V_ZERO32]])
52-
; CHECK: [[V_LOAD32:v[0-9]+]] = vmemu(r0+#0)
53-
; CHECK: [[V_ONES32:v[0-9]+]] = vsplat([[RO32:r[0-9]+]])
54-
; CHECK: {{q[0-9]+}} = vcmp.eq([[V_LOAD32]].w,[[V_LOAD32]].w)
55-
; CHECK: [[V_OUT32:v[0-9]+]] = vmux({{q[0-9]+}},[[V_ONES32]],[[V_ZERO32]])
56-
; CHECK: vmemu(r1+#0) = [[V_OUT32]]
62+
; CHECK-LABEL: store_isordered_f32
63+
; CHECK: [[VOP2_ORD_F32:v[0-9]+]] = vxor([[VOP2_ORD_F32]],[[VOP2_ORD_F32]])
64+
; CHECK: [[VOP1_ORD_F32:v[0-9]+]] = vmemu(r0+#0)
65+
; CHECK: [[VONES_ORD_F32:v[0-9]+]] = vsplat([[RONE_ORD_F32:r[0-9]+]])
66+
; CHECK: [[Q1_ORD_F32:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F32]].w,[[VOP1_ORD_F32]].w)
67+
; CHECK: [[VOP3_ORD_F32:v[0-9]+]] = vmemu(r1+#0)
68+
; CHECK: [[Q1_ORD_F32]] &= vcmp.eq([[VOP3_ORD_F32]].w,[[VOP3_ORD_F32]].w)
69+
; CHECK: [[VOUT_ORD_F32:v[0-9]+]] = vmux([[Q1_ORD_F32]],[[VONES_ORD_F32]],[[VOP2_ORD_F32]])
70+
; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F32]]
5771

58-
define dso_local void @store_isordered_f16(ptr %a, ptr %isordered_a) local_unnamed_addr {
72+
73+
define dso_local void @store_isordered_f16(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr {
5974
entry:
60-
%arrayidx = getelementptr inbounds nuw half, ptr %a, i32 0
61-
%0 = load <64 x half>, ptr %arrayidx, align 2
62-
%.vectorized = fcmp ord <64 x half> %0, zeroinitializer
75+
%arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0
76+
%arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0
77+
%0 = load <64 x half>, ptr %arrayidx_a, align 2
78+
%1 = load <64 x half>, ptr %arrayidx_b, align 2
79+
%.vectorized = fcmp ord <64 x half> %0, %1
6380
%conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16>
64-
%arrayidx1 = getelementptr inbounds nuw i16, ptr %isordered_a, i32 0
81+
%arrayidx1 = getelementptr inbounds nuw i16, ptr %isordered_cmp, i32 0
6582
store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2
6683
ret void
6784
}
68-
; CHECK: store_isordered_f16
69-
; CHECK: [[V_ZERO16:v[0-9]+]] = vxor([[V_ZERO16]],[[V_ZERO16]])
70-
; CHECK: [[V_LOAD16:v[0-9]+]] = vmemu(r0+#0)
71-
; CHECK: [[V_ONES16:v[0-9]+]].h = vsplat([[RO16:r[0-9]+]])
72-
; CHECK: {{q[0-9]+}} = vcmp.eq([[V_LOAD16]].h,[[V_LOAD16]].h)
73-
; CHECK: [[V_OUT16:v[0-9]+]] = vmux({{q[0-9]+}},[[V_ONES16]],[[V_ZERO16]])
74-
; CHECK: vmemu(r1+#0) = [[V_OUT16]]
85+
; CHECK-LABEL: store_isordered_f16
86+
; CHECK: [[VOP2_ORD_F16:v[0-9]+]] = vxor([[VOP2_ORD_F16]],[[VOP2_ORD_F16]])
87+
; CHECK: [[VOP1_ORD_F16:v[0-9]+]] = vmemu(r0+#0)
88+
; CHECK: [[VONES_ORD_F16:v[0-9]+]].h = vsplat([[RONE_ORD_F16:r[0-9]+]])
89+
; CHECK: [[Q1_ORD_F16:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F16]].h,[[VOP1_ORD_F16]].h)
90+
; CHECK: [[VOP3_ORD_F16:v[0-9]+]] = vmemu(r1+#0)
91+
; CHECK: [[Q1_ORD_F16]] &= vcmp.eq([[VOP3_ORD_F16]].h,[[VOP3_ORD_F16]].h)
92+
; CHECK: [[VOUT_ORD_F16:v[0-9]+]] = vmux([[Q1_ORD_F16]],[[VONES_ORD_F16]],[[VOP2_ORD_F16]])
93+
; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F16]]

0 commit comments

Comments
 (0)