@@ -80,18 +80,16 @@ define void @v1x8_levels_6_7_8_9_10_11_12_13(i32 %arg0, ptr align 16 %arg1) {
8080 ret void
8181}
8282
83- define void @v1_4_4_4_2_1_to_v8_8_levels_6_7 (i32 %arg0 , ptr addrspace (3 ) align 16 %arg1_ptr , i32 %arg2 , i32 %arg3 , i32 %arg4 , i32 %arg5 , half %arg6_half , half %arg7_half ) {
83+ define void @v1_4_4_4_2_1_to_v8_8_levels_6_7 (i32 %arg0 , ptr addrspace (3 ) align 16 %arg1_ptr , i32 %arg2 , i32 %arg3 , i32 %arg4 , i32 %arg5 , half %arg6_half , half %arg7_half , < 2 x half > %arg8_2xhalf ) {
8484; CHECK-LABEL: define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(
85- ; CHECK-SAME: i32 [[ARG0:%.*]], ptr addrspace(3) align 16 [[ARG1_PTR:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]], i32 [[ARG4:%.*]], i32 [[ARG5:%.*]], half [[ARG6_HALF:%.*]], half [[ARG7_HALF:%.*]]) #[[ATTR0]] {
85+ ; CHECK-SAME: i32 [[ARG0:%.*]], ptr addrspace(3) align 16 [[ARG1_PTR:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]], i32 [[ARG4:%.*]], i32 [[ARG5:%.*]], half [[ARG6_HALF:%.*]], half [[ARG7_HALF:%.*]], <2 x half> [[ARG8_2XHALF:%.*]] ) #[[ATTR0]] {
8686; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARG1_PTR]], i32 458752
8787; CHECK-NEXT: br [[DOTPREHEADER11_PREHEADER:label %.*]]
8888; CHECK: [[_PREHEADER11_PREHEADER:.*:]]
8989; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[ARG0]], 6
9090; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP1]], i32 [[TMP2]]
9191; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[ARG2]]
9292; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[ARG3]]
93- ; CHECK-NEXT: [[VEC2_INIT:%.*]] = insertelement <2 x half> undef, half [[ARG7_HALF]], i32 0
94- ; CHECK-NEXT: [[VEC2:%.*]] = shufflevector <2 x half> [[VEC2_INIT]], <2 x half> undef, <2 x i32> zeroinitializer
9593; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[ARG0]], 2
9694; CHECK-NEXT: br i1 [[CMP]], [[DOTLR_PH:label %.*]], [[DOTEXIT_POINT:label %.*]]
9795; CHECK: [[_LR_PH:.*:]]
@@ -102,44 +100,41 @@ define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(i32 %arg0, ptr addrspace(3) align 1
102100; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x half> [[TMP8]], half 0xH0000, i32 2
103101; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x half> [[TMP9]], half 0xH0000, i32 3
104102; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x half> [[TMP10]], half 0xH0000, i32 4
105- ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x half> [[VEC2 ]], i32 0
103+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x half> [[ARG8_2XHALF ]], i32 0
106104; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x half> [[TMP11]], half [[TMP12]], i32 5
107- ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x half> [[VEC2 ]], i32 1
105+ ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x half> [[ARG8_2XHALF ]], i32 1
108106; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x half> [[TMP13]], half [[TMP14]], i32 6
109107; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x half> [[TMP15]], half [[ARG7_HALF]], i32 7
110108; CHECK-NEXT: store <8 x half> [[TMP16]], ptr addrspace(3) [[TMP6]], align 2
111109; CHECK-NEXT: br [[DOTEXIT_POINT]]
112110; CHECK: [[_EXIT_POINT:.*:]]
113111; CHECK-NEXT: ret void
114112;
115- %37 = getelementptr inbounds i8 , ptr addrspace (3 ) %arg1_ptr , i32 458752
113+ %base1 = getelementptr inbounds i8 , ptr addrspace (3 ) %arg1_ptr , i32 458752
116114 br label %.preheader11.preheader
117115
118116.preheader11.preheader:
119- %258 = shl nuw nsw i32 %arg0 , 6
120- %259 = getelementptr inbounds i8 , ptr addrspace (3 ) %37 , i32 %258
117+ %base2 = shl nuw nsw i32 %arg0 , 6
118+ %base3 = getelementptr inbounds i8 , ptr addrspace (3 ) %base1 , i32 %base2
121119
122- %268 = getelementptr inbounds i8 , ptr addrspace (3 ) %259 , i32 %arg2
123- %269 = getelementptr inbounds i8 , ptr addrspace (3 ) %268 , i32 %arg3
124-
125- %vec2_init = insertelement <2 x half > undef , half %arg7_half , i32 0
126- %vec2 = shufflevector <2 x half > %vec2_init , <2 x half > undef , <2 x i32 > zeroinitializer
120+ %base4 = getelementptr inbounds i8 , ptr addrspace (3 ) %base3 , i32 %arg2
121+ %base5 = getelementptr inbounds i8 , ptr addrspace (3 ) %base4 , i32 %arg3
127122
128123 %cmp = icmp sgt i32 %arg0 , 2
129124 br i1 %cmp , label %.lr.ph , label %.exit_point
130125
131126.lr.ph:
132- %gep = getelementptr inbounds i8 , ptr addrspace (3 ) %269 , i32 %arg4
127+ %gep = getelementptr inbounds i8 , ptr addrspace (3 ) %base5 , i32 %arg4
133128
134- %1000 = getelementptr inbounds i8 , ptr addrspace (3 ) %gep , i32 %arg5
135- %1002 = getelementptr inbounds i8 , ptr addrspace (3 ) %1000 , i32 2
136- %1010 = getelementptr inbounds i8 , ptr addrspace (3 ) %1000 , i32 10
137- %1014 = getelementptr inbounds i8 , ptr addrspace (3 ) %1000 , i32 14
129+ %dst = getelementptr inbounds i8 , ptr addrspace (3 ) %gep , i32 %arg5
130+ %dst_off2 = getelementptr inbounds i8 , ptr addrspace (3 ) %dst , i32 2
131+ %dst_off10 = getelementptr inbounds i8 , ptr addrspace (3 ) %dst , i32 10
132+ %dst_off14 = getelementptr inbounds i8 , ptr addrspace (3 ) %dst , i32 14
138133
139- store half %arg6_half , ptr addrspace (3 ) %1000 , align 2
140- store <4 x half > zeroinitializer , ptr addrspace (3 ) %1002 , align 2
141- store <2 x half > %vec2 , ptr addrspace (3 ) %1010 , align 2
142- store half %arg7_half , ptr addrspace (3 ) %1014 , align 2
134+ store half %arg6_half , ptr addrspace (3 ) %dst , align 2
135+ store <4 x half > zeroinitializer , ptr addrspace (3 ) %dst_off2 , align 2
136+ store <2 x half > %arg8_2xhalf , ptr addrspace (3 ) %dst_off10 , align 2
137+ store half %arg7_half , ptr addrspace (3 ) %dst_off14 , align 2
143138 br label %.exit_point
144139
145140.exit_point:
0 commit comments