@@ -1177,7 +1177,51 @@ bb:
11771177 %tmp28 = extractelement <16 x i32 > %tmp27 , i64 0
11781178 ret i32 %tmp28
11791179}
1180+ declare <16 x i32 > @llvm.abs.v16i32 (<16 x i32 >, i1 immarg)
1181+
1182+ define i32 @PR143456 (ptr %p0 , ptr %p1 ) { ; sums the 8 per-byte absolute differences of two <8 x i8> loads with a shuffle-and-add tree in i8, then zero-extends lane 0 to i32 - the checks below expect psadbw plus a truncation of the sum to its low byte (movzbl %al / vpextrb $0)
1183+ ; SSE2-LABEL: PR143456:
1184+ ; SSE2: # %bb.0:
1185+ ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1186+ ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1187+ ; SSE2-NEXT: movdqa %xmm0, %xmm2
1188+ ; SSE2-NEXT: pminub %xmm1, %xmm2
1189+ ; SSE2-NEXT: pmaxub %xmm1, %xmm0
1190+ ; SSE2-NEXT: psubb %xmm2, %xmm0
1191+ ; SSE2-NEXT: pxor %xmm1, %xmm1
1192+ ; SSE2-NEXT: psadbw %xmm0, %xmm1
1193+ ; SSE2-NEXT: movd %xmm1, %eax
1194+ ; SSE2-NEXT: movzbl %al, %eax
1195+ ; SSE2-NEXT: retq
1196+ ;
1197+ ; AVX-LABEL: PR143456:
1198+ ; AVX: # %bb.0:
1199+ ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1200+ ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1201+ ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm2
1202+ ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
1203+ ; AVX-NEXT: vpsubb %xmm2, %xmm0, %xmm0
1204+ ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1205+ ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
1206+ ; AVX-NEXT: vpextrb $0, %xmm0, %eax
1207+ ; AVX-NEXT: retq
1208+ %v0 = load <8 x i8 >, ptr %p0 , align 1 ; first 8-byte operand, only byte alignment is promised
1209+ %v1 = load <8 x i8 >, ptr %p1 , align 1 ; second 8-byte operand, only byte alignment is promised
1210+ %max = tail call <8 x i8 > @llvm.umax.v8i8 (<8 x i8 > %v0 , <8 x i8 > %v1 ) ; per-lane unsigned maximum
1211+ %min = tail call <8 x i8 > @llvm.umin.v8i8 (<8 x i8 > %v0 , <8 x i8 > %v1 ) ; per-lane unsigned minimum
1212+ %abd = sub nuw <8 x i8 > %max , %min ; per-lane unsigned absolute difference, max >= min so the subtract never wraps (hence nuw)
1213+ %rdx.shuf = shufflevector <8 x i8 > %abd , <8 x i8 > poison, <8 x i32 > <i32 4 , i32 5 , i32 6 , i32 7 , i32 poison, i32 poison, i32 poison, i32 poison> ; bring lanes 4-7 down to lanes 0-3
1214+ %bin.rdx = add <8 x i8 > %abd , %rdx.shuf ; lanes 0-3 now hold lane i + lane i+4
1215+ %rdx.shuf15 = shufflevector <8 x i8 > %bin.rdx , <8 x i8 > poison, <8 x i32 > <i32 2 , i32 3 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; bring lanes 2-3 down to lanes 0-1
1216+ %bin.rdx16 = add <8 x i8 > %bin.rdx , %rdx.shuf15 ; lanes 0-1 now each hold the sum of four original lanes
1217+ %rdx.shuf17 = shufflevector <8 x i8 > %bin.rdx16 , <8 x i8 > poison, <8 x i32 > <i32 1 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; bring lane 1 down to lane 0
1218+ %bin.rdx18 = add <8 x i8 > %bin.rdx16 , %rdx.shuf17 ; lane 0 now holds the sum of all 8 differences, wrapped to 8 bits by the i8 adds
1219+ %elt = extractelement <8 x i8 > %bin.rdx18 , i32 0 ; only lane 0 is read, the other lanes were combined with poison
1220+ %res = zext i8 %elt to i32 ; widen the 8-bit sum to the i32 return type, upper 24 bits are zero
1221+ ret i32 %res
1222+ }
1223+ declare <8 x i8 > @llvm.umax.v8i8 (<8 x i8 >, <8 x i8 >) ; called by @PR143456 to form the per-lane maximum
1224+ declare <8 x i8 > @llvm.umin.v8i8 (<8 x i8 >, <8 x i8 >) ; called by @PR143456 to form the per-lane minimum
1225+ declare i8 @llvm.vector.reduce.add.v8i8 (<8 x i8 >) ; NOTE(review) not called by @PR143456, which spells the reduction out with shuffles - confirm another test in this file needs it
11801226
1181- ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
1182- declare <16 x i32 > @llvm.abs.v16i32 (<16 x i32 >, i1 immarg) #0
11831227attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
0 commit comments