Skip to content

Commit 88f99ed

Browse files
authored
Merge pull request opencv#26071 from tingboliao:4.x
Remove redundant code from cv::convertMaps and mRGBA2RGBA<uchar> opencv#26071 (1) cv::convertMaps: within the branch [else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )], the nested [if( nninterpolate )] block is unreachable, because that case is already handled at lines 1959 to 1961, where the result is computed in advance and the function returns directly. (2) mRGBA2RGBA<uchar>: dst[0], dst[1], dst[2] and dst[3] are calculated repeatedly. Introduced in opencv#13440 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is an accuracy test, a performance test and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
1 parent e9c3e1a commit 88f99ed

File tree

2 files changed

+33
-57
lines changed

2 files changed

+33
-57
lines changed

modules/imgproc/src/color_rgb.simd.hpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,11 +1088,6 @@ struct mRGBA2RGBA<uchar>
10881088

10891089
uchar v3_half = v3 / 2;
10901090

1091-
dst[0] = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
1092-
dst[1] = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
1093-
dst[2] = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
1094-
dst[3] = v3;
1095-
10961091
dst[0] = (v3==0)? 0 : saturate_cast<uchar>((v0 * max_val + v3_half) / v3);
10971092
dst[1] = (v3==0)? 0 : saturate_cast<uchar>((v1 * max_val + v3_half) / v3);
10981093
dst[2] = (v3==0)? 0 : saturate_cast<uchar>((v2 * max_val + v3_half) / v3);

modules/imgproc/src/imgwarp.cpp

Lines changed: 33 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,65 +2082,46 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
20822082
}
20832083
else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
20842084
{
2085-
if( nninterpolate )
2085+
#if CV_TRY_SSE4_1
2086+
if( useSSE4_1 )
2087+
opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
2088+
else
2089+
#endif
20862090
{
20872091
#if CV_SIMD128
2088-
int span = VTraits<v_float32x4>::vlanes();
20892092
{
2090-
for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
2091-
v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
2092-
v_round(v_load(src1f + x + span))));
2093+
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
2094+
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
2095+
v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
2096+
int span = VTraits<v_uint16x8>::vlanes();
2097+
for (; x <= size.width - span; x += span )
2098+
{
2099+
v_float32x4 v_src0[2], v_src1[2];
2100+
v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
2101+
v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
2102+
v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
2103+
v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
2104+
v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
2105+
v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
2106+
2107+
v_int16x8 v_dst[2];
2108+
v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
2109+
v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
2110+
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
2111+
2112+
v_store(dst2 + x, v_pack_u(
2113+
v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
2114+
v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
2115+
}
20932116
}
20942117
#endif
20952118
for( ; x < size.width; x++ )
20962119
{
2097-
dst1[x*2] = saturate_cast<short>(src1f[x*2]);
2098-
dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
2099-
}
2100-
}
2101-
else
2102-
{
2103-
#if CV_TRY_SSE4_1
2104-
if( useSSE4_1 )
2105-
opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
2106-
else
2107-
#endif
2108-
{
2109-
#if CV_SIMD128
2110-
{
2111-
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
2112-
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
2113-
v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
2114-
int span = VTraits<v_uint16x8>::vlanes();
2115-
for (; x <= size.width - span; x += span )
2116-
{
2117-
v_float32x4 v_src0[2], v_src1[2];
2118-
v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
2119-
v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
2120-
v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
2121-
v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
2122-
v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
2123-
v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
2124-
2125-
v_int16x8 v_dst[2];
2126-
v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
2127-
v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
2128-
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
2129-
2130-
v_store(dst2 + x, v_pack_u(
2131-
v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
2132-
v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
2133-
}
2134-
}
2135-
#endif
2136-
for( ; x < size.width; x++ )
2137-
{
2138-
int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
2139-
int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
2140-
dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
2141-
dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
2142-
dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
2143-
}
2120+
int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
2121+
int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
2122+
dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
2123+
dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
2124+
dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
21442125
}
21452126
}
21462127
}

0 commit comments

Comments
 (0)