Skip to content

Commit a6592b0

Browse files
acyensovrasov
authored and committed
OCL SURF: Fix descriptor calculation.
1 parent cdbdb57 commit a6592b0

File tree

1 file changed

+72
-14
lines changed
  • modules/xfeatures2d/src/opencl

1 file changed

+72
-14
lines changed

modules/xfeatures2d/src/opencl/surf.cl

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -875,9 +875,6 @@ inline float linearFilter(
875875
float centerX, float centerY, float win_offset,
876876
float cos_dir, float sin_dir, float y, float x )
877877
{
878-
x -= 0.5f;
879-
y -= 0.5f;
880-
881878
float out = 0.0f;
882879

883880
const int x1 = round(x);
@@ -900,6 +897,60 @@ inline float linearFilter(
900897
return out;
901898
}
902899

900+
// Area-weighted average over the s-by-s window that starts at (x * s, y * s).
//
// Samples lying completely inside the window are weighted by 1 / (s * s);
// samples on the border of the window are additionally scaled by the
// fraction of the sample that the window covers (edges by one fraction,
// corners by the product of two).
//
// Every sample is fetched through readerGet, which is defined outside this
// function; here it is always invoked with the row index first and the
// column index second as its last two arguments.
// NOTE(review): img_rows / img_cols and the __PARAM_imgTex__ arguments are
// not referenced directly below - presumably readerGet picks them up from
// the enclosing scope; confirm against the macro definition.
//
// The caller shown in this change only uses this filter when s > 1, which
// guarantees the floor of each far edge is not below the ceil of the near
// edge.
inline float areaFilter( __PARAM_imgTex__, int img_rows, int img_cols,
                         float centerX, float centerY, float win_offset,
                         float cos_dir, float sin_dir, float x, float y, float s)
{
    // Fractional bounds of the window along the column (x) axis.
    const float col_lo_f = x * s;
    const float col_hi_f = col_lo_f + s;

    // Columns [col_begin, col_end) are fully covered: round the low bound
    // up and the high bound down.
    const int col_begin = convert_int_rtp(col_lo_f);
    const int col_end   = convert_int_rtn(col_hi_f);

    // Same for the row (y) axis.
    const float row_lo_f = y * s;
    const float row_hi_f = row_lo_f + s;

    const int row_begin = convert_int_rtp(row_lo_f);
    const int row_end   = convert_int_rtn(row_hi_f);

    // Weight of one fully covered sample.
    const float unit = 1.f / (s * s);

    // A partially covered strip exists on a side only when rounding moved
    // the integer bound away from the fractional one.
    const bool part_left   = col_begin > col_lo_f;
    const bool part_right  = col_end   < col_hi_f;
    const bool part_top    = row_begin > row_lo_f;
    const bool part_bottom = row_end   < row_hi_f;

    // Covered fraction of each partial strip.
    const float frac_left   = col_begin - col_lo_f;
    const float frac_right  = col_hi_f - col_end;
    const float frac_top    = row_begin - row_lo_f;
    const float frac_bottom = row_hi_f - row_end;

    // Indices of the samples just before the fully covered range.
    const int col_before = col_begin - 1;
    const int row_before = row_begin - 1;

    float acc = 0.f;

    // Fully covered rows: interior samples, then the partial left and
    // right samples of the same row.
    for (int row = row_begin; row < row_end; ++row)
    {
        for (int col = col_begin; col < col_end; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, col) * unit;
        }

        if (part_left)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, col_before) * (frac_left * unit);
        }

        if (part_right)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, col_end) * (frac_right * unit);
        }
    }

    // Partial strip above the fully covered rows.
    if (part_top)
    {
        for (int col = col_begin; col < col_end; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row_before, col) * (frac_top * unit);
        }
    }

    // Partial strip below the fully covered rows.
    if (part_bottom)
    {
        for (int col = col_begin; col < col_end; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row_end, col) * (frac_bottom * unit);
        }
    }

    // Four corner samples, each weighted by both of its covered fractions:
    // top-left, top-right, bottom-right, bottom-left.
    if (part_top && part_left)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row_before, col_before) * (frac_top * frac_left * unit);
    }

    if (part_top && part_right)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row_before, col_end) * (frac_top * frac_right * unit);
    }

    if (part_bottom && part_right)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row_end, col_end) * (frac_bottom * frac_right * unit);
    }

    if (part_bottom && part_left)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row_end, col_before) * (frac_bottom * frac_left * unit);
    }

    return acc;
}
953+
903954
void calc_dx_dy(
904955
__PARAM_imgTex__,
905956
int img_rows, int img_cols,
@@ -946,9 +997,18 @@ void calc_dx_dy(
946997
const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
947998
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
948999

949-
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
950-
linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
951-
win_offset, cos_dir, sin_dir, icoo, jcoo);
1000+
if (s > 1)
1001+
{
1002+
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
1003+
areaFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
1004+
win_offset, cos_dir, sin_dir, xIndex, yIndex, s);
1005+
}
1006+
else
1007+
{
1008+
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
1009+
linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
1010+
win_offset, cos_dir, sin_dir, icoo, jcoo);
1011+
}
9521012

9531013
barrier(CLK_LOCAL_MEM_FENCE);
9541014

@@ -1075,18 +1135,16 @@ void SURF_computeDescriptors64(
10751135
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
10761136

10771137
barrier(CLK_LOCAL_MEM_FENCE);
1078-
if (tid < 25)
1138+
if (tid == 0)
10791139
{
10801140
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
10811141

10821142
// write dx, dy, |dx|, |dy|
1083-
if (tid == 0)
1084-
{
1085-
descriptors_block[0] = sdx[0];
1086-
descriptors_block[1] = sdy[0];
1087-
descriptors_block[2] = sdxabs[0];
1088-
descriptors_block[3] = sdyabs[0];
1089-
}
1143+
1144+
descriptors_block[0] = sdx[0];
1145+
descriptors_block[1] = sdy[0];
1146+
descriptors_block[2] = sdxabs[0];
1147+
descriptors_block[3] = sdyabs[0];
10901148
}
10911149
}
10921150

0 commit comments

Comments
 (0)