@@ -875,9 +875,6 @@ inline float linearFilter(
875
875
float centerX , float centerY , float win_offset ,
876
876
float cos_dir , float sin_dir , float y , float x )
877
877
{
878
- x -= 0.5f ;
879
- y -= 0.5f ;
880
-
881
878
float out = 0.0f ;
882
879
883
880
const int x1 = round (x );
@@ -900,6 +897,60 @@ inline float linearFilter(
900
897
return out ;
901
898
}
902
899
900
// Area-weighted average of the window samples covered by one square cell.
//
// The cell covers [x*s, x*s + s) along the second readerGet index and
// [y*s, y*s + s) along the first one. Samples whose integer index lies
// inside the rounded-in range contribute with weight 1/(s*s); the
// row/column just outside that range contributes in proportion to the
// fraction of it that the cell overlaps, and the four corner samples use the
// product of both fractions.
//
// Samples are fetched through readerGet(centerX, centerY, win_offset,
// cos_dir, sin_dir, <first index>, <second index>).
// No check is made here that s is non-zero.
inline float areaFilter(__PARAM_imgTex__, int img_rows, int img_cols,
                        float centerX, float centerY, float win_offset,
                        float cos_dir, float sin_dir, float x, float y, float s)
{
    // Real-valued cell bounds.
    const float xLo = x * s;
    const float xHi = xLo + s;
    const float yLo = y * s;
    const float yHi = yLo + s;

    // Whole-sample range: lower bound rounded up, upper bound rounded down.
    const int colFirst = convert_int_rtp(xLo);
    const int colStop  = convert_int_rtn(xHi);
    const int rowFirst = convert_int_rtp(yLo);
    const int rowStop  = convert_int_rtn(yHi);

    // Which sides of the cell stick out past the whole-sample range.
    const int partialLeft   = (colFirst > xLo);
    const int partialRight  = (colStop < xHi);
    const int partialTop    = (rowFirst > yLo);
    const int partialBottom = (rowStop < yHi);

    const float scale = 1.f / (s * s);
    float acc = 0.f;

    // Fully covered rows: interior samples first, then the two partial columns.
    for (int row = rowFirst; row < rowStop; ++row)
    {
        for (int col = colFirst; col < colStop; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, col) * scale;
        }

        if (partialLeft)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, (colFirst - 1)) * ((colFirst - xLo) * scale);
        }

        if (partialRight)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, colStop) * ((xHi - colStop) * scale);
        }
    }

    // Partially covered row just before the whole-sample range.
    if (partialTop)
    {
        for (int col = colFirst; col < colStop; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (rowFirst - 1), col) * ((rowFirst - yLo) * scale);
        }
    }

    // Partially covered row just after the whole-sample range.
    if (partialBottom)
    {
        for (int col = colFirst; col < colStop; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowStop, col) * ((yHi - rowStop) * scale);
        }
    }

    // Corner samples, weighted by the product of both partial overlaps.
    if (partialTop && partialLeft)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (rowFirst - 1), (colFirst - 1)) * ((rowFirst - yLo) * (colFirst - xLo) * scale);
    }

    if (partialTop && partialRight)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (rowFirst - 1), colStop) * ((rowFirst - yLo) * (xHi - colStop) * scale);
    }

    if (partialBottom && partialRight)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowStop, colStop) * ((yHi - rowStop) * (xHi - colStop) * scale);
    }

    if (partialBottom && partialLeft)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowStop, (colFirst - 1)) * ((yHi - rowStop) * (colFirst - xLo) * scale);
    }

    return acc;
}
953
+
903
954
void calc_dx_dy (
904
955
__PARAM_imgTex__ ,
905
956
int img_rows , int img_cols ,
@@ -946,9 +997,18 @@ void calc_dx_dy(
946
997
const float icoo = ((float )yIndex / (PATCH_SZ + 1 )) * win_size ;
947
998
const float jcoo = ((float )xIndex / (PATCH_SZ + 1 )) * win_size ;
948
999
949
- s_PATCH [get_local_id (1 ) * 6 + get_local_id (0 )] =
950
- linearFilter (__PASS_imgTex__ , img_rows , img_cols , centerX , centerY ,
951
- win_offset , cos_dir , sin_dir , icoo , jcoo );
1000
+ if (s > 1 )
1001
+ {
1002
+ s_PATCH [get_local_id (1 ) * 6 + get_local_id (0 )] =
1003
+ areaFilter (__PASS_imgTex__ , img_rows , img_cols , centerX , centerY ,
1004
+ win_offset , cos_dir , sin_dir , xIndex , yIndex , s );
1005
+ }
1006
+ else
1007
+ {
1008
+ s_PATCH [get_local_id (1 ) * 6 + get_local_id (0 )] =
1009
+ linearFilter (__PASS_imgTex__ , img_rows , img_cols , centerX , centerY ,
1010
+ win_offset , cos_dir , sin_dir , icoo , jcoo );
1011
+ }
952
1012
953
1013
barrier (CLK_LOCAL_MEM_FENCE );
954
1014
@@ -1075,18 +1135,16 @@ void SURF_computeDescriptors64(
1075
1135
reduce_sum25 (sdx , sdy , sdxabs , sdyabs , tid );
1076
1136
1077
1137
barrier (CLK_LOCAL_MEM_FENCE );
1078
- if (tid < 25 )
1138
+ if (tid == 0 )
1079
1139
{
1080
1140
__global float * descriptors_block = descriptors + descriptors_step * get_group_id (0 ) + (get_group_id (1 ) << 2 );
1081
1141
1082
1142
// write dx, dy, |dx|, |dy|
1083
- if (tid == 0 )
1084
- {
1085
- descriptors_block [0 ] = sdx [0 ];
1086
- descriptors_block [1 ] = sdy [0 ];
1087
- descriptors_block [2 ] = sdxabs [0 ];
1088
- descriptors_block [3 ] = sdyabs [0 ];
1089
- }
1143
+
1144
+ descriptors_block [0 ] = sdx [0 ];
1145
+ descriptors_block [1 ] = sdy [0 ];
1146
+ descriptors_block [2 ] = sdxabs [0 ];
1147
+ descriptors_block [3 ] = sdyabs [0 ];
1090
1148
}
1091
1149
}
1092
1150
0 commit comments