33from shapely import wkt
44from shapely .geometry import Point
55
6+
67def distance_on_sphere (p1 , p2 ):
78 """
89 p1 and p2 are two lists that have two elements. They are numpy arrays of the long and lat
@@ -30,13 +31,19 @@ def distance_on_sphere(p1, p2):
3031 delta_lat = p2 [1 ] - p1 [1 ]
3132 delta_long = p2 [0 ] - p1 [0 ]
3233
33- a = np .sin (delta_lat / 2 ) ** 2 + np .cos (p1 [1 ]) * np .cos (p2 [1 ]) * np .sin (delta_long / 2 ) ** 2
34+ a = (
35+ np .sin (delta_lat / 2 ) ** 2
36+ + np .cos (p1 [1 ]) * np .cos (p2 [1 ]) * np .sin (delta_long / 2 ) ** 2
37+ )
3438 c = 2 * np .arcsin (np .sqrt (a ))
3539
3640 distances = earth_radius * c
3741 return distances
3842
43+
3944"""-----------------------------------Filtering Points------------------------------------------------"""
45+
46+
4047def filter_points (df , threshold_distance ):
4148 """
4249 Filter points from a DataFrame based on a threshold distance.
@@ -61,31 +68,37 @@ def filter_points(df, threshold_distance):
6168 lat = df ["lat" ].to_numpy ()
6269 long = df ["long" ].to_numpy ()
6370
64-
65- distances = distance_on_sphere ([long [1 :],lat [1 :]],
66- [long [:- 1 ],lat [:- 1 ]])
71+ distances = distance_on_sphere ([long [1 :], lat [1 :]], [long [:- 1 ], lat [:- 1 ]])
6772 road_length = np .sum (distances )
6873
69- #save the last point if the road segment is relavitely small (< 2*road_length)
74+ # keep the last point if the road segment is relatively short (road_length < 2 * threshold_distance)
7075 if threshold_distance <= road_length < 2 * threshold_distance :
7176 mask [- 1 ] = True
7277
7378 accumulated_distance = 0
7479 for i , distance in enumerate (distances ):
7580 accumulated_distance += distance
7681 if accumulated_distance >= threshold_distance :
77- mask [i + 1 ] = True
82+ mask [i + 1 ] = True
7883 accumulated_distance = 0 # Reset accumulated distance
7984
8085 to_be_returned_df = df [mask ]
8186 # since the last point has to be omitted in the vectorized distance calculation, it is being checked manually
8287 p2 = to_be_returned_df .iloc [0 ]
83- distance = distance_on_sphere ([float (p2 ["long" ]),float (p2 ["lat" ])],[long [- 1 ],lat [- 1 ]])
84-
85- #last point will be added if it suffices the length condition
86- #last point will be added in case there is only one point returned
87- if distance >= threshold_distance or len (to_be_returned_df ) == 1 :
88- to_be_returned_df = pd .concat ([to_be_returned_df ,pd .DataFrame (df .iloc [- 1 ],columns = to_be_returned_df .columns )],axis = 0 )
88+ distance = distance_on_sphere (
89+ [float (p2 ["long" ]), float (p2 ["lat" ])], [long [- 1 ], lat [- 1 ]]
90+ )
91+
92+ # the last point is added if its distance is at least threshold_distance
93+ # the last point is also added when only one point has been selected
94+ if distance >= threshold_distance or len (to_be_returned_df ) == 1 :
95+ to_be_returned_df = pd .concat (
96+ [
97+ to_be_returned_df ,
98+ pd .DataFrame (df .iloc [- 1 ], columns = to_be_returned_df .columns ),
99+ ],
100+ axis = 0 ,
101+ )
89102 return to_be_returned_df
90103
91104
@@ -109,19 +122,23 @@ def spatial_sampling(df, interval_length):
109122 if len (df ) == 1 :
110123 return df
111124
112- df ['long' ] = df ['geometry' ].apply (lambda geom : geom .x if geom .geom_type == 'Point' else None )
113- df ['lat' ] = df ['geometry' ].apply (lambda geom : geom .y if geom .geom_type == 'Point' else None )
114- sorted_df = df .sort_values (by = ['captured_at' ])
125+ df ["long" ] = df ["geometry" ].apply (
126+ lambda geom : geom .x if geom .geom_type == "Point" else None
127+ )
128+ df ["lat" ] = df ["geometry" ].apply (
129+ lambda geom : geom .y if geom .geom_type == "Point" else None
130+ )
131+ sorted_df = df .sort_values (by = ["captured_at" ])
115132
116133 sampled_sequence_df = pd .DataFrame ()
117134
118135 # loop through each sequence
119- for sequence in sorted_df ['sequence_id' ].unique ():
120- sequence_df = sorted_df [sorted_df ['sequence_id' ] == sequence ]
121-
122- filtered_sorted_sub_df = filter_points (sequence_df ,interval_length )
123- sampled_sequence_df = pd .concat ([sampled_sequence_df ,filtered_sorted_sub_df ],axis = 0 )
124-
136+ for sequence in sorted_df ["sequence_id" ].unique ():
137+ sequence_df = sorted_df [sorted_df ["sequence_id" ] == sequence ]
125138
139+ filtered_sorted_sub_df = filter_points (sequence_df , interval_length )
140+ sampled_sequence_df = pd .concat (
141+ [sampled_sequence_df , filtered_sorted_sub_df ], axis = 0
142+ )
126143
127144 return sampled_sequence_df
0 commit comments