Merge branch 'feature/street' of https://github.com/mapswipe/python-mapswipe-workers into feature/street

Gigaszi · Gigaszi · commit 81d44ede20f9 · 2024-11-28T15:09:04.000+01:00
diff --git a/mapswipe_workers/mapswipe_workers/project_types/street/project.py b/mapswipe_workers/mapswipe_workers/project_types/street/project.py
@@ -21,7 +21,7 @@
     build_multipolygon_from_layer_geometries,
     check_if_layer_has_too_many_geometries,
     save_geojson_to_file,
-    multipolygon_to_wkt
+    multipolygon_to_wkt,
 )
 from mapswipe_workers.project_types.project import BaseProject, BaseTask, BaseGroup
 from mapswipe_workers.utils.process_mapillary import get_image_metadata
@@ -56,7 +56,6 @@ def __init__(self, project_draft):
             sampling_threshold=project_draft.get("samplingThreshold", None),
         )
 
-
         self.imageIds = ImageMetadata["ids"]
         self.imageGeometries = ImageMetadata["geometries"]
 
@@ -83,7 +82,9 @@ def validate_geometries(self):
         self.inputGeometriesFileName = save_geojson_to_file(
             self.projectId, self.geometry
         )
-        layer, datasource = load_geojson_to_ogr(self.projectId, self.inputGeometriesFileName)
+        layer, datasource = load_geojson_to_ogr(
+            self.projectId, self.inputGeometriesFileName
+        )
 
         # check if inputs fit constraints
         check_if_layer_is_empty(self.projectId, layer)
@@ -97,7 +98,9 @@ def validate_geometries(self):
         del datasource
         del layer
 
-        logger.info(f"{self.projectId}" f" - validate geometry - " f"input geometry is correct.")
+        logger.info(
+            f"{self.projectId}" f" - validate geometry - " f"input geometry is correct."
+        )
         wkt_geometry = multipolygon_to_wkt(multi_polygon)
         return wkt_geometry
 
diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py
@@ -127,15 +127,26 @@ def coordinate_download(
             return pd.DataFrame(downloaded_metadata)
 
         target_columns = [
-            "id", "geometry", "captured_at", "is_pano", "compass_angle", "sequence", "organization_id"
+            "id",
+            "geometry",
+            "captured_at",
+            "is_pano",
+            "compass_angle",
+            "sequence",
+            "organization_id",
         ]
         for col in target_columns:
             if col not in downloaded_metadata.columns:
                 downloaded_metadata[col] = None
 
-        if downloaded_metadata.isna().all().all() == False or downloaded_metadata.empty == True:
+        if (
+            downloaded_metadata.isna().all().all() == False
+            or downloaded_metadata.empty == True
+        ):
             downloaded_metadata = downloaded_metadata[
-                downloaded_metadata['geometry'].apply(lambda point: point.within(polygon))
+                downloaded_metadata["geometry"].apply(
+                    lambda point: point.within(polygon)
+                )
             ]
 
         return downloaded_metadata
@@ -187,9 +198,7 @@ def filter_results(
     df = results_df.copy()
     if is_pano is not None:
         if df["is_pano"].isna().all():
-            logger.exception(
-                "No Mapillary Feature in the AoI has a 'is_pano' value."
-            )
+            logger.exception("No Mapillary Feature in the AoI has a 'is_pano' value.")
             return None
         df = df[df["is_pano"] == is_pano]
 
@@ -220,25 +229,28 @@ def get_image_metadata(
     organization_id: str = None,
     start_time: str = None,
     end_time: str = None,
-    sampling_threshold = None,
+    sampling_threshold=None,
 ):
     aoi_polygon = geojson_to_polygon(aoi_geojson)
-    downloaded_metadata = coordinate_download(
-        aoi_polygon, level, attempt_limit
-    )
+    downloaded_metadata = coordinate_download(aoi_polygon, level, attempt_limit)
     downloaded_metadata = downloaded_metadata[
-        downloaded_metadata['geometry'].apply(lambda geom: isinstance(geom, Point))
+        downloaded_metadata["geometry"].apply(lambda geom: isinstance(geom, Point))
     ]
 
     downloaded_metadata = filter_results(
         downloaded_metadata, is_pano, organization_id, start_time, end_time
     )
     if sampling_threshold is not None:
         downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold)
-    if downloaded_metadata.isna().all().all() == False or downloaded_metadata.empty == False:
+    if (
+        downloaded_metadata.isna().all().all() == False
+        or downloaded_metadata.empty == False
+    ):
         if len(downloaded_metadata) > 100000:
-            err = (f"Too many Images with selected filter "
-                   f"options for the AoI: {len(downloaded_metadata)}")
+            err = (
+                f"Too many Images with selected filter "
+                f"options for the AoI: {len(downloaded_metadata)}"
+            )
             raise ValueError(err)
         else:
             return {
diff --git a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py
@@ -3,6 +3,7 @@
 from shapely import wkt
 from shapely.geometry import Point
 
+
 def distance_on_sphere(p1, p2):
     """
     p1 and p2 are two lists that have two elements. They are numpy arrays of the long and lat
@@ -30,13 +31,19 @@ def distance_on_sphere(p1, p2):
     delta_lat = p2[1] - p1[1]
     delta_long = p2[0] - p1[0]
 
-    a = np.sin(delta_lat / 2) ** 2 + np.cos(p1[1]) * np.cos(p2[1]) * np.sin(delta_long / 2) ** 2
+    a = (
+        np.sin(delta_lat / 2) ** 2
+        + np.cos(p1[1]) * np.cos(p2[1]) * np.sin(delta_long / 2) ** 2
+    )
     c = 2 * np.arcsin(np.sqrt(a))
 
     distances = earth_radius * c
     return distances
 
+
 """-----------------------------------Filtering Points------------------------------------------------"""
+
+
 def filter_points(df, threshold_distance):
     """
     Filter points from a DataFrame based on a threshold distance.
@@ -61,31 +68,37 @@ def filter_points(df, threshold_distance):
     lat = df["lat"].to_numpy()
     long = df["long"].to_numpy()
 
-
-    distances = distance_on_sphere([long[1:],lat[1:]],
-                                   [long[:-1],lat[:-1]])
+    distances = distance_on_sphere([long[1:], lat[1:]], [long[:-1], lat[:-1]])
     road_length = np.sum(distances)
 
-    #save the last point if the road segment is relavitely small (< 2*road_length)
+    # keep the last point when the road is relatively short (< 2 * threshold_distance)
     if threshold_distance <= road_length < 2 * threshold_distance:
         mask[-1] = True
 
     accumulated_distance = 0
     for i, distance in enumerate(distances):
         accumulated_distance += distance
         if accumulated_distance >= threshold_distance:
-            mask[i+1] = True
+            mask[i + 1] = True
             accumulated_distance = 0  # Reset accumulated distance
 
     to_be_returned_df = df[mask]
     # since the last point has to be omitted in the vectorized distance calculation, it is being checked manually
     p2 = to_be_returned_df.iloc[0]
-    distance = distance_on_sphere([float(p2["long"]),float(p2["lat"])],[long[-1],lat[-1]])
-
-    #last point will be added if it suffices the length condition
-    #last point will be added in case there is only one point returned
-    if distance >= threshold_distance or len(to_be_returned_df) ==1:
-        to_be_returned_df = pd.concat([to_be_returned_df,pd.DataFrame(df.iloc[-1],columns=to_be_returned_df.columns)],axis=0)
+    distance = distance_on_sphere(
+        [float(p2["long"]), float(p2["lat"])], [long[-1], lat[-1]]
+    )
+
+    # last point will be added if it satisfies the length condition
+    # last point will be added in case there is only one point returned
+    if distance >= threshold_distance or len(to_be_returned_df) == 1:
+        to_be_returned_df = pd.concat(
+            [
+                to_be_returned_df,
+                pd.DataFrame(df.iloc[-1], columns=to_be_returned_df.columns),
+            ],
+            axis=0,
+        )
     return to_be_returned_df
 
 
@@ -109,19 +122,23 @@ def spatial_sampling(df, interval_length):
     if len(df) == 1:
         return df
 
-    df['long'] = df['geometry'].apply(lambda geom: geom.x if geom.geom_type == 'Point' else None)
-    df['lat'] = df['geometry'].apply(lambda geom: geom.y if geom.geom_type == 'Point' else None)
-    sorted_df = df.sort_values(by=['captured_at'])
+    df["long"] = df["geometry"].apply(
+        lambda geom: geom.x if geom.geom_type == "Point" else None
+    )
+    df["lat"] = df["geometry"].apply(
+        lambda geom: geom.y if geom.geom_type == "Point" else None
+    )
+    sorted_df = df.sort_values(by=["captured_at"])
 
     sampled_sequence_df = pd.DataFrame()
 
     # loop through each sequence
-    for sequence in sorted_df['sequence_id'].unique():
-        sequence_df = sorted_df[sorted_df['sequence_id'] == sequence]
-
-        filtered_sorted_sub_df = filter_points(sequence_df,interval_length)
-        sampled_sequence_df = pd.concat([sampled_sequence_df,filtered_sorted_sub_df],axis=0)
-
+    for sequence in sorted_df["sequence_id"].unique():
+        sequence_df = sorted_df[sorted_df["sequence_id"] == sequence]
 
+        filtered_sorted_sub_df = filter_points(sequence_df, interval_length)
+        sampled_sequence_df = pd.concat(
+            [sampled_sequence_df, filtered_sorted_sub_df], axis=0
+        )
 
     return sampled_sequence_df