-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexclude_nearby_bridges.py
More file actions
70 lines (52 loc) · 2.5 KB
/
exclude_nearby_bridges.py
File metadata and controls
70 lines (52 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
def load_bridge_info(csv_file) -> pd.DataFrame:
    """Load the bridge information CSV into a DataFrame.

    Args:
        csv_file: Path or open file-like object, passed straight to
            ``pandas.read_csv`` with default parsing options.

    Returns:
        The parsed table as a ``pandas.DataFrame``.
    """
    return pd.read_csv(csv_file)
def load_nearby_join(csv_file) -> pd.DataFrame:
    """Load the nearby-join CSV into a DataFrame.

    Args:
        csv_file: Path or open file-like object, passed straight to
            ``pandas.read_csv`` with default parsing options.

    Returns:
        The parsed table as a ``pandas.DataFrame``.
    """
    return pd.read_csv(csv_file)
def filter_duplicates_and_output(bridge_df, join_df, output_csv):
    """Drop the weaker bridge of each nearby pair and write the rest to CSV.

    Each row of ``join_df`` names two structure numbers. Rows pairing a
    bridge with itself are ignored. Pairs are processed in row order; a pair
    is skipped when either member has already been marked for removal. For
    the remaining pairs, the bridge with the lower ``osm_similarity`` is
    marked for removal. On a tie the first bridge of the pair is kept. A
    missing (NaN) score loses to a real score; if both scores are missing the
    first bridge is kept.

    Args:
        bridge_df: DataFrame with columns ``"8 - Structure Number"`` and
            ``"osm_similarity"``.
        join_df: DataFrame with columns ``"8 - Structure Number"`` and
            ``"8 - Structure Number_2"``.
        output_csv: Destination passed to ``DataFrame.to_csv``; the filtered
            ``bridge_df`` is written there without the index.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    id_col = "8 - Structure Number"
    other_id_col = "8 - Structure Number_2"
    score_col = "osm_similarity"

    # A bridge joined to itself is not a duplicate candidate.
    candidate_pairs = join_df[join_df[id_col] != join_df[other_id_col]]

    # Build the ID -> score lookup once instead of scanning bridge_df for
    # every pair. Rows without an ID can never be matched, and when an ID
    # occurs several times the first occurrence supplies the score.
    known = bridge_df.dropna(subset=[id_col]).drop_duplicates(
        subset=id_col, keep="first"
    )
    score_by_id = dict(zip(known[id_col].tolist(), known[score_col].tolist()))

    # IDs of bridges that lose a comparison and must be dropped.
    remove_ids = set()

    # Read the two ID columns directly: row-wise iteration would coerce the
    # IDs to a common dtype with the other columns of the row (e.g. int ->
    # float), which can corrupt large integer IDs.
    id_pairs = zip(
        candidate_pairs[id_col].tolist(),
        candidate_pairs[other_id_col].tolist(),
    )
    for sn1, sn2 in id_pairs:
        # Once a bridge is marked for removal, pairs involving it are moot.
        if sn1 in remove_ids or sn2 in remove_ids:
            continue

        if sn1 not in score_by_id or sn2 not in score_by_id:
            print(f"id {sn1} or {sn2} not found in bridge_df")
            continue

        score1 = score_by_id[sn1]
        score2 = score_by_id[sn2]

        # Comparisons against NaN are always False, so handle a missing
        # first score explicitly; otherwise it would silently win.
        first_missing_only = pd.isna(score1) and not pd.isna(score2)
        if first_missing_only or score2 > score1:
            remove_ids.add(sn1)
        else:
            # sn1 scores higher, scores tie, or sn2's score is missing.
            remove_ids.add(sn2)

    print("IDs to be removed:", remove_ids)

    # Keep every bridge that was not marked for removal.
    filtered_bridge_df = bridge_df[~bridge_df[id_col].isin(remove_ids)]
    filtered_bridge_df.to_csv(output_csv, index=False)
    print(f"Filtered bridge information saved to '{output_csv}'.")
def run(bridge_match_percentage, nearby_join_csv, final_bridges_csv):
    """Load both input CSVs, remove nearby duplicate bridges, and save the result.

    Args:
        bridge_match_percentage: Despite the name, this is the CSV source
            handed to ``load_bridge_info`` (the bridge table).
        nearby_join_csv: CSV source handed to ``load_nearby_join`` (the table
            of nearby bridge pairs).
        final_bridges_csv: Output destination forwarded to
            ``filter_duplicates_and_output``.
    """
    bridge_df = load_bridge_info(bridge_match_percentage)
    join_df = load_nearby_join(nearby_join_csv)

    # Resolve each nearby pair and write the surviving bridges.
    filter_duplicates_and_output(bridge_df, join_df, final_bridges_csv)