Skip to content

Commit 886074f

Browse files
authored
Update get_osm_data script (#241)
* Enhance OSM getter * Update OSM script
1 parent d7a7c40 commit 886074f

File tree

1 file changed

+73
-237
lines changed

1 file changed

+73
-237
lines changed

utils/get_osm_data.py

Lines changed: 73 additions & 237 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,10 @@
1515
from argparse import ArgumentParser
1616
import logging
1717
import osmnx as ox
18-
import networkx as nx
19-
import matplotlib.patches as mpatches
20-
import matplotlib.pyplot as plt
21-
import pandas as pd
22-
from shapely.geometry import MultiLineString, LineString
23-
from shapely.ops import linemerge
2418

25-
RGBA_RED = (1, 0, 0, 0.3)
19+
__version__ = "2025.1.16"
20+
21+
RGBA_RED = (1, 0, 0, 1)
2622
RGBA_WHITE = (1, 1, 1, 1)
2723

2824
FLAGS_MOTORWAY = ["motorway", "motorway_link"]
@@ -45,171 +41,6 @@
4541
"busway",
4642
]
4743

48-
49-
def merge_edges(
50-
graph: nx.DiGraph, previous_node: int, successive_node: int, node: int
51-
) -> dict:
52-
"""
53-
Merge two edges into a single edge.
54-
The function merges the edges into a single edge if the following conditions are met:
55-
- the name of the two edges is the same
56-
- the number of lanes is the same
57-
- the geometry of the two edges is contiguous
58-
- the coordinates of the previous_node and successive_node are in the geometry
59-
60-
Parameters:
61-
----------------
62-
graph (networkx.DiGraph): the graph
63-
previous_node (int): the previous node
64-
successive_node (int): the successive node
65-
node (int): the id of the node which will be removed
66-
67-
Returns:
68-
----------------
69-
dict: the new edge
70-
"""
71-
try:
72-
data_u = graph.get_edge_data(previous_node, node)[0]
73-
data_v = graph.get_edge_data(node, successive_node)[0]
74-
data_u.setdefault("lanes", 1)
75-
data_v.setdefault("lanes", 1)
76-
if (
77-
not (data_u["name"] in data_v["name"] or data_v["name"] in data_u["name"])
78-
or data_u["lanes"] != data_v["lanes"]
79-
):
80-
return None
81-
edge_uv = data_u.copy()
82-
# set length as the sum
83-
edge_uv["length"] = data_u["length"] + data_v["length"]
84-
edge_uv["lanes"] = int(data_u["lanes"])
85-
# merge also linestrings
86-
edge_uv["geometry"] = data_u["geometry"].union(data_v["geometry"])
87-
if isinstance(edge_uv["geometry"], MultiLineString):
88-
edge_uv["geometry"] = linemerge(edge_uv["geometry"])
89-
else:
90-
edge_uv["geometry"] = edge_uv["geometry"]
91-
if isinstance(edge_uv["geometry"], LineString):
92-
coords = list(edge_uv["geometry"].coords)
93-
else:
94-
# If it's still a MultiLineString,
95-
# handle it by iterating through its individual LineStrings
96-
coords = []
97-
for line in edge_uv["geometry"]:
98-
coords.extend(
99-
list(line.coords)
100-
) # Add coords from each individual LineString
101-
# take the list from coordinates of previous_node to coordinates of successive_node
102-
u_coords = (graph.nodes[previous_node]["x"], graph.nodes[previous_node]["y"])
103-
v_coords = (
104-
graph.nodes[successive_node]["x"],
105-
graph.nodes[successive_node]["y"],
106-
)
107-
if u_coords not in coords or v_coords not in coords:
108-
return None
109-
# cut coords from u_index to v_index
110-
edge_uv["geometry"] = LineString(
111-
coords[coords.index(u_coords) : coords.index(v_coords)]
112-
)
113-
except TypeError:
114-
# type error means that data_u or data_v cannot be created,
115-
# which means that the road is a one-way road
116-
# thus, skip the type error
117-
return None
118-
119-
return edge_uv
120-
121-
122-
def simplify_graph(graph_original: nx.DiGraph) -> nx.DiGraph:
123-
"""
124-
Simplify the graph by removing nodes that have only two neighbors and are actually the same
125-
street.
126-
The function merges the edges into a single edge.
127-
128-
Parameters:
129-
----------------
130-
graph_original (networkx.DiGraph): the graph to simplify
131-
132-
Returns:
133-
----------------
134-
networkx.DiGraph: the simplified graph
135-
"""
136-
graph = graph_original.copy()
137-
previous_nodes = 0
138-
while previous_nodes != len(graph.nodes):
139-
logging.info("New cycle: current_nodes=%d", len(graph.nodes))
140-
previous_nodes = len(graph.nodes)
141-
for node in graph.copy().nodes:
142-
# define neighbours as the list of predecessors and successors
143-
neighbours = list(graph.predecessors(node)) + list(graph.successors(node))
144-
if (
145-
len(neighbours) != 2
146-
or graph.in_degree(node) != graph.out_degree(node)
147-
or graph.in_degree(node) > 2
148-
):
149-
continue
150-
u, v = neighbours
151-
if graph.has_edge(u, v):
152-
continue
153-
edge_uv = merge_edges(graph, u, v, node)
154-
edge_vu = merge_edges(graph, v, u, node)
155-
156-
if not (edge_uv is None and edge_vu is None):
157-
if edge_uv is not None:
158-
# print(f"Edges {u} -> {node} and {node} -> {v} can be merged.")
159-
graph.add_edge(
160-
u,
161-
v,
162-
length=edge_uv["length"],
163-
name=edge_uv["name"],
164-
geometry=edge_uv["geometry"],
165-
)
166-
# print(f"Added edge {graph.get_edge_data(u, v)}")
167-
if edge_vu is not None:
168-
# print(f"Edges {v} -> {node} and {node} -> {u} can be merged.")
169-
graph.add_edge(
170-
v,
171-
u,
172-
length=edge_vu["length"],
173-
name=edge_vu["name"],
174-
geometry=edge_vu["geometry"],
175-
)
176-
# print(f"Added edge {graph.get_edge_data(v, u)}")
177-
graph.remove_node(node)
178-
# print(f"Removed node {node}")
179-
180-
# Remove all nodes that are not in the giant component
181-
graph.remove_nodes_from(
182-
set(graph.nodes) - max(list(nx.weakly_connected_components(graph)), key=len)
183-
)
184-
# remove all self-loops
185-
graph.remove_edges_from(list(nx.selfloop_edges(graph)))
186-
# check whether there are edges with the same u and v. If so, keep only the one with the most lanes
187-
edges_to_remove = []
188-
seen_edges = {}
189-
190-
for u, v, data in graph.edges(data=True):
191-
lanes = data.get("lanes", 0)
192-
193-
if (u, v) not in seen_edges:
194-
seen_edges[(u, v)] = (lanes, None) # Store first edge and its lanes count
195-
else:
196-
existing_lanes, existing_edge = seen_edges[(u, v)]
197-
198-
if lanes > existing_lanes:
199-
edges_to_remove.append(
200-
existing_edge
201-
) # Remove the previous edge if the current one has more lanes
202-
seen_edges[(u, v)] = (lanes, (u, v)) # Update to keep current edge
203-
else:
204-
edges_to_remove.append(
205-
(u, v)
206-
) # Remove the current edge if it has fewer 'lanes'
207-
208-
graph.remove_edges_from(edges_to_remove)
209-
210-
return graph
211-
212-
21344
if __name__ == "__main__":
21445
parser = ArgumentParser("Script to get the OSM data of a place.")
21546
parser.add_argument(
@@ -220,12 +51,24 @@ def simplify_graph(graph_original: nx.DiGraph) -> nx.DiGraph:
22051
parser.add_argument(
22152
"--exclude-motorway",
22253
action="store_true",
223-
help="Exclude motorways from the data",
54+
help="Exclude motorways from the data. Default is False",
22455
)
22556
parser.add_argument(
22657
"--exclude-residential",
22758
action="store_true",
228-
help="Exclude residential roads from the data",
59+
help="Exclude residential roads from the data. Default is False",
60+
)
61+
parser.add_argument(
62+
"--allow-duplicates",
63+
action="store_true",
64+
help="Allow duplicated edges in the data. Default is False",
65+
)
66+
parser.add_argument(
67+
"-t",
68+
"--tolerance",
69+
type=int,
70+
default=20,
71+
help="Radius in meters to merge intersections. For more info, see osmnx documentation.",
22972
)
23073
parser = parser.parse_args()
23174
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
@@ -240,78 +83,46 @@ def simplify_graph(graph_original: nx.DiGraph) -> nx.DiGraph:
24083
logging.ERROR, f"\033[1;31m{logging.getLevelName(logging.ERROR)}\033[1;0m"
24184
)
24285

243-
# get the street network for San Cesario sul Panaro
244-
G_ALL = ox.graph_from_place(parser.place, network_type="drive")
86+
logging.info("Welcome to get_osm_data.py v%s", __version__)
87+
88+
# define CUSTOM_FILTER based on FLAGS and args
89+
FLAGS = FLAGS_NORMAL
90+
if not parser.exclude_motorway:
91+
FLAGS += FLAGS_MOTORWAY
92+
if not parser.exclude_residential:
93+
FLAGS += FLAGS_RESIDENTIAL
94+
CUSTOM_FILTER = f"[\"highway\"~\"{'|'.join(FLAGS)}\"]"
95+
logging.info("Custom filter: %s", CUSTOM_FILTER)
96+
GRAPH = ox.graph_from_place(parser.place, network_type="drive")
97+
ox.plot_graph(GRAPH, show=False, close=True, save=True, filepath="./original.png")
24598
logging.info(
246-
"Graph created with %d nodes and %d edges.", len(G_ALL.nodes), len(G_ALL.edges)
247-
)
248-
249-
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G_ALL)
250-
gdf_edges["highway"] = gdf_edges["highway"].apply(
251-
lambda x: x[-1] if isinstance(x, list) else x
252-
)
253-
if "lanes" not in gdf_edges.columns:
254-
gdf_edges["lanes"] = 1
255-
gdf_edges["lanes"] = gdf_edges["lanes"].apply(
256-
lambda x: max(x) if isinstance(x, list) else 1 if pd.isna(x) else x
99+
"Original network has %d nodes and %d edges.",
100+
len(GRAPH.nodes),
101+
len(GRAPH.edges),
257102
)
258-
gdf_edges["name"] = gdf_edges["name"].apply(
259-
lambda x: " ".join(x) if isinstance(x, list) else " " if pd.isna(x) else x
103+
GRAPH = ox.graph_from_place(
104+
parser.place, network_type="drive", custom_filter=CUSTOM_FILTER
260105
)
261-
# gdf_edges = gdf_edges[~gdf_edges["access"].isin(["no", "private"])]
262-
263-
# Make a plot to visualize the removed links
264-
removed_patch = mpatches.Patch(color=RGBA_RED, label="Removed Nodes and Edges")
265-
266-
if parser.exclude_motorway:
267-
gdf_edges = gdf_edges[~gdf_edges["highway"].isin(FLAGS_MOTORWAY)]
268-
if parser.exclude_residential:
269-
gdf_edges = gdf_edges[~gdf_edges["highway"].isin(FLAGS_RESIDENTIAL)]
270-
271-
# rebuild the graph
272-
G = ox.graph_from_gdfs(gdf_nodes, gdf_edges)
273-
G.remove_nodes_from(list(nx.isolates(G)))
274106
logging.info(
275-
"Graph filtered: now it has %d nodes and %d edges.", len(G.nodes), len(G.edges)
107+
"Custom filtered graph has %d nodes and %d edges.",
108+
len(GRAPH.nodes),
109+
len(GRAPH.edges),
276110
)
277-
G = simplify_graph(G)
278-
logging.info(
279-
"Graph simplified: now it has %d nodes and %d edges.",
280-
len(G.nodes),
281-
len(G.edges),
111+
GRAPH = ox.consolidate_intersections(
112+
ox.project_graph(GRAPH), tolerance=parser.tolerance
282113
)
283-
# assert that graph has not isolated nodes
284-
assert not list(nx.isolates(G))
285-
# assert that graph has not self-loops
286-
assert not list(nx.selfloop_edges(G))
287-
288-
fig, ax = ox.plot_graph(
289-
G_ALL,
290-
node_color=[
291-
RGBA_RED if node not in G.nodes else RGBA_WHITE for node in G_ALL.nodes
292-
],
293-
edge_color=[
294-
RGBA_RED if edge not in G.edges else RGBA_WHITE for edge in G_ALL.edges
295-
],
296-
show=False,
297-
close=False,
114+
logging.info(
115+
"Consolidated graph has %d nodes and %d edges.",
116+
len(GRAPH.nodes),
117+
len(GRAPH.edges),
298118
)
299-
ax.legend(handles=[removed_patch])
300-
fig.set_size_inches(16, 9)
301-
plt.savefig("removed_nodes_and_edges.png")
302-
303-
# Plot resulting graph
304-
fig, ax = ox.plot_graph(G, show=False, close=False)
305-
fig.set_size_inches(16, 9)
306-
plt.savefig("final_graph.png")
307-
308-
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
119+
# plot the consolidated graph and save it to file (no explicit figure size is passed here)
120+
ox.plot_graph(GRAPH, show=False, close=True, save=True, filepath="./final.png")
121+
gdf_nodes, gdf_edges = ox.graph_to_gdfs(ox.project_graph(GRAPH, to_latlong=True))
309122
# note that osmid is the index of the gdf_nodes DataFrame, so reset the index to expose it as a column
310123
gdf_nodes.reset_index(inplace=True)
311124
gdf_edges.reset_index(inplace=True)
312125

313-
# assert that there are no edges with the same u and v
314-
assert not gdf_edges.duplicated(subset=["u", "v"]).any()
315126
# Prepare node dataframe
316127
gdf_nodes = gdf_nodes[["osmid", "x", "y", "highway"]]
317128
# Prepare edge dataframe
@@ -320,6 +131,31 @@ def simplify_graph(graph_original: nx.DiGraph) -> nx.DiGraph:
320131
gdf_edges = gdf_edges[
321132
["u", "v", "length", "oneway", "lanes", "highway", "maxspeed", "name"]
322133
]
134+
if parser.allow_duplicates:
135+
N_DUPLICATES = 0
136+
else:
137+
# Check for duplicate edges
138+
duplicated_mask = gdf_edges.duplicated(subset=["u", "v"])
139+
N_DUPLICATES = duplicated_mask.sum()
140+
141+
if N_DUPLICATES > 0:
142+
logging.warning(
143+
"There are %d duplicated edges which will be removed. "
144+
"Please look at them in the promped plot.",
145+
N_DUPLICATES,
146+
)
147+
# Plot the graph with duplicated edges in red
148+
edge_colors = [
149+
RGBA_RED if duplicated_mask.iloc[i] else RGBA_WHITE
150+
for i in range(len(gdf_edges))
151+
]
152+
ox.plot_graph(GRAPH, edge_color=edge_colors)
153+
154+
# Remove duplicated edges
155+
gdf_edges = gdf_edges.drop_duplicates(subset=["u", "v"])
323156
# Save the data
324-
gdf_nodes.to_csv("nodes.csv", sep=";", index=False)
325-
gdf_edges.to_csv("edges.csv", sep=";", index=False)
157+
place = parser.place.split(",")[0].strip().lower()
158+
gdf_nodes.to_csv(f"{place}_nodes.csv", sep=";", index=False)
159+
logging.info('Nodes correctly saved in "%s_nodes.csv"', place)
160+
gdf_edges.to_csv(f"{place}_edges.csv", sep=";", index=False)
161+
logging.info('Edges correctly saved in "%s_edges.csv"', place)

0 commit comments

Comments
 (0)