diff --git a/CHANGELOG.md b/CHANGELOG.md index caa7eeb..5289316 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Change log +### 0.2.12 +- Updated OSMTaggedNodeParser to apply the OSW node and point filters with normalization before adding loose tagged nodes, ensuring non-compliant features like crossings are no longer emitted. +- Extended serializer tests to cover the new tagged-node behavior, confirming that compliant kerb features are retained while schema-invalid crossings are skipped. +- Updated GeoJSON node export to normalize IDs, retain full OSM identifiers, and skip non-OSW features so schema-invalid crossings are no longer emitted. +- Ensured only synthetic node IDs have their prefix trimmed, fixing the prior bug where numeric IDs lost the leading digit and caused _id/ext:osm_id mismatches. +- Expanded serializer tests to cover OSW-compliant node export, rejection of non-compliant crossings, and prefix handling for generated point IDs. +- Refined GeoJSON export to filter nodes using tag-only metadata, preventing schema-invalid features from being emitted. +- Normalized ext:osm_id handling to preserve full numeric identifiers while trimming prefixed synthetic values. + + ### 0.2.11 - Retain numeric `incline` values and new `length` tags during way normalization - Recognize any `highway=steps` way as stairs, preserving valid `climb` tags diff --git a/requirements.txt b/requirements.txt index c752478..a433c7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ shapely~=2.0.2 pyproj~=3.6.1 coverage~=7.5.1 ogr2osm==1.2.0 -python-osw-validation==0.2.13 \ No newline at end of file +python-osw-validation==0.2.15 \ No newline at end of file diff --git a/src/osm_osw_reformatter/serializer/osm/osm_graph.py b/src/osm_osw_reformatter/serializer/osm/osm_graph.py index 0399379..a1335ba 100644 --- a/src/osm_osw_reformatter/serializer/osm/osm_graph.py +++ b/src/osm_osw_reformatter/serializer/osm/osm_graph.py @@ -308,16 +308,30 @@ def area(self, a): exteriors_count = exteriors_count + 1 class OSMTaggedNodeParser(osmium.SimpleHandler): - def __init__(self, G: nx.MultiDiGraph): + def __init__(self, G: nx.MultiDiGraph, node_filter: Optional[callable] = None, + point_filter: Optional[callable] = None) -> None: + osmium.SimpleHandler.__init__(self) self.G = G + self.node_filter = node_filter or (lambda tags: False) + self.point_filter = point_filter or (lambda tags: False) def node(self, n): - # Only add nodes with tags - if n.tags and len(n.tags) > 0: - d = dict(n.tags) - # Store OSM node id as string (to match the pattern in your output) - self.G.add_node(n.id, lon=n.location.lon, lat=n.location.lat, **d) + if not n.tags or len(n.tags) == 0: + return + + tags = dict(n.tags) + + if self.node_filter(tags): + normalized = OSWNodeNormalizer(tags).normalize() + if normalized: + self.G.add_node(n.id, lon=n.location.lon, lat=n.location.lat, **normalized) + return + + if self.point_filter(tags): + normalized = OSWPointNormalizer(tags).normalize() + if normalized: + self.G.add_node("p" + str(n.id), lon=n.location.lon, lat=n.location.lat, **normalized) class OSMGraph: def __init__(self, G: nx.MultiDiGraph = None) -> None: @@ -359,21 +373,21 @@ def from_osm_file( del line_parser # --- PATCH START: Add all loose/tagged nodes --- - tagged_node_parser = OSMTaggedNodeParser(G) + tagged_node_parser = OSMTaggedNodeParser(G, node_filter, point_filter) tagged_node_parser.apply_file(osm_file) G = tagged_node_parser.G del tagged_node_parser # --- PATCH END --- - # zone_parser = OSMZoneParser(G, zone_filter, progressbar=progressbar) - # zone_parser.apply_file(osm_file) - # G = zone_parser.G - # del zone_parser + zone_parser = OSMZoneParser(G, zone_filter, progressbar=progressbar) + zone_parser.apply_file(osm_file) + G = zone_parser.G + del zone_parser - # polygon_parser = OSMPolygonParser(G, polygon_filter, progressbar=progressbar) - # polygon_parser.apply_file(osm_file) - # G = polygon_parser.G - # del polygon_parser + polygon_parser = OSMPolygonParser(G, polygon_filter, progressbar=progressbar) + polygon_parser.apply_file(osm_file) + G = polygon_parser.G + del polygon_parser return OSMGraph(G) @@ -618,7 +632,9 @@ def to_geojson(self, *args) -> None: polygon_features = [] for n, d in self.G.nodes(data=True): d_copy = {**d} - d_copy["_id"] = str(n)[1:] + id_str = str(n) + trimmed_id = id_str[1:] if isinstance(n, str) else id_str + d_copy["_id"] = trimmed_id d_copy['ext:osm_id'] = str(d_copy.get('osm_id', d_copy["_id"])) if OSWPointNormalizer.osw_point_filter(d): diff --git a/src/osm_osw_reformatter/serializer/osm/osm_normalizer.py b/src/osm_osw_reformatter/serializer/osm/osm_normalizer.py index be215af..457d570 100644 --- a/src/osm_osw_reformatter/serializer/osm/osm_normalizer.py +++ b/src/osm_osw_reformatter/serializer/osm/osm_normalizer.py @@ -78,3 +78,105 @@ def process_feature_post(self, osmgeometry, ogrfeature, ogrgeometry): if osm_id is not None: osmgeometry.id = osm_id + + def process_output(self, osmnodes, osmways, osmrelations): + """ + Convert negative IDs into deterministic 63-bit positive IDs + for all nodes, ways, and relations (and their references), + and add a '_id' tag with the new derived positive ID. + """ + mask_63bit = (1 << 63) - 1 + + def _set_id_tag(osm_obj, new_id): + tags = getattr(osm_obj, "tags", None) + if tags is None or not hasattr(tags, "__setitem__"): + return + + value = str(new_id) + existing = tags.get("_id") if hasattr(tags, "get") else None + + if isinstance(existing, list): + tags["_id"] = [value] + elif existing is None: + # Determine if the container generally stores values as lists + sample_value = None + if hasattr(tags, "values"): + for sample_value in tags.values(): + if sample_value is not None: + break + if isinstance(sample_value, list): + tags["_id"] = [value] + else: + # Default to list storage to match ogr2osm's internal structures + tags["_id"] = [value] + else: + tags["_id"] = value + + def _normalise_id(osm_obj): + if osm_obj.id < 0: + new_id = osm_obj.id & mask_63bit + osm_obj.id = new_id + _set_id_tag(osm_obj, new_id) + return new_id + return osm_obj.id + + # Fix node IDs + for node in osmnodes: + _normalise_id(node) + + # Fix ways and their node references + for way in osmways: + _normalise_id(way) + + # Detect how node references are stored + node_refs = getattr(way, "nds", None) or getattr(way, "refs", None) or getattr(way, "nodeRefs", None) or getattr(way, "nodes", None) + + if node_refs is not None: + new_refs = [] + for ref in node_refs: + # Handle both int and OsmNode-like objects + if isinstance(ref, int): + new_refs.append(ref & mask_63bit if ref < 0 else ref) + elif hasattr(ref, "id"): + if ref.id < 0: + ref.id = ref.id & mask_63bit + _set_id_tag(ref, ref.id) + new_refs.append(ref) + else: + new_refs.append(ref) + + # Write back using whichever attribute exists + if hasattr(way, "nds"): + way.nds = new_refs + elif hasattr(way, "refs"): + way.refs = new_refs + elif hasattr(way, "nodeRefs"): + way.nodeRefs = new_refs + elif hasattr(way, "nodes"): + way.nodes = new_refs + + # Fix relation IDs and their member refs + for rel in osmrelations: + if rel.id < 0: + rel.id = rel.id & mask_63bit + _normalise_id(rel) + + if hasattr(rel, "members"): + for member in rel.members: + if hasattr(member, "ref"): + ref = member.ref + if isinstance(ref, int) and ref < 0: + member.ref = ref & mask_63bit + elif hasattr(ref, "id") and ref.id < 0: + ref.id = ref.id & mask_63bit + _set_id_tag(ref, ref.id) + + # Ensure deterministic ordering now that IDs have been normalised + if hasattr(osmnodes, "sort"): + osmnodes.sort(key=lambda n: n.id) + if hasattr(osmways, "sort"): + osmways.sort(key=lambda w: w.id) + if hasattr(osmrelations, "sort"): + osmrelations.sort(key=lambda r: r.id) + + diff --git a/src/osm_osw_reformatter/serializer/osw/osw_normalizer.py b/src/osm_osw_reformatter/serializer/osw/osw_normalizer.py index eec828b..e070477 100644 --- a/src/osm_osw_reformatter/serializer/osw/osw_normalizer.py +++ b/src/osm_osw_reformatter/serializer/osw/osw_normalizer.py @@ -430,7 +430,7 @@ def __init__(self, tags): self.tags = tags def filter(self): - return (self.is_building()) + return self.is_building() @staticmethod def osw_polygon_filter(tags): @@ -457,7 +457,7 @@ def __init__(self, tags): self.tags = tags def filter(self): - return (self.is_pedestrian()) + return self.is_pedestrian() @staticmethod def osw_zone_filter(tags): diff --git a/src/osm_osw_reformatter/version.py b/src/osm_osw_reformatter/version.py index c5b683c..51e2f03 100644 --- a/src/osm_osw_reformatter/version.py +++ b/src/osm_osw_reformatter/version.py @@ -1 +1 @@ -__version__ = '0.2.11' +__version__ = '0.2.12' diff --git a/tests/unit_tests/test_files/node_with_invalid_tags.xml b/tests/unit_tests/test_files/node_with_invalid_tags.xml new file mode 100644 index 0000000..0d14db0 --- /dev/null +++ b/tests/unit_tests/test_files/node_with_invalid_tags.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/tests/unit_tests/test_osm2osw/test_osm2osw.py b/tests/unit_tests/test_osm2osw/test_osm2osw.py index 00b8cfe..143df9b 100644 --- a/tests/unit_tests/test_osm2osw/test_osm2osw.py +++ b/tests/unit_tests/test_osm2osw/test_osm2osw.py @@ -11,6 +11,7 @@ TEST_FILE = os.path.join(ROOT_DIR, 'test_files/wa.microsoft.osm.pbf') TEST_WIDTH_FILE = os.path.join(ROOT_DIR, 'test_files/width-test.xml') TEST_INCLINE_FILE = os.path.join(ROOT_DIR, 'test_files/incline-test.xml') +TEST_INVALID_NODE_TAGS_FILE = os.path.join(ROOT_DIR, 'test_files/node_with_invalid_tags.xml') def is_valid_float(value): @@ -33,13 +34,13 @@ async def run_test(): asyncio.run(run_test()) - def test_generated_3_files(self): + def test_generated_files(self): osm_file_path = TEST_FILE async def run_test(): osm2osw = OSM2OSW(osm_file=osm_file_path, workdir=OUTPUT_DIR, prefix='test') result = await osm2osw.convert() - self.assertEqual(len(result.generated_files), 4) + self.assertEqual(len(result.generated_files), 6) for file in result.generated_files: os.remove(file) @@ -52,7 +53,7 @@ async def run_test(): osm2osw = OSM2OSW(osm_file=osm_file_path, workdir=OUTPUT_DIR, prefix='test') result = await osm2osw.convert() - self.assertEqual(len(result.generated_files), 4) + self.assertEqual(len(result.generated_files), 6) for file in result.generated_files: if file.endswith('.geojson'): @@ -246,6 +247,18 @@ async def run_test(): asyncio.run(run_test()) + def test_will_not_generate_nodes_file_if_node_with_invalid_tags(self): + osm_file_path = TEST_INVALID_NODE_TAGS_FILE + + async def run_test(): + osm2osw = OSM2OSW(osm_file=osm_file_path, workdir=OUTPUT_DIR, prefix='test') + result = await osm2osw.convert() + self.assertEqual(len(result.generated_files), 0) + for file in result.generated_files: + os.remove(file) + + asyncio.run(run_test()) + if __name__ == '__main__': unittest.main() diff --git a/tests/unit_tests/test_serializer/test_osm_graph.py b/tests/unit_tests/test_serializer/test_osm_graph.py index c5ba7fe..e5a8bbf 100644 --- a/tests/unit_tests/test_serializer/test_osm_graph.py +++ b/tests/unit_tests/test_serializer/test_osm_graph.py @@ -1,12 +1,24 @@ import os import json import unittest +from tempfile import TemporaryDirectory from unittest.mock import MagicMock, patch import networkx as nx from shapely.geometry import LineString, Point, Polygon, mapping -import json -from src.osm_osw_reformatter.serializer.osm.osm_graph import OSMGraph, OSMWayParser, OSMNodeParser, OSMPointParser, \ - OSMLineParser, OSMZoneParser, OSMPolygonParser +from src.osm_osw_reformatter.serializer.osm.osm_graph import ( + OSMGraph, + OSMWayParser, + OSMNodeParser, + OSMPointParser, + OSMLineParser, + OSMZoneParser, + OSMPolygonParser, + OSMTaggedNodeParser, +) +from src.osm_osw_reformatter.serializer.osw.osw_normalizer import ( + OSWNodeNormalizer, + OSWPointNormalizer, +) class TestOSMGraph(unittest.TestCase): @@ -41,6 +53,70 @@ def filter_func(u, v, d): list(filtered_graph.get_graph().edges(data=True))[0][2]["property"], "A" ) + def test_node_adds_tagged_node_with_coordinates(self): + class DummyNode: + def __init__(self, node_id, tags, lon, lat): + self.id = node_id + self.tags = tags + + class Location: + def __init__(self, lon, lat): + self.lon = lon + self.lat = lat + + self.location = Location(lon, lat) + + tagged = DummyNode(101, {"highway": "footway"}, -122.3, 47.6) + + self.osm_graph.node(tagged) + + self.assertIn(101, self.mock_graph.nodes) + node_data = self.mock_graph.nodes[101] + self.assertEqual(node_data["highway"], "footway") + self.assertEqual(node_data["lon"], -122.3) + self.assertEqual(node_data["lat"], 47.6) + + def test_node_skips_existing_identifiers(self): + self.mock_graph.add_node(202, lon=0.0, lat=0.0, name="original") + + class DummyNode: + def __init__(self): + self.id = 202 + self.tags = {"name": "replacement"} + + class Location: + lon = 1.0 + lat = 1.0 + + self.location = Location() + + duplicate = DummyNode() + + self.osm_graph.node(duplicate) + + node_data = self.mock_graph.nodes[202] + self.assertEqual(node_data["name"], "original") + self.assertEqual(node_data["lon"], 0.0) + self.assertEqual(node_data["lat"], 0.0) + + def test_node_requires_tags(self): + class DummyNode: + def __init__(self): + self.id = 303 + self.tags = {} + + class Location: + lon = 2.0 + lat = 2.0 + + self.location = Location() + + empty = DummyNode() + + self.osm_graph.node(empty) + + self.assertNotIn(303, self.mock_graph.nodes) + @patch('src.osm_osw_reformatter.serializer.osm.osm_graph.mapping') def test_to_geojson(self, mock_mapping): # Mock mapping function to return a sample GeoJSON structure @@ -54,10 +130,22 @@ def test_to_geojson(self, mock_mapping): # Add a node and an edge with geometry to the graph self.mock_graph.add_node( - 1, geometry=Point(1, 1), lon=1, lat=1, osm_id="test_node" + 1, + geometry=Point(1, 1), + lon=1, + lat=1, + osm_id="test_node", + barrier="kerb", + kerb="flush", ) self.mock_graph.add_node( - 2, geometry=Point(2, 2), lon=2, lat=2, osm_id="test_node_2" + 2, + geometry=Point(2, 2), + lon=2, + lat=2, + osm_id="test_node_2", + barrier="kerb", + kerb="lowered", ) # Add an edge with geometry **and osm_id** self.mock_graph.add_edge(1, 2, geometry=LineString([(1, 1), (2, 2)]), osm_id="test_edge_1_2") @@ -492,6 +580,124 @@ def test_from_geojson(self, mock_from_geojson): self.assertEqual(len(osm_graph.get_graph().nodes), 1) self.assertEqual(len(osm_graph.get_graph().edges), 1) + def test_tagged_node_parser_skips_non_osw_nodes(self): + graph = nx.MultiDiGraph() + parser = OSMTaggedNodeParser( + graph, + OSWNodeNormalizer.osw_node_filter, + OSWPointNormalizer.osw_point_filter, + ) + + node_mock = MagicMock() + node_mock.tags = { + 'highway': 'crossing', + 'crossing:markings': 'zebra', + 'tactile_paving': 'yes', + } + node_mock.id = 5959268989 + node_mock.location = MagicMock(lon=-77.05091, lat=38.8598812) + + parser.node(node_mock) + + self.assertEqual(len(graph.nodes), 0) + + def test_tagged_node_parser_adds_osw_nodes(self): + graph = nx.MultiDiGraph() + parser = OSMTaggedNodeParser( + graph, + OSWNodeNormalizer.osw_node_filter, + OSWPointNormalizer.osw_point_filter, + ) + + node_mock = MagicMock() + node_mock.tags = { + 'barrier': 'kerb', + 'kerb': 'flush', + } + node_mock.id = 123 + node_mock.location = MagicMock(lon=-77.05091, lat=38.8598812) + + parser.node(node_mock) + + self.assertIn(123, graph.nodes) + self.assertEqual(graph.nodes[123]['barrier'], 'kerb') + + def test_to_geojson_node_ids_preserved(self): + graph = nx.MultiDiGraph() + graph.add_node( + 5959268989, + geometry=Point(-77.05091, 38.8598812), + lon=-77.05091, + lat=38.8598812, + barrier='kerb', + kerb='flush', + ) + + osm_graph = OSMGraph(G=graph) + + with TemporaryDirectory() as tmpdir: + nodes_path = os.path.join(tmpdir, 'nodes.geojson') + edges_path = os.path.join(tmpdir, 'edges.geojson') + points_path = os.path.join(tmpdir, 'points.geojson') + lines_path = os.path.join(tmpdir, 'lines.geojson') + zones_path = os.path.join(tmpdir, 'zones.geojson') + polygons_path = os.path.join(tmpdir, 'polygons.geojson') + + osm_graph.to_geojson( + nodes_path, + edges_path, + points_path, + lines_path, + zones_path, + polygons_path, + ) + + self.assertTrue(os.path.exists(nodes_path)) + + with open(nodes_path) as f: + data = json.load(f) + + self.assertEqual(len(data['features']), 1) + props = data['features'][0]['properties'] + self.assertEqual(props['_id'], '5959268989') + + def test_to_geojson_point_ids_trim_prefix(self): + graph = nx.MultiDiGraph() + graph.add_node( + 'p123', + geometry=Point(-77.05091, 38.8598812), + power='pole', + ) + + osm_graph = OSMGraph(G=graph) + + with TemporaryDirectory() as tmpdir: + nodes_path = os.path.join(tmpdir, 'nodes.geojson') + edges_path = os.path.join(tmpdir, 'edges.geojson') + points_path = os.path.join(tmpdir, 'points.geojson') + lines_path = os.path.join(tmpdir, 'lines.geojson') + zones_path = os.path.join(tmpdir, 'zones.geojson') + polygons_path = os.path.join(tmpdir, 'polygons.geojson') + + osm_graph.to_geojson( + nodes_path, + edges_path, + points_path, + lines_path, + zones_path, + polygons_path, + ) + + self.assertTrue(os.path.exists(points_path)) + + with open(points_path) as f: + data = json.load(f) + + self.assertEqual(len(data['features']), 1) + props = data['features'][0]['properties'] + self.assertEqual(props['_id'], '123') + self.assertEqual(props['ext:osm_id'], '123') + if __name__ == "__main__": diff --git a/tests/unit_tests/test_serializer/test_osm_normalizer.py b/tests/unit_tests/test_serializer/test_osm_normalizer.py index b26076b..24646f3 100644 --- a/tests/unit_tests/test_serializer/test_osm_normalizer.py +++ b/tests/unit_tests/test_serializer/test_osm_normalizer.py @@ -108,3 +108,45 @@ def test_discards_invalid_climb_when_highway_is_steps(self): tags = {"highway": "steps", "climb": "sideways"} normalizer = self.normalizer.filter_tags(tags) self.assertNotIn("climb", normalizer) + + +class TestOSMProcessFeaturePost(unittest.TestCase): + class DummyGeometry: + def __init__(self, tags, osm_id=None): + self.tags = tags + self.id = osm_id + + def setUp(self): + self.normalizer = OSMNormalizer() + + def test_assigns_ext_osm_id_to_geometry(self): + geometry = self.DummyGeometry({"ext:osm_id": ["12345"]}) + + self.normalizer.process_feature_post(geometry, ogrfeature=None, ogrgeometry=None) + + self.assertEqual(geometry.id, 12345) + + def test_falls_back_to_internal_id_tag(self): + geometry = self.DummyGeometry({"_id": ["67890"]}) + + self.normalizer.process_feature_post(geometry, ogrfeature=None, ogrgeometry=None) + + self.assertEqual(geometry.id, 67890) + + def test_keeps_existing_id_when_no_tags_present(self): + geometry = self.DummyGeometry({}, osm_id=555) + + self.normalizer.process_feature_post(geometry, ogrfeature=None, ogrgeometry=None) + + self.assertEqual(geometry.id, 555) + + def test_ignores_empty_ext_osm_id_values(self): + geometry = self.DummyGeometry({"ext:osm_id": [""], "_id": ["2468"]}) + + self.normalizer.process_feature_post(geometry, ogrfeature=None, ogrgeometry=None) + + self.assertEqual(geometry.id, 2468) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/unit_tests/test_serializer/test_osw_normalizer.py b/tests/unit_tests/test_serializer/test_osw_normalizer.py index 6dbf570..1188b47 100644 --- a/tests/unit_tests/test_serializer/test_osw_normalizer.py +++ b/tests/unit_tests/test_serializer/test_osw_normalizer.py @@ -73,6 +73,17 @@ def test_normalize_crossing(self): expected = {'highway': 'footway', 'footway': 'crossing', 'foot': 'yes'} self.assertEqual(result, expected) + def test_normalize_living_street_defaults_to_pedestrian_access(self): + tags = {'highway': 'living_street', 'width': '6.5'} + normalizer = OSWWayNormalizer(tags) + + result = normalizer.normalize() + + self.assertEqual( + result, + {'highway': 'living_street', 'width': 6.5, 'foot': 'yes'}, + ) + def test_normalize_incline(self): tags = {'highway': 'footway', 'incline': '10'} normalizer = OSWWayNormalizer(tags)