Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Change log

### 0.2.12
- Updated OSMTaggedNodeParser to apply the OSW node and point filters with normalization before adding loose tagged nodes, ensuring non-compliant features like crossings are no longer emitted.
- Extended serializer tests to cover the new tagged-node behavior, confirming that compliant kerb features are retained while schema-invalid crossings are skipped.
- Updated GeoJSON node export to normalize IDs, retain full OSM identifiers, and skip non-OSW features so schema-invalid crossings are no longer emitted.
- Ensured only synthetic node IDs have their prefix trimmed, fixing the prior bug where numeric IDs lost the leading digit and caused _id/ext:osm_id mismatches.
- Expanded serializer tests to cover OSW-compliant node export, rejection of non-compliant crossings, and prefix handling for generated point IDs.
- Refined GeoJSON export to filter nodes using tag-only metadata, preventing schema-invalid features from being emitted.
- Normalized ext:osm_id handling to preserve full numeric identifiers while trimming prefixed synthetic values.


### 0.2.11
- Retain numeric `incline` values and new `length` tags during way normalization
- Recognize any `highway=steps` way as stairs, preserving valid `climb` tags
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ shapely~=2.0.2
pyproj~=3.6.1
coverage~=7.5.1
ogr2osm==1.2.0
python-osw-validation==0.2.13
python-osw-validation==0.2.15
48 changes: 32 additions & 16 deletions src/osm_osw_reformatter/serializer/osm/osm_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,16 +308,30 @@ def area(self, a):
exteriors_count = exteriors_count + 1

class OSMTaggedNodeParser(osmium.SimpleHandler):
def __init__(self, G: nx.MultiDiGraph):
def __init__(self, G: nx.MultiDiGraph, node_filter: Optional[callable] = None,
point_filter: Optional[callable] = None) -> None:

osmium.SimpleHandler.__init__(self)
self.G = G
self.node_filter = node_filter or (lambda tags: False)
self.point_filter = point_filter or (lambda tags: False)

def node(self, n):
# Only add nodes with tags
if n.tags and len(n.tags) > 0:
d = dict(n.tags)
# Store OSM node id as string (to match the pattern in your output)
self.G.add_node(n.id, lon=n.location.lon, lat=n.location.lat, **d)
if not n.tags or len(n.tags) == 0:
return

tags = dict(n.tags)

if self.node_filter(tags):
normalized = OSWNodeNormalizer(tags).normalize()
if normalized:
self.G.add_node(n.id, lon=n.location.lon, lat=n.location.lat, **normalized)
return

if self.point_filter(tags):
normalized = OSWPointNormalizer(tags).normalize()
if normalized:
self.G.add_node("p" + str(n.id), lon=n.location.lon, lat=n.location.lat, **normalized)

class OSMGraph:
def __init__(self, G: nx.MultiDiGraph = None) -> None:
Expand Down Expand Up @@ -359,21 +373,21 @@ def from_osm_file(
del line_parser

# --- PATCH START: Add all loose/tagged nodes ---
tagged_node_parser = OSMTaggedNodeParser(G)
tagged_node_parser = OSMTaggedNodeParser(G, node_filter, point_filter)
tagged_node_parser.apply_file(osm_file)
G = tagged_node_parser.G
del tagged_node_parser
# --- PATCH END ---

# zone_parser = OSMZoneParser(G, zone_filter, progressbar=progressbar)
# zone_parser.apply_file(osm_file)
# G = zone_parser.G
# del zone_parser
zone_parser = OSMZoneParser(G, zone_filter, progressbar=progressbar)
zone_parser.apply_file(osm_file)
G = zone_parser.G
del zone_parser

# polygon_parser = OSMPolygonParser(G, polygon_filter, progressbar=progressbar)
# polygon_parser.apply_file(osm_file)
# G = polygon_parser.G
# del polygon_parser
polygon_parser = OSMPolygonParser(G, polygon_filter, progressbar=progressbar)
polygon_parser.apply_file(osm_file)
G = polygon_parser.G
del polygon_parser

return OSMGraph(G)

Expand Down Expand Up @@ -618,7 +632,9 @@ def to_geojson(self, *args) -> None:
polygon_features = []
for n, d in self.G.nodes(data=True):
d_copy = {**d}
d_copy["_id"] = str(n)[1:]
id_str = str(n)
trimmed_id = id_str[1:] if isinstance(n, str) else id_str
d_copy["_id"] = trimmed_id
d_copy['ext:osm_id'] = str(d_copy.get('osm_id', d_copy["_id"]))

if OSWPointNormalizer.osw_point_filter(d):
Expand Down
102 changes: 102 additions & 0 deletions src/osm_osw_reformatter/serializer/osm/osm_normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,105 @@ def process_feature_post(self, osmgeometry, ogrfeature, ogrgeometry):

if osm_id is not None:
osmgeometry.id = osm_id

def process_output(self, osmnodes, osmways, osmrelations):
    """
    Rewrite negative OSM IDs as deterministic positive 63-bit IDs.

    Every node, way and relation whose ``id`` is negative is given
    ``id & ((1 << 63) - 1)`` as its new ID and a ``_id`` tag holding that
    value as a string. Negative references held by ways (node refs) and by
    relation members are rewritten with the same mask, so they keep
    pointing at the renumbered objects. Finally, each collection that
    exposes ``sort`` is sorted in place by ID.

    Args:
        osmnodes: Iterable of node objects exposing an integer ``id`` and,
            optionally, a mutable ``tags`` mapping.
        osmways: Iterable of way objects; node references are looked up
            under ``nds``, ``refs``, ``nodeRefs`` or ``nodes``.
        osmrelations: Iterable of relation objects; members exposing a
            ``ref`` attribute have that reference normalised.

    Returns:
        None. All changes are made in place.
    """
    mask_63bit = (1 << 63) - 1
    # Attribute names under which a way may keep its node references,
    # in lookup order.
    ref_attrs = ("nds", "refs", "nodeRefs", "nodes")

    def _set_id_tag(osm_obj, new_id):
        """Store ``new_id`` under the ``_id`` tag, if the object has tags."""
        tags = getattr(osm_obj, "tags", None)
        if tags is None or not hasattr(tags, "__setitem__"):
            return

        value = str(new_id)
        existing = tags.get("_id") if hasattr(tags, "get") else None

        if existing is None or isinstance(existing, list):
            # Default to list storage to match ogr2osm's internal structures
            tags["_id"] = [value]
        else:
            # A scalar value was already present: keep the scalar form
            tags["_id"] = value

    def _normalise_id(osm_obj):
        """Mask a negative ID to 63 bits, tag the object, return its ID."""
        if osm_obj.id < 0:
            osm_obj.id &= mask_63bit
            _set_id_tag(osm_obj, osm_obj.id)
        return osm_obj.id

    def _normalise_ref(ref):
        """Return ``ref`` with a negative int or negative ``.id`` masked."""
        if isinstance(ref, int):
            return ref & mask_63bit if ref < 0 else ref
        if hasattr(ref, "id"):
            _normalise_id(ref)
        return ref

    # Fix node IDs
    for node in osmnodes:
        _normalise_id(node)

    # Fix ways and their node references
    for way in osmways:
        _normalise_id(way)

        for attr in ref_attrs:
            node_refs = getattr(way, attr, None)
            if node_refs:
                # Write back to the same attribute the refs were read from,
                # so an empty sibling attribute is never overwritten.
                setattr(way, attr, [_normalise_ref(ref) for ref in node_refs])
                break

    # Fix relation IDs and their member refs
    for rel in osmrelations:
        # Let _normalise_id do the masking itself: masking beforehand would
        # make the ID non-negative and the '_id' tag would never be written.
        _normalise_id(rel)

        if hasattr(rel, "members"):
            for member in rel.members:
                if not hasattr(member, "ref"):
                    continue
                ref = member.ref
                if isinstance(ref, int):
                    if ref < 0:
                        member.ref = ref & mask_63bit
                elif hasattr(ref, "id"):
                    _normalise_id(ref)

    # Ensure deterministic ordering now that IDs have been normalised
    if hasattr(osmnodes, "sort"):
        osmnodes.sort(key=lambda n: n.id)
    if hasattr(osmways, "sort"):
        osmways.sort(key=lambda w: w.id)
    if hasattr(osmrelations, "sort"):
        osmrelations.sort(key=lambda r: r.id)


4 changes: 2 additions & 2 deletions src/osm_osw_reformatter/serializer/osw/osw_normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ def __init__(self, tags):
self.tags = tags

def filter(self):
return (self.is_building())
return self.is_building()

@staticmethod
def osw_polygon_filter(tags):
Expand All @@ -457,7 +457,7 @@ def __init__(self, tags):
self.tags = tags

def filter(self):
return (self.is_pedestrian())
return self.is_pedestrian()

@staticmethod
def osw_zone_filter(tags):
Expand Down
2 changes: 1 addition & 1 deletion src/osm_osw_reformatter/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.2.11'
__version__ = '0.2.12'
7 changes: 7 additions & 0 deletions tests/unit_tests/test_files/node_with_invalid_tags.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version='0.6' generator='JOSM'>
<node id='565516917' timestamp='2018-06-03T18:15:49Z' uid='5046269' user='Rich1234' visible='true' version='3' changeset='59515112' lat='38.8605033' lon='-77.0598865'>
<tag k='highway' v='traffic_signals' />
<tag k='source' v='survey' />
</node>
</osm>
19 changes: 16 additions & 3 deletions tests/unit_tests/test_osm2osw/test_osm2osw.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
TEST_FILE = os.path.join(ROOT_DIR, 'test_files/wa.microsoft.osm.pbf')
TEST_WIDTH_FILE = os.path.join(ROOT_DIR, 'test_files/width-test.xml')
TEST_INCLINE_FILE = os.path.join(ROOT_DIR, 'test_files/incline-test.xml')
TEST_INVALID_NODE_TAGS_FILE = os.path.join(ROOT_DIR, 'test_files/node_with_invalid_tags.xml')


def is_valid_float(value):
Expand All @@ -33,13 +34,13 @@ async def run_test():

asyncio.run(run_test())

def test_generated_3_files(self):
def test_generated_files(self):
osm_file_path = TEST_FILE

async def run_test():
osm2osw = OSM2OSW(osm_file=osm_file_path, workdir=OUTPUT_DIR, prefix='test')
result = await osm2osw.convert()
self.assertEqual(len(result.generated_files), 4)
self.assertEqual(len(result.generated_files), 6)
for file in result.generated_files:
os.remove(file)

Expand All @@ -52,7 +53,7 @@ async def run_test():
osm2osw = OSM2OSW(osm_file=osm_file_path, workdir=OUTPUT_DIR, prefix='test')
result = await osm2osw.convert()

self.assertEqual(len(result.generated_files), 4)
self.assertEqual(len(result.generated_files), 6)

for file in result.generated_files:
if file.endswith('.geojson'):
Expand Down Expand Up @@ -246,6 +247,18 @@ async def run_test():

asyncio.run(run_test())

def test_will_not_generate_nodes_file_if_node_with_invalid_tags(self):
    """Converting the invalid-node-tags fixture must yield no generated files."""
    osm_file_path = TEST_INVALID_NODE_TAGS_FILE

    async def run_test():
        osm2osw = OSM2OSW(osm_file=osm_file_path, workdir=OUTPUT_DIR, prefix='test')
        result = await osm2osw.convert()
        try:
            self.assertEqual(len(result.generated_files), 0)
        finally:
            # Clean up even when the assertion fails; otherwise unexpected
            # output files would be left behind in the work directory.
            for file in result.generated_files:
                os.remove(file)

    asyncio.run(run_test())


if __name__ == '__main__':
unittest.main()
Loading