Skip to content

Commit 8fbc318

Browse files
committed
when deleting duplicates, always keep the oldest node (preserve history)
1 parent a02c3ca commit 8fbc318

File tree

7 files changed

+59
-8
lines changed

7 files changed

+59
-8
lines changed

src/__tests__/mock/linz-dump.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,4 @@ WKT,address_id,change_id,address_class,unit_value,address_number,address_number_
199199
,70242,,Road,,8,,,,,Oakleigh,,8,Side Street,,,,,,,,,,,174.242,-36.9
200200
,70243,,Road,,243,,,,,Oakleigh,,243,Example Street,,,,,,,,,,,174.243,-36.9
201201
,70244,,Road,,9,,,,,Oakleigh,,9,Side Street,,,,,,,,,,,174.244,-36.9
202+
,70245,,Road,,245,,,,,Oakleigh,,245,Example Street,,,,,,,,,,,174.245,-36.9

src/__tests__/mock/planet.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,20 @@
11481148

11491149
<!-- see ways for test case 243, which is the same as the above, but for a building -->
11501150

1151+
<!-- 2 nodes, exact duplicates. 246 is newer (i.e. the node ID is larger), so it should be deleted -->
1152+
<node id="245" version="1" changeset="1" lat="-36.9" lon="174.245" user="" uid="0" visible="true" timestamp="2012-05-19T11:22:33Z">
1153+
<tag k="addr:housenumber" v="245" />
1154+
<tag k="addr:street" v="Example Street" />
1155+
<tag k="addr:hamlet" v="Oakleigh" />
1156+
<tag k="ref:linz:address_id" v="70245" />
1157+
</node>
1158+
<node id="246" version="1" changeset="1" lat="-36.9" lon="174.246" user="" uid="0" visible="true" timestamp="2012-05-19T11:22:33Z">
1159+
<tag k="addr:housenumber" v="245" />
1160+
<tag k="addr:street" v="Example Street" />
1161+
<tag k="addr:hamlet" v="Oakleigh" />
1162+
<tag k="ref:linz:address_id" v="70245" />
1163+
</node>
1164+
11511165
<!-- <ways> -->
11521166
<way id="31" version="1" changeset="1" user="" uid="0" visible="true" timestamp="2012-05-19T11:22:33Z">
11531167
<nd ref="28" />

src/__tests__/snapshot/duplicate-linz-ref.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ Oakleigh
77
70137 ✅ exists on https://osm.org/node/137 and https://osm.org/way/141
88
70147 ❌ exists on https://osm.org/node/147 and https://osm.org/node/148
99
70149 ⚠️ exists on https://osm.org/node/149 and https://osm.org/node/150 and https://osm.org/node/151
10+
70245 ✅ exists on https://osm.org/node/245 and https://osm.org/node/246

src/__tests__/snapshot/index.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,9 @@
270270
"url": "https://osm-nz.github.io/linz-address-import/suburbs/MergeduplicateaddressesAuckland_140dpit.osmPatch.geo.json",
271271
"name": "Merge duplicate addresses - Auckland",
272272
"title": "Merge duplicate addresses - Auckland",
273-
"totalCount": 3,
273+
"totalCount": 4,
274274
"source": "",
275-
"snippet": "delete 2, edit 1",
275+
"snippet": "delete 3, edit 1",
276276
"extent": [
277277
[
278278
174.1509,

src/__tests__/snapshot/stats.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"EXISTS_BUT_WRONG_DATA": 44,
66
"EXISTS_BUT_NO_LINZ_REF": 5,
77
"MULTIPLE_EXIST_BUT_NO_LINZ_REF": 1,
8-
"MULTIPLE_EXIST": 4,
8+
"MULTIPLE_EXIST": 5,
99
"EXISTS_BUT_LOCATION_WRONG": 5,
1010
"TOTALLY_MISSING": 38,
1111
"NEEDS_DELETE": 15,
@@ -16,5 +16,5 @@
1616
"NEEDS_DELETE_ON_BUILDING": 1,
1717
"REPLACED_BY_BUILDING": 1
1818
},
19-
"total": 166
19+
"total": 167
2020
}

src/__tests__/snapshot/suburbs/MergeduplicateaddressesAuckland_140dpit.osmPatch.geo.json

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,40 @@
103103
"properties": {
104104
"__action": "delete"
105105
}
106+
},
107+
{
108+
"type": "Feature",
109+
"id": "n246",
110+
"geometry": {
111+
"type": "Polygon",
112+
"coordinates": [
113+
[
114+
[
115+
174.2461,
116+
-36.8999
117+
],
118+
[
119+
174.2461,
120+
-36.90010000000001
121+
],
122+
[
123+
174.2459,
124+
-36.90010000000001
125+
],
126+
[
127+
174.2459,
128+
-36.8999
129+
],
130+
[
131+
174.2461,
132+
-36.8999
133+
]
134+
]
135+
]
136+
},
137+
"properties": {
138+
"__action": "delete"
139+
}
106140
}
107141
],
108142
"bbox": {

src/action/handlers/handleDuplicateLinzRef.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,10 @@ export async function handleDuplicateLinzRef(
2626
const features: GeoJsonFeature[] = [];
2727

2828
for (const [linzId, [linzAddr, osmAddrList]] of array) {
29-
const simpleNodes = osmAddrList.filter(
30-
(x) => x.osmId[0] === 'n' && !x.isNonTrivial,
31-
);
29+
const simpleNodes = osmAddrList
30+
.filter((x) => x.osmId[0] === 'n' && !x.isNonTrivial)
31+
// if there are duplicates, keep the oldest node (determined by the node ID)
32+
.sort((a, b) => +a.osmId.slice(1) - +b.osmId.slice(1));
3233

3334
// we can autofix this if some of the duplicates are simple nodes
3435

@@ -55,7 +56,7 @@ export async function handleDuplicateLinzRef(
5556
autofixable[linzId] = '✅';
5657
}
5758
} else if (simpleNodes.length) {
58-
// Either (a) all nodes are simple. Arbitrarily pick 1 to keep and delete the rest.
59+
// Either (a) all nodes are simple. Pick the oldest 1 to keep and delete the rest.
5960
// Or (b) not all of the addresses are simple, so delete only the simple ones.
6061
const toDelete =
6162
simpleNodes.length === osmAddrList.length

0 commit comments

Comments
 (0)