fix for duplicate documents sharing the same ID#552
Conversation
|
Example of two versions of the same document being emitted from the document stream: {
_index: 'pelias',
_id: 'whosonfirst:postalcode:454977637',
data: {
name: { default: [Array] },
phrase: { default: [Array] },
parent: {
country: [Array],
country_id: [Array],
country_a: [Array],
country_source: [Array],
county: [Array],
county_id: [Array],
county_a: [Array],
county_source: [Array],
locality: [Array],
locality_id: [Array],
locality_a: [Array],
locality_source: [Array],
macrocounty: [Array],
macrocounty_id: [Array],
macrocounty_a: [Array],
macrocounty_source: [Array],
macroregion: [Array],
macroregion_id: [Array],
macroregion_a: [Array],
macroregion_source: [Array],
region: [Array],
region_id: [Array],
region_a: [Array],
region_source: [Array],
postalcode: [Array],
postalcode_id: [Array],
postalcode_a: [Array],
postalcode_source: [Array]
},
center_point: { lon: -2.79291, lat: 54.053957 },
source: 'whosonfirst',
layer: 'postalcode',
source_id: '454977637',
bounding_box: '{"min_lat":54.053957,"max_lat":54.053957,"min_lon":-2.79291,"max_lon":-2.79291}',
popularity: 9000,
addendum: { concordances: '{"gp:id":"27457509"}' }
}
}{
_index: 'pelias',
_id: 'whosonfirst:postalcode:454977637',
data: {
name: { default: [Array] },
phrase: { default: [Array] },
parent: {
country: [Array],
country_id: [Array],
country_a: [Array],
country_source: [Array],
postalcode: [Array],
postalcode_id: [Array],
postalcode_a: [Array],
postalcode_source: [Array]
},
center_point: { lon: -2.79291, lat: 54.053957 },
source: 'whosonfirst',
layer: 'postalcode',
source_id: '454977637',
bounding_box: '{"min_lat":54.053957,"max_lat":54.053957,"min_lon":-2.79291,"max_lon":-2.79291}',
popularity: 9000,
addendum: { concordances: '{"gp:id":"27457509"}' }
}
} |
|
|
||
| try { | ||
| if (hierarchies && hierarchies.length > 0) { | ||
| hierarchies.forEach(function(hierarchy) { |
There was a problem hiding this comment.
This is the major change
There was a problem hiding this comment.
I verified via a console.log that this code is not called by the PIP service. So it should only affect actual whosonfirst records in Elasticsearch, not admin lookup for any other records like addresses. 👍
a very old PR introduced functionality to generate multiple versions of the same document for each hierarchy present. this results in the document being overridden in elasticsearch with the latter. reverts #87
1fdf8e3 to
272ccf4
Compare
orangejulius
left a comment
There was a problem hiding this comment.
Looks good! This is gonna make things cleaner and likely reduce our build sizes slightly as a perk.
| module.exports = (parentRecords) => { | ||
| return (wofRecord) => { | ||
| return wofRecord.hierarchies.map(hierarchy => { | ||
| return Object.values(hierarchy) | ||
| .map(parentId => parentRecords[parentId]) | ||
| .filter(Boolean) | ||
| .filter(r => r.name); | ||
| }); |
There was a problem hiding this comment.
Very nice cleanup here. Modern JS and easier to understand as well.
|
|
||
| try { | ||
| if (hierarchies && hierarchies.length > 0) { | ||
| hierarchies.forEach(function(hierarchy) { |
There was a problem hiding this comment.
I verified via a console.log that this code is not called by the PIP service. So it should only affect actual whosonfirst records in Elasticsearch, not admin lookup for any other records like addresses. 👍
| var isDefined = function(r) { | ||
| return r; | ||
| }; |
a very old PR introduced functionality to generate multiple versions of the same document for each hierarchy present. However, all the documents used the same ID in Elasticsearch. This results in all document being overwritten in Elasticsearch with the document with the last hierarchy.
While there isn't always a strong preferred order, generally we want the first hierarchy.
reverts #87