Skip to content

Commit 7a674fa

Browse files
committed
feat(normalizers): add optional normalizer for keyword fields
1 parent 1098353 commit 7a674fa

File tree

7 files changed

+97
-19
lines changed

7 files changed

+97
-19
lines changed

integration/source_layer_sourceid_filtering.js

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,22 @@ module.exports.tests.source_filter = function(test, common){
5757
});
5858
});
5959

60+
// case insensitive
61+
suite.assert( function( done ){
62+
suite.client.search({
63+
index: suite.props.index,
64+
type: config.schema.typeName,
65+
body: { query: {
66+
term: {
67+
source: 'OSM'
68+
}
69+
}}
70+
}, function( err, res ){
71+
t.equal( res.hits.total, 2 );
72+
done();
73+
});
74+
});
75+
6076
// find all 'address' layers
6177
suite.assert( function( done ){
6278
suite.client.search({
@@ -104,22 +120,6 @@ module.exports.tests.source_filter = function(test, common){
104120
});
105121
});
106122

107-
// case sensitive
108-
suite.assert( function( done ){
109-
suite.client.search({
110-
index: suite.props.index,
111-
type: config.schema.typeName,
112-
body: { query: {
113-
term: {
114-
source: 'OSM'
115-
}
116-
}}
117-
}, function( err, res ){
118-
t.equal( res.hits.total, 0 );
119-
done();
120-
});
121-
});
122-
123123
// keyword analysis - no partial matching
124124
suite.assert( function( done ){
125125
suite.client.search({

mappings/partial/keyword.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
22
"type": "keyword",
3+
"normalizer": "peliasKeywordNormalizer",
34
"doc_values": false
45
}
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
{
2-
"type": "keyword"
2+
"type": "keyword",
3+
"normalizer": "peliasKeywordNormalizer"
34
}

settings.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@ function generate(){
3434
"pattern": "[\\s,/\\\\-]+"
3535
}
3636
},
37+
"normalizer": {
38+
"peliasKeywordNormalizer": {
39+
"type": "custom",
40+
"filter": [
41+
"lowercase",
42+
"icu_folding"
43+
]
44+
}
45+
},
3746
"analyzer": {
3847
"peliasAdmin": {
3948
"type": "custom",

test/compile.js

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,29 @@ module.exports.tests.analyzers = function (test, common) {
9999
});
100100
};
101101

102+
// note: this test is commented out for now because it's valid for some keyword
103+
// fields such as bounding_box and addendum to use the null normalizer, but it's
104+
// not easy to test because it's not possible to specify them as null in the mapping.
105+
106+
// ensure "normalizer" is set for keyword fields
107+
// module.exports.tests.normalizers = function (test, common) {
108+
// test('normalizers: ensure "normalizer" is set', function (t) {
109+
// const keywordFields = [];
110+
111+
// forEachDeep(schema, (value, key) => {
112+
// if (!_.isPlainObject(value)) { return; }
113+
// if (_.get(value, 'type', '') !== 'keyword') { return; }
114+
// keywordFields.push({ key: key, value: value });
115+
// });
116+
117+
// keywordFields.forEach(field => {
118+
// t.true(_.has(field.value, 'normalizer'), `normalizer not set on ${field.key}`)
119+
// })
120+
121+
// t.end();
122+
// });
123+
// };
124+
102125
// current schema (compiled) - requires schema to be copied and settings to
103126
// be regenerated from a fixture in order to pass in CI environments.
104127
module.exports.tests.current_schema = function(test, common) {

test/fixtures/expected.json

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@
1111
"pattern": "[\\s,/\\\\-]+"
1212
}
1313
},
14+
"normalizer": {
15+
"peliasKeywordNormalizer": {
16+
"type": "custom",
17+
"filter": [
18+
"lowercase",
19+
"icu_folding"
20+
]
21+
}
22+
},
1423
"analyzer": {
1524
"peliasAdmin": {
1625
"type": "custom",
@@ -575,10 +584,12 @@
575584
"doc": {
576585
"properties": {
577586
"source": {
578-
"type": "keyword"
587+
"type": "keyword",
588+
"normalizer": "peliasKeywordNormalizer"
579589
},
580590
"layer": {
581-
"type": "keyword"
591+
"type": "keyword",
592+
"normalizer": "peliasKeywordNormalizer"
582593
},
583594
"name": {
584595
"type": "object",
@@ -656,6 +667,7 @@
656667
},
657668
"continent_id": {
658669
"type": "keyword",
670+
"normalizer": "peliasKeywordNormalizer",
659671
"doc_values": false
660672
},
661673
"ocean": {
@@ -686,6 +698,7 @@
686698
},
687699
"ocean_id": {
688700
"type": "keyword",
701+
"normalizer": "peliasKeywordNormalizer",
689702
"doc_values": false
690703
},
691704
"empire": {
@@ -716,6 +729,7 @@
716729
},
717730
"empire_id": {
718731
"type": "keyword",
732+
"normalizer": "peliasKeywordNormalizer",
719733
"doc_values": false
720734
},
721735
"country": {
@@ -746,6 +760,7 @@
746760
},
747761
"country_id": {
748762
"type": "keyword",
763+
"normalizer": "peliasKeywordNormalizer",
749764
"doc_values": false
750765
},
751766
"dependency": {
@@ -776,6 +791,7 @@
776791
},
777792
"dependency_id": {
778793
"type": "keyword",
794+
"normalizer": "peliasKeywordNormalizer",
779795
"doc_values": false
780796
},
781797
"marinearea": {
@@ -806,6 +822,7 @@
806822
},
807823
"marinearea_id": {
808824
"type": "keyword",
825+
"normalizer": "peliasKeywordNormalizer",
809826
"doc_values": false
810827
},
811828
"macroregion": {
@@ -836,6 +853,7 @@
836853
},
837854
"macroregion_id": {
838855
"type": "keyword",
856+
"normalizer": "peliasKeywordNormalizer",
839857
"doc_values": false
840858
},
841859
"region": {
@@ -866,6 +884,7 @@
866884
},
867885
"region_id": {
868886
"type": "keyword",
887+
"normalizer": "peliasKeywordNormalizer",
869888
"doc_values": false
870889
},
871890
"macrocounty": {
@@ -896,6 +915,7 @@
896915
},
897916
"macrocounty_id": {
898917
"type": "keyword",
918+
"normalizer": "peliasKeywordNormalizer",
899919
"doc_values": false
900920
},
901921
"county": {
@@ -926,6 +946,7 @@
926946
},
927947
"county_id": {
928948
"type": "keyword",
949+
"normalizer": "peliasKeywordNormalizer",
929950
"doc_values": false
930951
},
931952
"locality": {
@@ -956,6 +977,7 @@
956977
},
957978
"locality_id": {
958979
"type": "keyword",
980+
"normalizer": "peliasKeywordNormalizer",
959981
"doc_values": false
960982
},
961983
"borough": {
@@ -986,6 +1008,7 @@
9861008
},
9871009
"borough_id": {
9881010
"type": "keyword",
1011+
"normalizer": "peliasKeywordNormalizer",
9891012
"doc_values": false
9901013
},
9911014
"localadmin": {
@@ -1016,6 +1039,7 @@
10161039
},
10171040
"localadmin_id": {
10181041
"type": "keyword",
1042+
"normalizer": "peliasKeywordNormalizer",
10191043
"doc_values": false
10201044
},
10211045
"neighbourhood": {
@@ -1046,6 +1070,7 @@
10461070
},
10471071
"neighbourhood_id": {
10481072
"type": "keyword",
1073+
"normalizer": "peliasKeywordNormalizer",
10491074
"doc_values": false
10501075
},
10511076
"postalcode": {
@@ -1074,6 +1099,7 @@
10741099
},
10751100
"postalcode_id": {
10761101
"type": "keyword",
1102+
"normalizer": "peliasKeywordNormalizer",
10771103
"doc_values": false
10781104
}
10791105
}
@@ -1090,10 +1116,12 @@
10901116
},
10911117
"source_id": {
10921118
"type": "keyword",
1119+
"normalizer": "peliasKeywordNormalizer",
10931120
"doc_values": false
10941121
},
10951122
"category": {
10961123
"type": "keyword",
1124+
"normalizer": "peliasKeywordNormalizer",
10971125
"doc_values": false
10981126
},
10991127
"population": {

test/settings.js

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,22 @@ module.exports.tests.analysis = function(test, common) {
4848
});
4949
};
5050

51+
// -- normalizers --
52+
53+
module.exports.tests.peliasKeywordNormalizer = function (test, common) {
54+
test('has pelias keyword normalizer', function (t) {
55+
var s = settings();
56+
t.equal(typeof s.analysis.normalizer.peliasKeywordNormalizer, 'object', 'there is a pelias keyword normalizer');
57+
var normalizer = s.analysis.normalizer.peliasKeywordNormalizer;
58+
t.equal(normalizer.type, 'custom', 'custom normalizer');
59+
t.deepEqual(normalizer.filter, [
60+
"lowercase",
61+
"icu_folding"
62+
]);
63+
t.end();
64+
});
65+
};
66+
5167
// -- analyzers --
5268

5369
module.exports.tests.peliasAdminAnalyzer = function(test, common) {

0 commit comments

Comments
 (0)