Skip to content

Commit b4d5f7e

Browse files
committed
MB-57888: Added analyser and datetime parser checks
- Test case coverage for the same
- Better loading and storing from bolt
1 parent 402bf8e commit b4d5f7e

File tree

4 files changed

+496
-110
lines changed

4 files changed

+496
-110
lines changed

index/scorch/persister.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
874874
rv.segment = append(rv.segment, segmentSnapshot)
875875
rv.offsets = append(rv.offsets, running)
876876
if segmentSnapshot.updatedFields != nil {
877-
rv.updatedFields = segmentSnapshot.updatedFields
877+
rv.MergeUpdateFieldsInfo(segmentSnapshot.updatedFields)
878878
}
879879
running += segmentSnapshot.segment.Count()
880880
}
@@ -936,6 +936,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
936936
for field, info := range updatedFields {
937937
rv.updatedFields[field] = &info
938938
}
939+
rv.UpdateFieldsInfo(rv.updatedFields)
939940
}
940941

941942
return rv, nil

index/scorch/snapshot_index.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,7 +1166,14 @@ func (is *IndexSnapshot) ThesaurusKeysRegexp(name string,
11661166
}
11671167

11681168
func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) {
1169+
is.MergeUpdateFieldsInfo(updatedFields)
11691170

1171+
for _, segmentSnapshot := range is.segment {
1172+
segmentSnapshot.UpdateFieldsInfo(is.updatedFields)
1173+
}
1174+
}
1175+
1176+
func (is *IndexSnapshot) MergeUpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) {
11701177
if is.updatedFields == nil {
11711178
is.updatedFields = updatedFields
11721179
} else {
@@ -1181,9 +1188,4 @@ func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.Update
11811188
}
11821189
}
11831190
}
1184-
1185-
for _, segmentSnapshot := range is.segment {
1186-
segmentSnapshot.UpdateFieldsInfo(updatedFields)
1187-
}
1188-
11891191
}

index_update.go

Lines changed: 136 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ package bleve
1616

1717
import (
1818
"fmt"
19+
"reflect"
1920

21+
"github.com/blevesearch/bleve/v2/analysis"
2022
"github.com/blevesearch/bleve/v2/mapping"
2123
index "github.com/blevesearch/bleve_index_api"
2224
)
@@ -33,16 +35,16 @@ type pathInfo struct {
3335
// Store the field information with respect to the
3436
// document paths
3537
type fieldMapInfo struct {
36-
fieldMapping *mapping.FieldMapping
37-
rootName string
38-
parent *pathInfo
38+
fieldMapping *mapping.FieldMapping
39+
analyzer string
40+
datetimeParser string
41+
rootName string
42+
parent *pathInfo
3943
}
4044

4145
// Store all of the changes to defaults
4246
type defaultInfo struct {
43-
analyzer bool
44-
dateTimeParser bool
45-
synonymSource bool
47+
synonymSource bool
4648
}
4749

4850
// Compare two index mappings to identify all of the updatable changes
@@ -87,6 +89,18 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update
8789
}
8890
addPathInfo(updPaths, "", upd.DefaultMapping, ori, nil, "")
8991

92+
// Compare all analysers currently in use
93+
err = compareAnalysers(oriPaths, updPaths, ori, upd)
94+
if err != nil {
95+
return nil, err
96+
}
97+
98+
// Compare all datetime parsers currently in use
99+
err = compareDateTimeParsers(oriPaths, updPaths, ori, upd)
100+
if err != nil {
101+
return nil, err
102+
}
103+
90104
// Compare both the mappings based on the document paths
91105
// and create a list of index, docvalues, store differences
92106
// for every single field possible
@@ -131,14 +145,6 @@ func compareMappings(ori, upd *mapping.IndexMappingImpl) (*defaultInfo, error) {
131145
return nil, fmt.Errorf("default type cannot be changed")
132146
}
133147

134-
if ori.DefaultAnalyzer != upd.DefaultAnalyzer {
135-
rv.analyzer = true
136-
}
137-
138-
if ori.DefaultDateTimeParser != upd.DefaultDateTimeParser {
139-
rv.dateTimeParser = true
140-
}
141-
142148
if ori.DefaultSynonymSource != upd.DefaultSynonymSource {
143149
rv.synonymSource = true
144150
}
@@ -260,6 +266,107 @@ func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMa
260266
paths[name] = pInfo
261267
}
262268

269+
func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error {
270+
271+
oriAnalyzers := make(map[string]interface{})
272+
updAnalyzers := make(map[string]interface{})
273+
oriCustomAnalysers := ori.CustomAnalysis.Analyzers
274+
updCustomAnalysers := upd.CustomAnalysis.Analyzers
275+
276+
for path, info := range oriPaths {
277+
if len(info.fieldMapInfo) == 0 {
278+
continue
279+
}
280+
for _, fInfo := range info.fieldMapInfo {
281+
if fInfo.fieldMapping.Type == "text" {
282+
analyzerName := ori.AnalyzerNameForPath(path)
283+
fInfo.analyzer = analyzerName
284+
if val, ok := oriCustomAnalysers[analyzerName]; ok {
285+
oriAnalyzers[analyzerName] = val
286+
}
287+
}
288+
}
289+
}
290+
291+
for path, info := range updPaths {
292+
if len(info.fieldMapInfo) == 0 {
293+
continue
294+
}
295+
for _, fInfo := range info.fieldMapInfo {
296+
if fInfo.fieldMapping.Type == "text" {
297+
analyzerName := upd.AnalyzerNameForPath(path)
298+
fInfo.analyzer = analyzerName
299+
if val, ok := updCustomAnalysers[analyzerName]; ok {
300+
updAnalyzers[analyzerName] = val
301+
}
302+
}
303+
}
304+
}
305+
306+
for name, anUpd := range updAnalyzers {
307+
if anOri, ok := oriAnalyzers[name]; ok {
308+
if !reflect.DeepEqual(anUpd, anOri) {
309+
return fmt.Errorf("analyser %s changed while being used by fields", name)
310+
}
311+
} else {
312+
return fmt.Errorf("analyser %s newly added to an existing field", name)
313+
}
314+
}
315+
316+
return nil
317+
}
318+
319+
func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error {
320+
321+
oriDateTimeParsers := make(map[string]analysis.DateTimeParser)
322+
updDateTimeParsers := make(map[string]analysis.DateTimeParser)
323+
324+
for _, info := range oriPaths {
325+
if len(info.fieldMapInfo) == 0 {
326+
continue
327+
}
328+
for _, fInfo := range info.fieldMapInfo {
329+
if fInfo.fieldMapping.Type == "datetime" {
330+
if fInfo.fieldMapping.DateFormat == "" {
331+
fInfo.datetimeParser = ori.DefaultDateTimeParser
332+
oriDateTimeParsers[ori.DefaultDateTimeParser] = ori.DateTimeParserNamed(ori.DefaultDateTimeParser)
333+
} else {
334+
oriDateTimeParsers[fInfo.fieldMapping.DateFormat] = ori.DateTimeParserNamed(fInfo.fieldMapping.DateFormat)
335+
}
336+
}
337+
}
338+
}
339+
340+
for _, info := range updPaths {
341+
if len(info.fieldMapInfo) == 0 {
342+
continue
343+
}
344+
for _, fInfo := range info.fieldMapInfo {
345+
if fInfo.fieldMapping.Type == "datetime" {
346+
if fInfo.fieldMapping.DateFormat == "" {
347+
fInfo.datetimeParser = upd.DefaultDateTimeParser
348+
updDateTimeParsers[upd.DefaultDateTimeParser] = upd.DateTimeParserNamed(upd.DefaultDateTimeParser)
349+
} else {
350+
fInfo.datetimeParser = fInfo.fieldMapping.DateFormat
351+
updDateTimeParsers[fInfo.fieldMapping.DateFormat] = upd.DateTimeParserNamed(fInfo.fieldMapping.DateFormat)
352+
}
353+
}
354+
}
355+
}
356+
357+
for name, dtUpd := range updDateTimeParsers {
358+
if dtOri, ok := oriDateTimeParsers[name]; ok {
359+
if !reflect.DeepEqual(dtUpd, dtOri) {
360+
return fmt.Errorf("datetime parser %s changed while being used by fields", name)
361+
}
362+
} else {
363+
return fmt.Errorf("datetime parser %s added to an existing field", name)
364+
}
365+
}
366+
367+
return nil
368+
}
369+
263370
// Compare all of the fields at a particular document path and add its field information
264371
func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, defaultChanges *defaultInfo) error {
265372

@@ -283,14 +390,28 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, d
283390
} else {
284391
for _, oriFMapInfo := range ori.fieldMapInfo {
285392
var updFMap *mapping.FieldMapping
393+
var updAnalyser string
394+
var updDatetimeParser string
395+
286396
// For multiple fields at a single document path, compare
287397
// only with the matching ones
288398
for _, updFMapInfo := range upd.fieldMapInfo {
289399
if oriFMapInfo.rootName == updFMapInfo.rootName &&
290400
oriFMapInfo.fieldMapping.Name == updFMapInfo.fieldMapping.Name {
291401
updFMap = updFMapInfo.fieldMapping
402+
if updFMap.Type == "text" {
403+
updAnalyser = updFMapInfo.analyzer
404+
} else if updFMap.Type == "datetime" {
405+
updDatetimeParser = updFMapInfo.datetimeParser
406+
}
292407
}
293408
}
409+
if updAnalyser != "" && oriFMapInfo.analyzer != updAnalyser {
410+
return fmt.Errorf("analyser has been changed for a text field")
411+
}
412+
if updDatetimeParser != "" && oriFMapInfo.datetimeParser != updDatetimeParser {
413+
return fmt.Errorf("datetime parser has been changed for a text field")
414+
}
294415

295416
info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap, defaultChanges)
296417
if err != nil {
@@ -338,20 +459,16 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges
338459
if original.Type == "text" {
339460
if original.SynonymSource != updated.SynonymSource {
340461
return nil, false, fmt.Errorf("synonym source cannot be changed for text field")
341-
} else if original.SynonymSource == "inherit" && defaultChanges.synonymSource {
462+
} else if original.SynonymSource == "" && defaultChanges.synonymSource {
342463
return nil, false, fmt.Errorf("synonym source cannot be changed for possible inherited text field")
343464
}
344465
if original.Analyzer != updated.Analyzer {
345466
return nil, false, fmt.Errorf("analyzer cannot be updated for text fields")
346-
} else if original.Analyzer == "inherit" && defaultChanges.analyzer {
347-
return nil, false, fmt.Errorf("default analyzer changed for possible inherited text field")
348467
}
349468
}
350469
if original.Type == "datetime" {
351470
if original.DateFormat != updated.DateFormat {
352471
return nil, false, fmt.Errorf("dateFormat cannot be updated for datetime fields")
353-
} else if original.DateFormat == "inherit" && defaultChanges.dateTimeParser {
354-
return nil, false, fmt.Errorf("default analyzer changed for possible inherited text field")
355472
}
356473
}
357474
if original.Type == "vector" || original.Type == "vector_base64" {

0 commit comments

Comments
 (0)