From ab40135056885d933bfd4beda349629d96b2e0bd Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 26 Nov 2024 13:27:59 +0530 Subject: [PATCH 01/25] MB-57888: Index Update --- index/scorch/persister.go | 35 +++- index/scorch/scorch.go | 71 +++++++ index/scorch/snapshot_index.go | 28 ++- index/scorch/snapshot_segment.go | 13 +- index_impl.go | 37 ++++ index_update.go | 315 +++++++++++++++++++++++++++++++ 6 files changed, 487 insertions(+), 12 deletions(-) create mode 100644 index_update.go diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 3aca020de..43eb75e76 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -738,6 +738,18 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, return nil, nil, err } } + + // store updated field info + if segmentSnapshot.updatedFields != nil { + b, err := json.Marshal(segmentSnapshot.updatedFields) + if err != nil { + return nil, nil, err + } + err = snapshotSegmentBucket.Put(boltUpdatedFieldsKey, b) + if err != nil { + return nil, nil, err + } + } } return filenames, newSegmentPaths, nil @@ -842,6 +854,7 @@ var ( boltMetaDataSegmentVersionKey = []byte("version") boltMetaDataTimeStamp = []byte("timeStamp") boltStatsKey = []byte("stats") + boltUpdatedFieldsKey = []byte("fields") TotBytesWrittenKey = []byte("TotBytesWritten") ) @@ -990,6 +1003,9 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { } rv.segment = append(rv.segment, segmentSnapshot) rv.offsets = append(rv.offsets, running) + if segmentSnapshot.updatedFields != nil { + rv.updatedFields = segmentSnapshot.updatedFields + } running += segmentSnapshot.segment.Count() } } @@ -1002,13 +1018,13 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro return nil, fmt.Errorf("segment path missing") } segmentPath := s.path + string(os.PathSeparator) + string(pathBytes) - segment, err := s.segPlugin.Open(segmentPath) + seg, err := s.segPlugin.Open(segmentPath) 
if err != nil { return nil, fmt.Errorf("error opening bolt segment: %v", err) } rv := &SegmentSnapshot{ - segment: segment, + segment: seg, cachedDocs: &cachedDocs{cache: nil}, cachedMeta: &cachedMeta{meta: nil}, } @@ -1018,7 +1034,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro r := bytes.NewReader(deletedBytes) _, err := deletedBitmap.ReadFrom(r) if err != nil { - _ = segment.Close() + _ = seg.Close() return nil, fmt.Errorf("error reading deleted bytes: %v", err) } if !deletedBitmap.IsEmpty() { @@ -1032,11 +1048,22 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro err := json.Unmarshal(statBytes, &statsMap) stats := &fieldStats{statMap: statsMap} if err != nil { - _ = segment.Close() + _ = seg.Close() return nil, fmt.Errorf("error reading stat bytes: %v", err) } rv.stats = stats } + updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) + if updatedFieldBytes != nil { + var updatedFields map[string]index.FieldInfo + + err := json.Unmarshal(updatedFieldBytes, &updatedFields) + if err != nil { + _ = seg.Close() + return nil, fmt.Errorf("error reading updated field bytes: %v", err) + } + rv.updatedFields = updatedFields + } return rv, nil } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 54dcb9274..5774b7137 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -17,6 +17,7 @@ package scorch import ( "encoding/json" "fmt" + "log" "os" "path/filepath" "sync" @@ -36,6 +37,8 @@ const Version uint8 = 2 var ErrClosed = fmt.Errorf("scorch closed") +var mappingInternalKey = []byte("_mapping") + type Scorch struct { nextSegmentID uint64 stats Stats @@ -940,3 +943,71 @@ func (s *Scorch) CopyReader() index.CopyReader { func (s *Scorch) FireIndexEvent() { s.fireEvent(EventKindIndexStart, 0) } + +func (s *Scorch) UpdateFields(fieldInfo map[string]*index.FieldInfo, mappingBytes []byte) error { + err := s.updateBolt(fieldInfo, mappingBytes) + if err != nil { + return err 
+ } + return nil +} + +func (s *Scorch) updateBolt(fieldInfo map[string]*index.FieldInfo, mappingBytes []byte) error { + return s.rootBolt.Update(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + + c := snapshots.Cursor() + for k, _ := c.Last(); k != nil; k, _ = c.Prev() { + _, _, err := decodeUvarintAscending(k) + if err != nil { + log.Printf("unable to parse segment epoch %x, continuing", k) + continue + } + snapshot := snapshots.Bucket(k) + cc := snapshot.Cursor() + for kk, _ := cc.First(); kk != nil; kk, _ = c.Next() { + if k[0] == boltInternalKey[0] { + internalBucket := snapshot.Bucket(k) + if internalBucket == nil { + return fmt.Errorf("segment key, but bucket missing % x", k) + } + err = internalBucket.Put(mappingInternalKey, mappingBytes) + if err != nil { + return err + } + } else if k[0] != boltMetaDataKey[0] { + segmentBucket := snapshot.Bucket(k) + if segmentBucket == nil { + return fmt.Errorf("segment key, but bucket missing % x", k) + } + var updatedFields map[string]index.FieldInfo + updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) + if updatedFieldBytes != nil { + err := json.Unmarshal(updatedFieldBytes, &updatedFields) + if err != nil { + return fmt.Errorf("error reading updated field bytes: %v", err) + } + } else { + updatedFields = make(map[string]index.FieldInfo) + } + for field, info := range fieldInfo { + updatedFields[field] = *info + } + b, err := json.Marshal(updatedFields) + if err != nil { + return err + } + err = segmentBucket.Put(boltUpdatedFieldsKey, b) + if err != nil { + return err + } + } + } + } + + return nil + }) +} diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 4f67a3c0b..fe308f4aa 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -84,6 +84,8 @@ type IndexSnapshot struct { m3 sync.RWMutex // bm25 metrics specific - not to interfere with TFR creation fieldCardinality map[string]int + 
+ updatedFields map[string]index.FieldInfo } func (i *IndexSnapshot) Segments() []*SegmentSnapshot { @@ -509,6 +511,10 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) { // Keeping that TODO for now until we have a cleaner way. rvd.StoredFieldsSize += uint64(len(val)) + if info, ok := is.updatedFields[name]; ok && + (info.All || info.Store) { + return true + } // copy value, array positions to preserve them beyond the scope of this callback value := append([]byte(nil), val...) arrayPos := append([]uint64(nil), pos...) @@ -634,7 +640,15 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field segBytesRead := s.segment.BytesRead() rv.incrementBytesRead(segBytesRead) } - dict, err := s.segment.Dictionary(field) + + var dict segment.TermDictionary + var err error + if info, ok := is.updatedFields[field]; ok && + (info.Index || info.All) { + dict = nil + } else { + dict, err = s.segment.Dictionary(field) + } if err != nil { return nil, err } @@ -783,6 +797,16 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } } + var filteredFields []string + for _, field := range vFields { + if info, ok := is.updatedFields[field]; ok && + (info.DocValues || info.All) { + continue + } else { + filteredFields = append(filteredFields, field) + } + } + var errCh chan error // cFields represents the fields that we'll need from the @@ -790,7 +814,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( // if the caller happens to know we're on the same segmentIndex // from a previous invocation if cFields == nil { - cFields = subtractStrings(fields, vFields) + cFields = subtractStrings(fields, filteredFields) if !ss.cachedDocs.hasFields(cFields) { errCh = make(chan error, 1) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index ec65bf800..df3cafb2d 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -35,12 +35,13 @@ type SegmentSnapshot struct { // 
segment was mmaped recently, in which case // we consider the loading cost of the metadata // as part of IO stats. - mmaped uint32 - id uint64 - segment segment.Segment - deleted *roaring.Bitmap - creator string - stats *fieldStats + mmaped uint32 + id uint64 + segment segment.Segment + deleted *roaring.Bitmap + creator string + stats *fieldStats + updatedFields map[string]index.FieldInfo cachedMeta *cachedMeta diff --git a/index_impl.go b/index_impl.go index 5cc0c5899..682fb9925 100644 --- a/index_impl.go +++ b/index_impl.go @@ -168,10 +168,25 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde storeConfig = map[string]interface{}{} } + var um *mapping.IndexMappingImpl + var umBytes []byte + storeConfig["path"] = indexStorePath(path) storeConfig["create_if_missing"] = false storeConfig["error_if_exists"] = false for rck, rcv := range runtimeConfig { + if rck == "mapping" { + if val, ok := rcv.([]byte); ok { + err = util.UnmarshalJSON(val, &um) + if err != nil { + return nil, fmt.Errorf("error parsing updated mapping JSON: %v\nmapping contents:\n%s", err, val) + } + umBytes = val + } else { + return nil, fmt.Errorf("error typecasting updated mapping JSON\nmapping contents: %v", rcv) + } + continue + } storeConfig[rck] = rcv } @@ -230,6 +245,28 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde return rv, err } + if um != nil { + ui, ok := rv.i.(index.UpdateIndex) + if !ok { + return rv, fmt.Errorf("updated mapping present for unupdatable index") + } + + err = um.Validate() + if err != nil { + return rv, err + } + + fieldInfo, err := deletedFields(im, um) + if err != nil { + return rv, err + } + + err = ui.UpdateFields(fieldInfo, umBytes) + if err != nil { + return rv, err + } + } + rv.m = im indexStats.Register(rv) return rv, err diff --git a/index_update.go b/index_update.go new file mode 100644 index 000000000..5e4dbcd52 --- /dev/null +++ b/index_update.go @@ -0,0 +1,315 @@ +// Copyright (c) 2024 
Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bleve + +import ( + "fmt" + + "github.com/blevesearch/bleve/v2/mapping" + index "github.com/blevesearch/bleve_index_api" +) + +type pathInfo struct { + fieldMapInfo []*fieldMapInfo + dynamic bool + path string + parentPath string +} + +type fieldMapInfo struct { + fieldMapping *mapping.FieldMapping + rootName string + parent *pathInfo +} + +func deletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldInfo, error) { + + var err error + for name, updDMapping := range upd.TypeMapping { + err = checkUpdatedMapping(ori.TypeMapping[name], updDMapping) + if err != nil { + return nil, err + } + } + + err = checkUpdatedMapping(ori.DefaultMapping, upd.DefaultMapping) + if err != nil { + return nil, err + } + + oriPaths := make(map[string]*pathInfo) + updPaths := make(map[string]*pathInfo) + + for name, oriDMapping := range ori.TypeMapping { + addPathInfo(oriPaths, "", oriDMapping, ori, nil, name) + } + addPathInfo(oriPaths, "", ori.DefaultMapping, ori, nil, "") + + for name, updDMapping := range upd.TypeMapping { + addPathInfo(updPaths, "", updDMapping, ori, nil, name) + } + addPathInfo(updPaths, "", upd.DefaultMapping, ori, nil, "") + + fieldInfo := make(map[string]*index.FieldInfo) + for path, info := range oriPaths { + err = addFieldInfo(fieldInfo, info, updPaths[path]) + if err != nil { + return nil, err + } + } + + for name, info := range fieldInfo { + if !info.All && 
!info.Index && !info.DocValues && !info.Store { + delete(fieldInfo, name) + } + } + return fieldInfo, nil +} + +// Function to ensure updated document mapping does not contain new field mappings +// or document mappings +func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { + + if ori == nil { + if upd == nil || !upd.Enabled { + return nil + } + return fmt.Errorf("updated index mapping contains new properties") + } + + if upd == nil || !upd.Enabled { + return nil + } + + var err error + for name, updDMapping := range upd.Properties { + err = checkUpdatedMapping(ori.Properties[name], updDMapping) + if err != nil { + return err + } + } + + for _, updFMapping := range upd.Fields { + var oriFMapping *mapping.FieldMapping + + for _, fMapping := range ori.Fields { + if updFMapping.Name == fMapping.Name { + oriFMapping = fMapping + } + } + if oriFMapping == nil { + return fmt.Errorf("updated index mapping contains new fields") + } + } + + return nil +} + +func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMapping, + im *mapping.IndexMappingImpl, parent *pathInfo, rootName string) { + + if !mp.Enabled { + return + } + + var pInfo *pathInfo + if val, ok := paths[name]; ok { + pInfo = val + } else { + pInfo = &pathInfo{ + fieldMapInfo: make([]*fieldMapInfo, 0), + } + pInfo.dynamic = mp.Dynamic && im.IndexDynamic + } + + pInfo.dynamic = (pInfo.dynamic || mp.Dynamic) && im.IndexDynamic + pInfo.path = name + if parent != nil { + pInfo.parentPath = parent.path + } + + for cName, cMapping := range mp.Properties { + var pathName string + if name == "" { + pathName = cName + } else { + pathName = name + "." 
+ cName + } + addPathInfo(paths, pathName, cMapping, im, pInfo, rootName) + } + + for _, fMap := range mp.Fields { + fieldMapInfo := &fieldMapInfo{ + fieldMapping: fMap, + rootName: rootName, + parent: pInfo, + } + pInfo.fieldMapInfo = append(pInfo.fieldMapInfo, fieldMapInfo) + } + + paths[name] = pInfo +} + +func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo) error { + + var info *index.FieldInfo + var updated bool + var err error + + if upd == nil { + for _, oriFMapInfo := range ori.fieldMapInfo { + info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil) + if err != nil { + return err + } + err = validateFieldInfo(info, updated, fInfo, ori) + if err != nil { + return err + } + } + } else { + for _, oriFMapInfo := range ori.fieldMapInfo { + var updFMap *mapping.FieldMapping + for _, updFMapInfo := range upd.fieldMapInfo { + if oriFMapInfo.rootName == updFMapInfo.rootName && + oriFMapInfo.fieldMapping.Name == updFMapInfo.fieldMapping.Name { + updFMap = updFMapInfo.fieldMapping + } + } + + info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap) + if err != nil { + return err + } + err = validateFieldInfo(info, updated, fInfo, ori) + if err != nil { + return err + } + } + } + if err != nil { + return err + } + + return nil +} + +func validateFieldInfo(newInfo *index.FieldInfo, updated bool, fInfo map[string]*index.FieldInfo, + ori *pathInfo) error { + + var name string + if ori.fieldMapInfo[0].parent.parentPath == "" { + name = ori.fieldMapInfo[0].fieldMapping.Name + } else { + name = ori.fieldMapInfo[0].parent.parentPath + "." 
+ ori.fieldMapInfo[0].fieldMapping.Name + } + if updated { + if ori.dynamic { + return fmt.Errorf("updated field is under a dynamic property") + } + } + if oldInfo, ok := fInfo[name]; ok { + if oldInfo.All != newInfo.All || oldInfo.Index != newInfo.Index || + oldInfo.DocValues != newInfo.DocValues || oldInfo.Store != newInfo.Store { + return fmt.Errorf("updated field impossible to verify because multiple mappings point to the same field name") + } + } else { + fInfo[name] = newInfo + } + return nil +} + +func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.FieldInfo, bool, error) { + + rv := &index.FieldInfo{} + + if updated == nil { + if original != nil && !original.IncludeInAll { + rv.All = true + return rv, true, nil + } else if original == nil { + return nil, false, nil + } + return nil, false, fmt.Errorf("deleted field present in '_all' field") + } else if original == nil { + return nil, false, fmt.Errorf("matching field not found in original index mapping") + } + + if original.Type != updated.Type { + return nil, false, fmt.Errorf("field type cannot be updated") + } + if original.Analyzer != updated.Analyzer && original.Type == "text" { + return nil, false, fmt.Errorf("analyzer cannot be updated for text fields") + } + if original.DateFormat != updated.DateFormat && original.Type == "datetime" { + return nil, false, fmt.Errorf("dateFormat cannot be updated for datetime fields") + } + if original.Type == "vector" || original.Type == "vector_base64" { + if original.Dims != updated.Dims { + return nil, false, fmt.Errorf("dimensions cannot be updated for vector and vector_base64 fields") + } + if original.Similarity != updated.Similarity { + return nil, false, fmt.Errorf("similarity cannot be updated for vector and vector_base64 fields") + } + if original.VectorIndexOptimizedFor != updated.VectorIndexOptimizedFor { + return nil, false, fmt.Errorf("vectorIndexOptimizedFor cannot be updated for vector and vector_base64 fields") + } + } + if 
original.IncludeInAll != updated.IncludeInAll { + return nil, false, fmt.Errorf("includeInAll cannot be changed") + } + if original.IncludeTermVectors != updated.IncludeTermVectors { + return nil, false, fmt.Errorf("includeTermVectors cannot be changed") + } + if original.SkipFreqNorm != updated.SkipFreqNorm { + return nil, false, fmt.Errorf("skipFreqNorm cannot be changed") + } + + // Updating is not possible if store changes from true + // to false when the field is included in _all + if original.Store != updated.Store { + if updated.Store || updated.IncludeInAll { + return nil, false, fmt.Errorf("store cannot be changed if field present in `_all' field") + } else { + rv.Store = true + } + } + + // Updating is not possible if index changes from true + // to false when the field is included in _all + if original.Index != updated.Index { + if updated.Index || updated.IncludeInAll { + return nil, false, fmt.Errorf("index cannot be changed if field present in `_all' field") + } else { + rv.Index = true + rv.DocValues = true + } + } + + // Updating is not possible if docvalues changes from true + // to false when the field is included in _all + if original.DocValues != updated.DocValues { + if updated.DocValues || updated.IncludeInAll { + return nil, false, fmt.Errorf("docvalues cannot be changed if field present in `_all' field") + } else { + rv.DocValues = true + } + } + + if rv.All || rv.Index || rv.Store { + return rv, true, nil + } + return rv, false, nil +} From 7740e0f72559fc46883bf012fa84a5fb159127dd Mon Sep 17 00:00:00 2001 From: Likith B Date: Thu, 9 Jan 2025 15:32:59 +0530 Subject: [PATCH 02/25] MB-57888: New apis for index update --- index.go | 18 +++++ index/scorch/persister.go | 2 +- index/scorch/scorch.go | 43 +++++++----- index/scorch/snapshot_index.go | 10 ++- index_impl.go | 122 ++++++++++++++++++++++++++++----- index_update.go | 106 ++++++++++++++++++++-------- 6 files changed, 235 insertions(+), 66 deletions(-) diff --git a/index.go b/index.go 
index 3d2389884..6ab7ccd2a 100644 --- a/index.go +++ b/index.go @@ -329,6 +329,24 @@ func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) return openIndexUsing(path, runtimeConfig) } +// Update index at the specified path, must exist. +// The mapping used when created will be overwritten by the mapping provided +// for all Index/Search operations. +// Throws an error without any changes to the index if an unupdatable mapping is provided +func Update(path string, newParams string) (Index, error) { + return updateIndexUsing(path, nil, newParams) +} + +// UpdateUsing index at the specified path, must exist. +// The mapping used when created will be overwritten by the mapping provided +// for all Index/Search operations. +// The provided runtimeConfig can override settings +// persisted when the kvstore was created. +// Throws an error without any changes to the index if an unupdatable mapping is provided +func UpdateUsing(path string, runtimeConfig map[string]interface{}, newParams string) (Index, error) { + return updateIndexUsing(path, runtimeConfig, newParams) +} + // Builder is a limited interface, used to build indexes in an offline mode. // Items cannot be updated or deleted, and the caller MUST ensure a document is // indexed only once. 
diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 43eb75e76..8874be89b 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -989,7 +989,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { segmentBucket := snapshot.Bucket(k) if segmentBucket == nil { _ = rv.DecRef() - return nil, fmt.Errorf("segment key, but bucket missing % x", k) + return nil, fmt.Errorf("segment key, but bucket missing %x", k) } segmentSnapshot, err := s.loadSegment(segmentBucket) if err != nil { diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 5774b7137..285f8b0f8 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -37,7 +37,7 @@ const Version uint8 = 2 var ErrClosed = fmt.Errorf("scorch closed") -var mappingInternalKey = []byte("_mapping") +var MappingInternalKey = []byte("_mapping") type Scorch struct { nextSegmentID uint64 @@ -944,15 +944,27 @@ func (s *Scorch) FireIndexEvent() { s.fireEvent(EventKindIndexStart, 0) } +// Updates bolt db with the given field info. Existing field info already in bolt +// will be merged before persisting. 
The index mapping is also overwritted both +// in bolt as well as the index snapshot func (s *Scorch) UpdateFields(fieldInfo map[string]*index.FieldInfo, mappingBytes []byte) error { - err := s.updateBolt(fieldInfo, mappingBytes) + // Switch from pointer to value to marshal into a json for storage + updatedFields := make(map[string]index.FieldInfo) + for field, info := range fieldInfo { + updatedFields[field] = *info + } + err := s.updateBolt(updatedFields, mappingBytes) if err != nil { return err } + s.root.m.Lock() + s.root.updatedFields = updatedFields + s.root.m.Unlock() return nil } -func (s *Scorch) updateBolt(fieldInfo map[string]*index.FieldInfo, mappingBytes []byte) error { +// Merge and update deleted field info and rewrite index mapping +func (s *Scorch) updateBolt(fieldInfo map[string]index.FieldInfo, mappingBytes []byte) error { return s.rootBolt.Update(func(tx *bolt.Tx) error { snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { @@ -968,20 +980,20 @@ func (s *Scorch) updateBolt(fieldInfo map[string]*index.FieldInfo, mappingBytes } snapshot := snapshots.Bucket(k) cc := snapshot.Cursor() - for kk, _ := cc.First(); kk != nil; kk, _ = c.Next() { - if k[0] == boltInternalKey[0] { - internalBucket := snapshot.Bucket(k) + for kk, _ := cc.First(); kk != nil; kk, _ = cc.Next() { + if kk[0] == boltInternalKey[0] { + internalBucket := snapshot.Bucket(kk) if internalBucket == nil { - return fmt.Errorf("segment key, but bucket missing % x", k) + return fmt.Errorf("segment key, but bucket missing %x", kk) } - err = internalBucket.Put(mappingInternalKey, mappingBytes) + err = internalBucket.Put(MappingInternalKey, mappingBytes) if err != nil { return err } - } else if k[0] != boltMetaDataKey[0] { - segmentBucket := snapshot.Bucket(k) + } else if kk[0] != boltMetaDataKey[0] { + segmentBucket := snapshot.Bucket(kk) if segmentBucket == nil { - return fmt.Errorf("segment key, but bucket missing % x", k) + return fmt.Errorf("segment key, but bucket missing 
%x", kk) } var updatedFields map[string]index.FieldInfo updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) @@ -990,11 +1002,11 @@ func (s *Scorch) updateBolt(fieldInfo map[string]*index.FieldInfo, mappingBytes if err != nil { return fmt.Errorf("error reading updated field bytes: %v", err) } + for field, info := range fieldInfo { + updatedFields[field] = info + } } else { - updatedFields = make(map[string]index.FieldInfo) - } - for field, info := range fieldInfo { - updatedFields[field] = *info + updatedFields = fieldInfo } b, err := json.Marshal(updatedFields) if err != nil { @@ -1007,7 +1019,6 @@ func (s *Scorch) updateBolt(fieldInfo map[string]*index.FieldInfo, mappingBytes } } } - return nil }) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index fe308f4aa..cf7cbf7f2 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -511,10 +511,12 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) { // Keeping that TODO for now until we have a cleaner way. rvd.StoredFieldsSize += uint64(len(val)) + // Skip fields that are supposed to have deleted store values if info, ok := is.updatedFields[name]; ok && (info.All || info.Store) { return true } + // copy value, array positions to preserve them beyond the scope of this callback value := append([]byte(nil), val...) arrayPos := append([]uint64(nil), pos...) 
@@ -643,6 +645,8 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field var dict segment.TermDictionary var err error + + // Skip fields that are supposed to have no indexing if info, ok := is.updatedFields[field]; ok && (info.Index || info.All) { dict = nil @@ -652,6 +656,7 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field if err != nil { return nil, err } + if dictStats, ok := dict.(segment.DiskStatsReporter); ok { bytesRead := dictStats.BytesRead() rv.incrementBytesRead(bytesRead) @@ -797,6 +802,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } } + // Filter out fields that are supposed to have no doc values var filteredFields []string for _, field := range vFields { if info, ok := is.updatedFields[field]; ok && @@ -829,8 +835,8 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } } - if ssvOk && ssv != nil && len(vFields) > 0 { - dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs) + if ssvOk && ssv != nil && len(filteredFields) > 0 { + dvs, err = ssv.VisitDocValues(localDocNum, filteredFields, visitor, dvs) if err != nil { return nil, nil, err } diff --git a/index_impl.go b/index_impl.go index 682fb9925..a94b59980 100644 --- a/index_impl.go +++ b/index_impl.go @@ -133,7 +133,7 @@ func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, if err != nil { return nil, err } - err = rv.i.SetInternal(mappingInternalKey, mappingBytes) + err = rv.i.SetInternal(scorch.MappingInternalKey, mappingBytes) if err != nil { return nil, err } @@ -168,25 +168,110 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde storeConfig = map[string]interface{}{} } + storeConfig["path"] = indexStorePath(path) + storeConfig["create_if_missing"] = false + storeConfig["error_if_exists"] = false + for rck, rcv := range runtimeConfig { + storeConfig[rck] = rcv + } + + // open the index + indexTypeConstructor := 
registry.IndexTypeConstructorByName(rv.meta.IndexType) + if indexTypeConstructor == nil { + return nil, ErrorUnknownIndexType + } + + rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue) + if err != nil { + return nil, err + } + err = rv.i.Open() + if err != nil { + return nil, err + } + defer func(rv *indexImpl) { + if !rv.open { + rv.i.Close() + } + }(rv) + + // now load the mapping + indexReader, err := rv.i.Reader() + if err != nil { + return nil, err + } + defer func() { + if cerr := indexReader.Close(); cerr != nil && err == nil { + err = cerr + } + }() + + mappingBytes, err := indexReader.GetInternal(scorch.MappingInternalKey) + if err != nil { + return nil, err + } + + var im *mapping.IndexMappingImpl + err = util.UnmarshalJSON(mappingBytes, &im) + if err != nil { + return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes)) + } + + // mark the index as open + rv.mutex.Lock() + defer rv.mutex.Unlock() + rv.open = true + + // validate the mapping + err = im.Validate() + if err != nil { + // note even if the mapping is invalid + // we still return an open usable index + return rv, err + } + + rv.m = im + indexStats.Register(rv) + return rv, err +} + +func updateIndexUsing(path string, runtimeConfig map[string]interface{}, newParams string) (rv *indexImpl, err error) { + rv = &indexImpl{ + path: path, + name: path, + } + rv.stats = &IndexStat{i: rv} + + rv.meta, err = openIndexMeta(path) + if err != nil { + return nil, err + } + + // backwards compatibility if index type is missing + if rv.meta.IndexType == "" { + rv.meta.IndexType = upsidedown.Name + } + + storeConfig := rv.meta.Config + if storeConfig == nil { + storeConfig = map[string]interface{}{} + } + var um *mapping.IndexMappingImpl - var umBytes []byte + + if len(newParams) == 0 { + return nil, fmt.Errorf(("updated mapping is empty")) + } + + err = util.UnmarshalJSON([]byte(newParams), &um) + if err != nil { + return nil, 
fmt.Errorf("error parsing updated mapping JSON: %v\nmapping contents:\n%s", err, newParams) + } storeConfig["path"] = indexStorePath(path) storeConfig["create_if_missing"] = false storeConfig["error_if_exists"] = false for rck, rcv := range runtimeConfig { - if rck == "mapping" { - if val, ok := rcv.([]byte); ok { - err = util.UnmarshalJSON(val, &um) - if err != nil { - return nil, fmt.Errorf("error parsing updated mapping JSON: %v\nmapping contents:\n%s", err, val) - } - umBytes = val - } else { - return nil, fmt.Errorf("error typecasting updated mapping JSON\nmapping contents: %v", rcv) - } - continue - } storeConfig[rck] = rcv } @@ -200,6 +285,7 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde if err != nil { return nil, err } + err = rv.i.Open() if err != nil { return nil, err @@ -221,7 +307,7 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde } }() - mappingBytes, err := indexReader.GetInternal(mappingInternalKey) + mappingBytes, err := indexReader.GetInternal(scorch.MappingInternalKey) if err != nil { return nil, err } @@ -245,6 +331,7 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde return rv, err } + // Validate and update the index with the new mapping if um != nil { ui, ok := rv.i.(index.UpdateIndex) if !ok { @@ -256,15 +343,16 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde return rv, err } - fieldInfo, err := deletedFields(im, um) + fieldInfo, err := DeletedFields(im, um) if err != nil { return rv, err } - err = ui.UpdateFields(fieldInfo, umBytes) + err = ui.UpdateFields(fieldInfo, []byte(newParams)) if err != nil { return rv, err } + im = um } rv.m = im diff --git a/index_update.go b/index_update.go index 5e4dbcd52..c7228f532 100644 --- a/index_update.go +++ b/index_update.go @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Couchbase, Inc. +// Copyright (c) 2025 Couchbase, Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ import ( index "github.com/blevesearch/bleve_index_api" ) +// Store all the fields that interact with the data +// from a document path type pathInfo struct { fieldMapInfo []*fieldMapInfo dynamic bool @@ -28,15 +30,20 @@ type pathInfo struct { parentPath string } +// Store the field information with respect to the +// document paths type fieldMapInfo struct { fieldMapping *mapping.FieldMapping rootName string parent *pathInfo } -func deletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldInfo, error) { - +// Compare two index mappings to identify all of the updatable changes +func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldInfo, error) { var err error + + // Check for new mappings present in the type mappings + // of the updated compared to the original for name, updDMapping := range upd.TypeMapping { err = checkUpdatedMapping(ori.TypeMapping[name], updDMapping) if err != nil { @@ -44,6 +51,8 @@ func deletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI } } + // Check for new mappings present in the default mappings + // of the updated compared to the original err = checkUpdatedMapping(ori.DefaultMapping, upd.DefaultMapping) if err != nil { return nil, err @@ -52,16 +61,23 @@ func deletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI oriPaths := make(map[string]*pathInfo) updPaths := make(map[string]*pathInfo) + // Go through each mapping present in the original + // and consolidate according to the document paths for name, oriDMapping := range ori.TypeMapping { addPathInfo(oriPaths, "", oriDMapping, ori, nil, name) } addPathInfo(oriPaths, "", ori.DefaultMapping, ori, nil, "") + // Go through each mapping present in the updated + // and consolidate according to the document paths for name, updDMapping := range 
upd.TypeMapping { addPathInfo(updPaths, "", updDMapping, ori, nil, name) } addPathInfo(updPaths, "", upd.DefaultMapping, ori, nil, "") + // Compare both the mappings based on the document paths + // and create a list of index, docvalues, store differences + // for every single field possible fieldInfo := make(map[string]*index.FieldInfo) for path, info := range oriPaths { err = addFieldInfo(fieldInfo, info, updPaths[path]) @@ -70,6 +86,8 @@ func deletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI } } + // Remove entries from the list with no changes between the + // original and the updated mapping for name, info := range fieldInfo { if !info.All && !info.Index && !info.DocValues && !info.Store { delete(fieldInfo, name) @@ -78,10 +96,12 @@ func deletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI return fieldInfo, nil } -// Function to ensure updated document mapping does not contain new field mappings -// or document mappings +// Ensures updated document mapping does not contain new +// field mappings or document mappings func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { + // Check to verify both original and updated are not nil + // and are enabled before proceeding if ori == nil { if upd == nil || !upd.Enabled { return nil @@ -94,6 +114,7 @@ func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { } var err error + // Recursively go through the child mappings for name, updDMapping := range upd.Properties { err = checkUpdatedMapping(ori.Properties[name], updDMapping) if err != nil { @@ -101,6 +122,8 @@ func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { } } + // Simple checks to ensure no new field mappings present + // in updated for _, updFMapping := range upd.Fields { var oriFMapping *mapping.FieldMapping @@ -117,13 +140,20 @@ func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { return nil } +// Adds all of the field mappings while maintaining a tree of 
the document structure
+// to ensure traversal and verification is possible in case of multiple mappings defined
+// for a single field or multiple document fields' data getting written to a single zapx field
 func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMapping,
 	im *mapping.IndexMappingImpl, parent *pathInfo, rootName string) {
 
+	// Early exit if mapping has been disabled
+	// Comparisons later on will be done with a nil object
 	if !mp.Enabled {
 		return
 	}
 
+	// Consolidate path information like index dynamic across multiple
+	// mappings if path is the same
 	var pInfo *pathInfo
 	if val, ok := paths[name]; ok {
 		pInfo = val
@@ -140,6 +170,7 @@
 		pInfo.parentPath = parent.path
 	}
 
+	// Recursively add path information for all child mappings
 	for cName, cMapping := range mp.Properties {
 		var pathName string
 		if name == "" {
@@ -150,6 +181,7 @@
 		addPathInfo(paths, pathName, cMapping, im, pInfo, rootName)
 	}
 
+	// Add field mapping information keeping the document structure intact
 	for _, fMap := range mp.Fields {
 		fieldMapInfo := &fieldMapInfo{
 			fieldMapping: fMap,
@@ -162,12 +194,15 @@
 	paths[name] = pInfo
 }
 
+// Compare all of the fields at a particular document path and add its field information
 func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo) error {
 
 	var info *index.FieldInfo
 	var updated bool
 	var err error
 
+	// Assume deleted or disabled mapping if upd is nil. 
Checks for ori being nil
+	// or upd having mappings not in ori have already been done before this stage
 	if upd == nil {
 		for _, oriFMapInfo := range ori.fieldMapInfo {
 			info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil)
 			if err != nil {
 				return err
 			}
@@ -182,6 +217,8 @@
 	} else {
 		for _, oriFMapInfo := range ori.fieldMapInfo {
 			var updFMap *mapping.FieldMapping
+			// For multiple fields at a single document path, compare
+			// only with the matching ones
 			for _, updFMapInfo := range upd.fieldMapInfo {
 				if oriFMapInfo.rootName == updFMapInfo.rootName &&
 					oriFMapInfo.fieldMapping.Name == updFMapInfo.fieldMapping.Name {
@@ -206,31 +243,13 @@
 	return nil
 }
-func validateFieldInfo(newInfo *index.FieldInfo, updated bool, fInfo map[string]*index.FieldInfo,
-	ori *pathInfo) error {
-
-	var name string
-	if ori.fieldMapInfo[0].parent.parentPath == "" {
-		name = ori.fieldMapInfo[0].fieldMapping.Name
-	} else {
-		name = ori.fieldMapInfo[0].parent.parentPath + "." 
+ ori.fieldMapInfo[0].fieldMapping.Name
-	}
-	if updated {
-		if ori.dynamic {
-			return fmt.Errorf("updated field is under a dynamic property")
-		}
-	}
-	if oldInfo, ok := fInfo[name]; ok {
-		if oldInfo.All != newInfo.All || oldInfo.Index != newInfo.Index ||
-			oldInfo.DocValues != newInfo.DocValues || oldInfo.Store != newInfo.Store {
-			return fmt.Errorf("updated field impossible to verify because multiple mappings point to the same field name")
-		}
-	} else {
-		fInfo[name] = newInfo
-	}
-	return nil
-}
-
+// Compares two field mappings against each other, checking for changes in index, store, doc values
+// and complete deletion of the mapping while noting that the changes made are doable based on
+// other values like includeInAll and dynamic
+// first return argument gives an empty fieldInfo if no changes detected
+// second return argument gives a flag indicating whether any changes, if detected, are doable or if
+// update is impossible
+// third argument is an error explaining exactly why the change is not possible
 func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.FieldInfo, bool, error) {
 
 	rv := &index.FieldInfo{}
@@ -313,3 +332,30 @@
 	}
 	return rv, false, nil
 }
+
+// After identifying changes, validate against the existing changes in case of duplicate fields.
+// In such a situation, any conflicting changes found will abort the update process
+func validateFieldInfo(newInfo *index.FieldInfo, updated bool, fInfo map[string]*index.FieldInfo,
+	ori *pathInfo) error {
+
+	var name string
+	if ori.fieldMapInfo[0].parent.parentPath == "" {
+		name = ori.fieldMapInfo[0].fieldMapping.Name
+	} else {
+		name = ori.fieldMapInfo[0].parent.parentPath + "." 
+ ori.fieldMapInfo[0].fieldMapping.Name + } + if updated { + if ori.dynamic { + return fmt.Errorf("updated field is under a dynamic property") + } + } + if oldInfo, ok := fInfo[name]; ok { + if oldInfo.All != newInfo.All || oldInfo.Index != newInfo.Index || + oldInfo.DocValues != newInfo.DocValues || oldInfo.Store != newInfo.Store { + return fmt.Errorf("updated field impossible to verify because multiple mappings point to the same field name") + } + } else { + fInfo[name] = newInfo + } + return nil +} From 62271b29696b29d42aedc93029a4d1edd3d23f88 Mon Sep 17 00:00:00 2001 From: Likith B Date: Mon, 13 Jan 2025 19:41:22 +0530 Subject: [PATCH 03/25] MB-57888: Added few missing checks for index update --- builder.go | 2 +- index_update.go | 86 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/builder.go b/builder.go index 30285a2e4..c92920301 100644 --- a/builder.go +++ b/builder.go @@ -68,7 +68,7 @@ func newBuilder(path string, mapping mapping.IndexMapping, config map[string]int return nil, err } config["internal"] = map[string][]byte{ - string(mappingInternalKey): mappingBytes, + string(scorch.MappingInternalKey): mappingBytes, } // do not use real config, as these are options for the builder, diff --git a/index_update.go b/index_update.go index c7228f532..770181264 100644 --- a/index_update.go +++ b/index_update.go @@ -38,10 +38,22 @@ type fieldMapInfo struct { parent *pathInfo } +// Store all of the changes to defaults +type defaultInfo struct { + analyzer bool + dateTimeParser bool + synonymSource bool +} + // Compare two index mappings to identify all of the updatable changes func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldInfo, error) { var err error + defaultChanges, err := compareMappings(ori, upd) + if err != nil { + return nil, err + } + // Check for new mappings present in the type mappings // of the updated compared to the original for name, updDMapping := range 
upd.TypeMapping { @@ -80,7 +92,7 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI // for every single field possible fieldInfo := make(map[string]*index.FieldInfo) for path, info := range oriPaths { - err = addFieldInfo(fieldInfo, info, updPaths[path]) + err = addFieldInfo(fieldInfo, info, updPaths[path], defaultChanges) if err != nil { return nil, err } @@ -96,6 +108,49 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI return fieldInfo, nil } +func compareMappings(ori, upd *mapping.IndexMappingImpl) (*defaultInfo, error) { + rv := &defaultInfo{} + + if ori.TypeField != upd.TypeField && + len(ori.TypeMapping) != 0 || len(upd.TypeMapping) != 0 { + return nil, fmt.Errorf("type field cannot be changed when type mappings are present") + } + + if ori.DefaultType != upd.DefaultType { + return nil, fmt.Errorf("default type cannot be changed") + } + + if ori.DefaultAnalyzer != upd.DefaultAnalyzer { + rv.analyzer = true + } + + if ori.DefaultDateTimeParser != upd.DefaultDateTimeParser { + rv.dateTimeParser = true + } + + if ori.DefaultSynonymSource != upd.DefaultSynonymSource { + rv.synonymSource = true + } + + if ori.DefaultField != upd.DefaultField { + return nil, fmt.Errorf("default field cannot be changed") + } + + if ori.IndexDynamic != upd.IndexDynamic { + return nil, fmt.Errorf("index dynamic cannot be changed") + } + + if ori.StoreDynamic != upd.StoreDynamic { + return nil, fmt.Errorf(("store dynamic cannot be changed")) + } + + if ori.DocValuesDynamic != upd.DocValuesDynamic { + return nil, fmt.Errorf(("docvalues dynamic cannot be changed")) + } + + return rv, nil +} + // Ensures updated document mapping does not contain new // field mappings or document mappings func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { @@ -195,7 +250,7 @@ func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMa } // Compare all of the fields at a particular document path and add 
its field information -func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo) error { +func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo, defaultChanges *defaultInfo) error { var info *index.FieldInfo var updated bool @@ -205,7 +260,7 @@ func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo) error { // or upd having mappings not in orihave already been done before this stage if upd == nil { for _, oriFMapInfo := range ori.fieldMapInfo { - info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil) + info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil, defaultChanges) if err != nil { return err } @@ -226,7 +281,7 @@ func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo) error { } } - info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap) + info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap, defaultChanges) if err != nil { return err } @@ -250,7 +305,7 @@ func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo) error { // second return argument gives a flag indicating whether any changes, if detected, are doable or if // update is impossible // third argument is an error explaining exactly why the change is not possible -func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.FieldInfo, bool, error) { +func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges *defaultInfo) (*index.FieldInfo, bool, error) { rv := &index.FieldInfo{} @@ -269,11 +324,24 @@ func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.FieldI if original.Type != updated.Type { return nil, false, fmt.Errorf("field type cannot be updated") } - if original.Analyzer != updated.Analyzer && original.Type == "text" { - return nil, false, fmt.Errorf("analyzer cannot be updated for text fields") + if original.Type == "text" { + if original.SynonymSource != updated.SynonymSource { + 
return nil, false, fmt.Errorf("synonym source cannot be changed for text field") + } else if original.SynonymSource == "inherit" && defaultChanges.synonymSource { + return nil, false, fmt.Errorf("synonym source cannot be changed for possible inherited text field") + } + if original.Analyzer != updated.Analyzer { + return nil, false, fmt.Errorf("analyzer cannot be updated for text fields") + } else if original.Analyzer == "inherit" && defaultChanges.analyzer { + return nil, false, fmt.Errorf("default analyzer changed for possible inherited text field") + } } - if original.DateFormat != updated.DateFormat && original.Type == "datetime" { - return nil, false, fmt.Errorf("dateFormat cannot be updated for datetime fields") + if original.Type == "datetime" { + if original.DateFormat != updated.DateFormat { + return nil, false, fmt.Errorf("dateFormat cannot be updated for datetime fields") + } else if original.DateFormat == "inherit" && defaultChanges.dateTimeParser { + return nil, false, fmt.Errorf("default analyzer changed for possible inherited text field") + } } if original.Type == "vector" || original.Type == "vector_base64" { if original.Dims != updated.Dims { From d295df825264fe233c142563150eca9957a6609a Mon Sep 17 00:00:00 2001 From: Likith B Date: Thu, 16 Jan 2025 12:16:00 +0530 Subject: [PATCH 04/25] MB-57888: Added unit tests covering all update paths - Few changes to the deleted fields logic to accommodate edge cases - Added deletion logic to vector paths - Tweaked deletion logic in index path --- index/scorch/optimize_knn.go | 4 + index/scorch/snapshot_index.go | 2 +- index_update.go | 35 +- index_update_test.go | 2498 ++++++++++++++++++++++++++++++++ 4 files changed, 2530 insertions(+), 9 deletions(-) create mode 100644 index_update_test.go diff --git a/index/scorch/optimize_knn.go b/index/scorch/optimize_knn.go index 3b3bc3d19..cee0edff1 100644 --- a/index/scorch/optimize_knn.go +++ b/index/scorch/optimize_knn.go @@ -79,6 +79,10 @@ func (o *OptimizeVR) 
Finish() error { wg.Done() }() for field, vrs := range o.vrs { + if info, ok := o.snapshot.updatedFields[field]; ok && info.All || info.Index { + continue + } + vecIndex, err := segment.InterpretVectorIndex(field, o.requiresFiltering, origSeg.deleted) if err != nil { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index cf7cbf7f2..da630b870 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -649,7 +649,7 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field // Skip fields that are supposed to have no indexing if info, ok := is.updatedFields[field]; ok && (info.Index || info.All) { - dict = nil + dict, err = s.segment.Dictionary("") } else { dict, err = s.segment.Dictionary(field) } diff --git a/index_update.go b/index_update.go index 770181264..0b1de1e16 100644 --- a/index_update.go +++ b/index_update.go @@ -104,6 +104,17 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI if !info.All && !info.Index && !info.DocValues && !info.Store { delete(fieldInfo, name) } + if info.All { + if upd.IndexDynamic { + return nil, fmt.Errorf("Mapping cannot be removed when index dynamic is true") + } + if upd.StoreDynamic { + return nil, fmt.Errorf("Mapping cannot be removed when store dynamic is true") + } + if upd.DocValuesDynamic { + return nil, fmt.Errorf("Mapping cannot be removed when docvalues dynamic is true") + } + } } return fieldInfo, nil } @@ -112,7 +123,7 @@ func compareMappings(ori, upd *mapping.IndexMappingImpl) (*defaultInfo, error) { rv := &defaultInfo{} if ori.TypeField != upd.TypeField && - len(ori.TypeMapping) != 0 || len(upd.TypeMapping) != 0 { + (len(ori.TypeMapping) != 0 || len(upd.TypeMapping) != 0) { return nil, fmt.Errorf("type field cannot be changed when type mappings are present") } @@ -264,7 +275,7 @@ func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo, default if err != nil { return err } - err = 
validateFieldInfo(info, updated, fInfo, ori) + err = validateFieldInfo(info, updated, fInfo, ori, oriFMapInfo) if err != nil { return err } @@ -285,7 +296,7 @@ func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo, default if err != nil { return err } - err = validateFieldInfo(info, updated, fInfo, ori) + err = validateFieldInfo(info, updated, fInfo, ori, oriFMapInfo) if err != nil { return err } @@ -395,7 +406,7 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges } } - if rv.All || rv.Index || rv.Store { + if rv.All || rv.Index || rv.Store || rv.DocValues { return rv, true, nil } return rv, false, nil @@ -404,13 +415,21 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges // After identifying changes, validate against the existing changes incase of duplicate fields. // In such a situation, any conflicting changes found will abort the update process func validateFieldInfo(newInfo *index.FieldInfo, updated bool, fInfo map[string]*index.FieldInfo, - ori *pathInfo) error { + ori *pathInfo, oriFMapInfo *fieldMapInfo) error { var name string - if ori.fieldMapInfo[0].parent.parentPath == "" { - name = ori.fieldMapInfo[0].fieldMapping.Name + if oriFMapInfo.parent.parentPath == "" { + if oriFMapInfo.fieldMapping.Name == "" { + name = oriFMapInfo.parent.path + } else { + name = oriFMapInfo.fieldMapping.Name + } } else { - name = ori.fieldMapInfo[0].parent.parentPath + "." + ori.fieldMapInfo[0].fieldMapping.Name + if oriFMapInfo.fieldMapping.Name == "" { + name = oriFMapInfo.parent.parentPath + "." + oriFMapInfo.parent.path + } else { + name = oriFMapInfo.parent.parentPath + "." + oriFMapInfo.fieldMapping.Name + } } if updated { if ori.dynamic { diff --git a/index_update_test.go b/index_update_test.go new file mode 100644 index 000000000..83c38e077 --- /dev/null +++ b/index_update_test.go @@ -0,0 +1,2498 @@ +// Copyright (c) 2025 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bleve + +import ( + "encoding/json" + "reflect" + "testing" + + "github.com/blevesearch/bleve/v2/analysis/lang/en" + "github.com/blevesearch/bleve/v2/mapping" + index "github.com/blevesearch/bleve_index_api" +) + +func TestCompareFieldMapping(t *testing.T) { + tests := []struct { + original *mapping.FieldMapping + updated *mapping.FieldMapping + defaultChanges *defaultInfo + indexFieldInfo *index.FieldInfo + changed bool + err bool + }{ + { // both nil => no op + original: nil, + updated: nil, + defaultChanges: nil, + indexFieldInfo: nil, + changed: false, + err: false, + }, + { // updated nil => delete all + original: &mapping.FieldMapping{}, + updated: nil, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: &index.FieldInfo{ + All: true, + }, + changed: true, + err: false, + }, + { // type changed => not updatable + original: &mapping.FieldMapping{ + Type: "text", + }, + updated: &mapping.FieldMapping{ + Type: "datetime", + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // synonym source changed for text => not updatable + original: &mapping.FieldMapping{ + Type: "text", + SynonymSource: "a", + }, + updated: &mapping.FieldMapping{ + Type: "text", + SynonymSource: "b", + }, + defaultChanges: &defaultInfo{ 
+ analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // analyser changed for text => not updatable + original: &mapping.FieldMapping{ + Type: "text", + Analyzer: "a", + }, + updated: &mapping.FieldMapping{ + Type: "text", + Analyzer: "b", + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // default analyser changed when inherited => not updatable + original: &mapping.FieldMapping{ + Type: "text", + Analyzer: "inherit", + }, + updated: &mapping.FieldMapping{ + Type: "text", + Analyzer: "inherit", + }, + defaultChanges: &defaultInfo{ + analyzer: true, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // default datetimeparser changed for inherited datetime field => not updatable + original: &mapping.FieldMapping{ + Type: "datetime", + DateFormat: "inherit", + }, + updated: &mapping.FieldMapping{ + Type: "datetime", + DateFormat: "inherit", + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: true, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // dims changed for vector => not updatable + original: &mapping.FieldMapping{ + Type: "vector", + Dims: 128, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "memory-efficient", + }, + updated: &mapping.FieldMapping{ + Type: "vector", + Dims: 1024, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "memory-efficient", + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // similarity changed for vectorbase64 => not updatable + original: &mapping.FieldMapping{ + Type: "vector_base64", + Similarity: "l2_norm", + Dims: 128, + VectorIndexOptimizedFor: "memory-efficient", + }, 
+ updated: &mapping.FieldMapping{ + Type: "vector_base64", + Similarity: "dot_product", + Dims: 128, + VectorIndexOptimizedFor: "memory-efficient", + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // vectorindexoptimizedfor chagned for vector => not updatable + original: &mapping.FieldMapping{ + Type: "vector", + Similarity: "dot_product", + Dims: 128, + VectorIndexOptimizedFor: "memory-efficient", + }, + updated: &mapping.FieldMapping{ + Type: "vector", + Similarity: "dot_product", + Dims: 128, + VectorIndexOptimizedFor: "latency", + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // includeinall changed => not updatable + original: &mapping.FieldMapping{ + Type: "numeric", + IncludeInAll: true, + }, + updated: &mapping.FieldMapping{ + Type: "numeric", + IncludeInAll: false, + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { //includetermvectors changed => not updatable + original: &mapping.FieldMapping{ + Type: "numeric", + IncludeTermVectors: false, + }, + updated: &mapping.FieldMapping{ + Type: "numeric", + IncludeTermVectors: true, + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // store changed after all checks => updatable with store delete + original: &mapping.FieldMapping{ + Type: "numeric", + SkipFreqNorm: true, + }, + updated: &mapping.FieldMapping{ + Type: "numeric", + SkipFreqNorm: false, + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: nil, + changed: false, + err: true, + }, + { // index 
changed after all checks => updatable with index and docvalues delete + original: &mapping.FieldMapping{ + Type: "geopoint", + Index: true, + }, + updated: &mapping.FieldMapping{ + Type: "geopoint", + Index: false, + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: &index.FieldInfo{ + Index: true, + DocValues: true, + }, + changed: true, + err: false, + }, + { // docvalues changed after all checks => docvalues delete + original: &mapping.FieldMapping{ + Type: "numeric", + DocValues: true, + }, + updated: &mapping.FieldMapping{ + Type: "numeric", + DocValues: false, + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: &index.FieldInfo{ + DocValues: true, + }, + changed: true, + err: false, + }, + { // no relavent changes => continue but no op + original: &mapping.FieldMapping{ + Name: "", + Type: "datetime", + Analyzer: "a", + Store: true, + Index: false, + IncludeTermVectors: true, + IncludeInAll: false, + DateFormat: "a", + DocValues: false, + SkipFreqNorm: true, + Dims: 128, + Similarity: "dot_product", + VectorIndexOptimizedFor: "memory-efficient", + SynonymSource: "a", + }, + updated: &mapping.FieldMapping{ + Name: "", + Type: "datetime", + Analyzer: "b", + Store: true, + Index: false, + IncludeTermVectors: true, + IncludeInAll: false, + DateFormat: "a", + DocValues: false, + SkipFreqNorm: true, + Dims: 256, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + SynonymSource: "b", + }, + defaultChanges: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: false, + }, + indexFieldInfo: &index.FieldInfo{}, + changed: false, + err: false, + }, + } + + for i, test := range tests { + rv, changed, err := compareFieldMapping(test.original, test.updated, test.defaultChanges) + + if err == nil && test.err || err != nil && !test.err { + t.Errorf("Unexpected error value for test %d, 
expecting %t, got %v\n", i, test.err, err) + } + if changed != test.changed { + t.Errorf("Unexpected changed value for test %d, expecting %t, got %t, err %v\n", i, test.changed, changed, err) + } + if rv == nil && test.indexFieldInfo != nil || rv != nil && test.indexFieldInfo == nil || !reflect.DeepEqual(rv, test.indexFieldInfo) { + t.Errorf("Unexpected index field info value for test %d, expecting %+v, got %+v, err %v", i, test.indexFieldInfo, rv, err) + } + } +} + +func TestCompareMappings(t *testing.T) { + tests := []struct { + original *mapping.IndexMappingImpl + updated *mapping.IndexMappingImpl + info *defaultInfo + err bool + }{ + { // changed type field when non empty mappings are present => error + original: &mapping.IndexMappingImpl{ + TypeField: "a", + TypeMapping: map[string]*mapping.DocumentMapping{ + "a": {}, + "b": {}, + }, + }, + updated: &mapping.IndexMappingImpl{ + TypeField: "b", + TypeMapping: map[string]*mapping.DocumentMapping{ + "a": {}, + "b": {}, + }, + }, + info: nil, + err: true, + }, + { // changed default type => error + original: &mapping.IndexMappingImpl{ + DefaultType: "a", + }, + updated: &mapping.IndexMappingImpl{ + DefaultType: "b", + }, + info: nil, + err: true, + }, + { // changed default analyzer => analyser true + original: &mapping.IndexMappingImpl{ + DefaultAnalyzer: "a", + }, + updated: &mapping.IndexMappingImpl{ + DefaultAnalyzer: "b", + }, + info: &defaultInfo{ + analyzer: true, + dateTimeParser: false, + synonymSource: false, + }, + err: false, + }, + { // changed default datetimeparser => datetimeparser true + original: &mapping.IndexMappingImpl{ + DefaultDateTimeParser: "a", + }, + updated: &mapping.IndexMappingImpl{ + DefaultDateTimeParser: "b", + }, + info: &defaultInfo{ + analyzer: false, + dateTimeParser: true, + synonymSource: false, + }, + err: false, + }, + { // changed default synonym source => synonym source true + original: &mapping.IndexMappingImpl{ + DefaultSynonymSource: "a", + }, + updated: 
&mapping.IndexMappingImpl{ + DefaultSynonymSource: "b", + }, + info: &defaultInfo{ + analyzer: false, + dateTimeParser: false, + synonymSource: true, + }, + err: false, + }, + { // changed default field => error + original: &mapping.IndexMappingImpl{ + DefaultField: "a", + }, + updated: &mapping.IndexMappingImpl{ + DefaultField: "b", + }, + info: nil, + err: true, + }, + { // changed index dynamic => error + original: &mapping.IndexMappingImpl{ + IndexDynamic: true, + }, + updated: &mapping.IndexMappingImpl{ + IndexDynamic: false, + }, + info: nil, + err: true, + }, + { // changed store dynamic => error + original: &mapping.IndexMappingImpl{ + StoreDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + StoreDynamic: true, + }, + info: nil, + err: true, + }, + { // changed docvalues dynamic => error + original: &mapping.IndexMappingImpl{ + DocValuesDynamic: true, + }, + updated: &mapping.IndexMappingImpl{ + DocValuesDynamic: false, + }, + info: nil, + err: true, + }, + } + + for i, test := range tests { + info, err := compareMappings(test.original, test.updated) + + if err == nil && test.err || err != nil && !test.err { + t.Errorf("Unexpected error value for test %d, expecting %t, got %v\n", i, test.err, err) + } + if info == nil && test.info != nil || info != nil && test.info == nil || !reflect.DeepEqual(info, test.info) { + t.Errorf("Unexpected default info value for test %d, expecting %+v, got %+v, err %v", i, test.info, info, err) + } + } +} + +func TestDeletedFields(t *testing.T) { + tests := []struct { + original *mapping.IndexMappingImpl + updated *mapping.IndexMappingImpl + fieldInfo map[string]*index.FieldInfo + err bool + }{ + { + // no change between original and updated having type and default mapping + // => empty fieldInfo with no error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: 
true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: 
map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + fieldInfo: map[string]*index.FieldInfo{}, + err: false, + }, + { + // no changes in type mappings and default mapping disabled with changes + // => empty fieldInfo with no error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: 
&mapping.DocumentMapping{ + Enabled: false, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: false, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "d": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + 
DocValuesDynamic: false, + }, + fieldInfo: map[string]*index.FieldInfo{}, + err: false, + }, + { + // new type mappings in updated => error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: 
map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + fieldInfo: nil, + err: true, + }, + { + // new mappings in default mapping => error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{}, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{}, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: 
false, + DocValuesDynamic: false, + }, + fieldInfo: nil, + err: true, + }, + { + // fully removed mapping in type with some dynamic => error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: 
true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: true, + StoreDynamic: false, + DocValuesDynamic: false, + }, + fieldInfo: nil, + err: true, + }, + { + // semi removed mapping in default with some dynamic + // proper fieldInfo with no errors + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: 
map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: false, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + fieldInfo: map[string]*index.FieldInfo{ + "b": { + Index: true, + DocValues: true, + }, + }, + err: false, + }, + { + // two fields from diff 
paths with removed content matching + // => relavent fieldInfo + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: false, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + 
Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: false, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + fieldInfo: map[string]*index.FieldInfo{ + "a": { + Index: true, + DocValues: true, + }, + }, + err: false, + }, + { + // two fields from diff paths with removed content not matching + // => error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: 
[]*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: false, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { 
+ Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + fieldInfo: nil, + err: true, + }, + { + // two fields from the same path => relavent fieldInfo + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Name: "a", + Type: "numeric", + Index: true, + Store: true, + }, + { + Name: "b", + Type: "numeric", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Name: "a", + Type: "numeric", + Index: false, + Store: true, + }, + { + Name: "b", + Type: "numeric", + Index: true, + Store: 
false, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + fieldInfo: map[string]*index.FieldInfo{ + "a": { + Index: true, + DocValues: true, + }, + "b": { + Store: true, + }, + }, + err: false, + }, + { + // one store, one index, one dynamic and one all removed in type and default + // => relavent fieldInfo without error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Store: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map3": { + Enabled: true, + Dynamic: false, + 
Properties: map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + DocValues: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "d": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: true, + Store: true, + DocValues: true, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{ + "map1": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Index: false, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map2": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + Store: false, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + "map3": { + Enabled: true, + Dynamic: false, + Properties: 
map[string]*mapping.DocumentMapping{ + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "numeric", + DocValues: false, + }, + }, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + }, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "", + DefaultSynonymSource: "", + }, + IndexDynamic: false, + StoreDynamic: false, + DocValuesDynamic: false, + }, + fieldInfo: map[string]*index.FieldInfo{ + "a": { + Index: true, + DocValues: true, + }, + "b": { + Store: true, + }, + "c": { + DocValues: true, + }, + "d": { + All: true, + }, + }, + err: false, + }, + } + + for i, test := range tests { + info, err := DeletedFields(test.original, test.updated) + + if err == nil && test.err || err != nil && !test.err { + t.Errorf("Unexpected error value for test %d, expecting %t, got %v\n", i, test.err, err) + } + if info == nil && test.fieldInfo != nil || info != nil && test.fieldInfo == nil || !reflect.DeepEqual(info, test.fieldInfo) { + t.Errorf("Unexpected default info value for test %d, expecting %+v, got %+v, err %v", i, test.fieldInfo, info, err) + } + } +} + +func TestIndexUpdateText(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + indexMappingBefore := mapping.NewIndexMapping() + indexMappingBefore.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingBefore.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + 
DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "d": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + } + indexMappingBefore.IndexDynamic = false + indexMappingBefore.StoreDynamic = false + indexMappingBefore.DocValuesDynamic = false + + index, err := New(tmpIndexPath, indexMappingBefore) + if err != nil { + t.Fatal(err) + } + doc1 := map[string]interface{}{"a": "xyz", "b": "abc", "c": "def", "d": "ghi"} + doc2 := map[string]interface{}{"a": "uvw", "b": "rst", "c": "klm", "d": "pqr"} + doc3 := map[string]interface{}{"a": "xyz", "b": "def", "c": "abc", "d": "mno"} + batch := index.NewBatch() + err = batch.Index("001", doc1) + if err != nil { + t.Fatal(err) + } + err = batch.Index("002", doc2) + if err != nil { + t.Fatal(err) + } + err = batch.Index("003", doc3) + if err != nil { + t.Fatal(err) + } + err = index.Batch(batch) + if err != nil { + t.Fatal(err) + } + err = index.Close() + if err != nil { + t.Fatal(err) + } + + indexMappingAfter := mapping.NewIndexMapping() + indexMappingAfter.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingAfter.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: 
map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: false, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: false, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + } + indexMappingAfter.IndexDynamic = false + indexMappingAfter.StoreDynamic = false + indexMappingAfter.DocValuesDynamic = false + + mappingString, err := json.Marshal(indexMappingAfter) + if err != nil { + t.Fatal(err) + } + index, err = Update(tmpIndexPath, string(mappingString)) + if err != nil { + t.Fatal(err) + } + + q1 := NewSearchRequest(NewQueryStringQuery("a:*")) + q1.Fields = append(q1.Fields, "a") + res1, err := index.Search(q1) + if err != nil { + t.Fatal(err) + } + if len(res1.Hits) != 3 { + t.Fatalf("Expected 3 hits, got %d\n", len(res1.Hits)) + } + if len(res1.Hits[0].Fields) != 1 { + t.Fatalf("Expected 1 field, got %d\n", len(res1.Hits[0].Fields)) + } + q2 := NewSearchRequest(NewQueryStringQuery("b:*")) + q2.Fields = append(q2.Fields, "b") + res2, err := index.Search(q2) + if err != nil { + t.Fatal(err) + } + if len(res2.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) + } + q3 := NewSearchRequest(NewQueryStringQuery("c:*")) + q3.Fields = append(q3.Fields, "c") + res3, err := index.Search(q3) + if err != nil { + t.Fatal(err) + } + if 
len(res3.Hits) != 3 { + t.Fatalf("Expected 3 hits, got %d\n", len(res3.Hits)) + } + if len(res3.Hits[0].Fields) != 0 { + t.Fatalf("Expected 0 field, got %d\n", len(res3.Hits[0].Fields)) + } + q4 := NewSearchRequest(NewQueryStringQuery("d:*")) + q4.Fields = append(q4.Fields, "d") + res4, err := index.Search(q4) + if err != nil { + t.Fatal(err) + } + if len(res4.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res4.Hits)) + } +} + +func TestIndexUpdateVector(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + indexMappingBefore := mapping.NewIndexMapping() + indexMappingBefore.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingBefore.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector_base64", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "d": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector_base64", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + }, + Fields: []*mapping.FieldMapping{}, + } + indexMappingBefore.IndexDynamic = false + 
indexMappingBefore.StoreDynamic = false + indexMappingBefore.DocValuesDynamic = false + + index, err := New(tmpIndexPath, indexMappingBefore) + if err != nil { + t.Fatal(err) + } + doc1 := map[string]interface{}{"a": []float32{0.32894259691238403, 0.6973215341567993, 0.6835201978683472, 0.38296082615852356}, "b": []float32{0.32894259691238403, 0.6973215341567993, 0.6835201978683472, 0.38296082615852356}, "c": "L5MOPw7NID5SQMU9pHUoPw==", "d": "L5MOPw7NID5SQMU9pHUoPw=="} + doc2 := map[string]interface{}{"a": []float32{0.0018692062003538013, 0.41076546907424927, 0.5675257444381714, 0.45832985639572144}, "b": []float32{0.0018692062003538013, 0.41076546907424927, 0.5675257444381714, 0.45832985639572144}, "c": "czloP94ZCD71ldY+GbAOPw==", "d": "czloP94ZCD71ldY+GbAOPw=="} + doc3 := map[string]interface{}{"a": []float32{0.7853356599807739, 0.6904757618904114, 0.5643226504325867, 0.682637631893158}, "b": []float32{0.7853356599807739, 0.6904757618904114, 0.5643226504325867, 0.682637631893158}, "c": "Chh6P2lOqT47mjg/0odlPg==", "d": "Chh6P2lOqT47mjg/0odlPg=="} + batch := index.NewBatch() + err = batch.Index("001", doc1) + if err != nil { + t.Fatal(err) + } + err = batch.Index("002", doc2) + if err != nil { + t.Fatal(err) + } + err = batch.Index("003", doc3) + if err != nil { + t.Fatal(err) + } + err = index.Batch(batch) + if err != nil { + t.Fatal(err) + } + err = index.Close() + if err != nil { + t.Fatal(err) + } + + indexMappingAfter := mapping.NewIndexMapping() + indexMappingAfter.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingAfter.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "c": { + Enabled: true, + Dynamic: false, 
+ Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector_base64", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "d": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector_base64", + Index: false, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + }, + Fields: []*mapping.FieldMapping{}, + } + indexMappingAfter.IndexDynamic = false + indexMappingAfter.StoreDynamic = false + indexMappingAfter.DocValuesDynamic = false + + mappingString, err := json.Marshal(indexMappingAfter) + if err != nil { + t.Fatal(err) + } + index, err = Update(tmpIndexPath, string(mappingString)) + if err != nil { + t.Fatal(err) + } + + q1 := NewSearchRequest(NewMatchNoneQuery()) + q1.AddKNN("a", []float32{1, 2, 3, 4}, 3, 1.0) + res1, err := index.Search(q1) + if err != nil { + t.Fatal(err) + } + if len(res1.Hits) != 3 { + t.Fatalf("Expected 3 hits, got %d\n", len(res1.Hits)) + } + q2 := NewSearchRequest(NewMatchNoneQuery()) + q2.AddKNN("e", []float32{1, 2, 3, 4}, 3, 1.0) + res2, err := index.Search(q2) + if err != nil { + t.Fatal(err) + } + if len(res2.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) + } + q3 := NewSearchRequest(NewMatchNoneQuery()) + q3.AddKNN("c", []float32{1, 2, 3, 4}, 3, 1.0) + res3, err := index.Search(q3) + if err != nil { + t.Fatal(err) + } + if len(res3.Hits) != 3 { + t.Fatalf("Expected 3 hits, got %d\n", len(res3.Hits)) + } + q4 := NewSearchRequest(NewMatchNoneQuery()) + q4.AddKNN("d", []float32{1, 2, 3, 4}, 3, 1.0) + res4, err := index.Search(q4) + if err != nil { + t.Fatal(err) + } + if len(res4.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res4.Hits)) + } +} + +func TestIndexUpdateSynonym(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + 
synonymCollection := "collection1" + synonymSourceName := "english" + analyzer := en.AnalyzerName + synonymSourceConfig := map[string]interface{}{ + "collection": synonymCollection, + "analyzer": analyzer, + } + + a := mapping.NewTextFieldMapping() + a.Analyzer = analyzer + a.SynonymSource = synonymSourceName + a.IncludeInAll = false + + b := mapping.NewTextFieldMapping() + b.Analyzer = analyzer + b.SynonymSource = synonymSourceName + b.IncludeInAll = false + + c := mapping.NewTextFieldMapping() + c.Analyzer = analyzer + c.SynonymSource = synonymSourceName + c.IncludeInAll = false + + indexMappingBefore := mapping.NewIndexMapping() + indexMappingBefore.DefaultMapping.AddFieldMappingsAt("a", a) + indexMappingBefore.DefaultMapping.AddFieldMappingsAt("b", b) + indexMappingBefore.DefaultMapping.AddFieldMappingsAt("c", c) + err := indexMappingBefore.AddSynonymSource(synonymSourceName, synonymSourceConfig) + if err != nil { + t.Fatal(err) + } + + indexMappingBefore.IndexDynamic = false + indexMappingBefore.StoreDynamic = false + indexMappingBefore.DocValuesDynamic = false + + index, err := New(tmpIndexPath, indexMappingBefore) + if err != nil { + t.Fatal(err) + } + + doc1 := map[string]interface{}{ + "a": `The hardworking employee consistently strives to exceed expectations. + His industrious nature makes him a valuable asset to any team. + His conscientious attention to detail ensures that projects are completed efficiently and accurately. + He remains persistent even in the face of challenges.`, + "b": `The hardworking employee consistently strives to exceed expectations. + His industrious nature makes him a valuable asset to any team. + His conscientious attention to detail ensures that projects are completed efficiently and accurately. + He remains persistent even in the face of challenges.`, + "c": `The hardworking employee consistently strives to exceed expectations. + His industrious nature makes him a valuable asset to any team. 
+ His conscientious attention to detail ensures that projects are completed efficiently and accurately. + He remains persistent even in the face of challenges.`, + } + doc2 := map[string]interface{}{ + "a": `The tranquil surroundings of the retreat provide a perfect escape from the hustle and bustle of city life. + Guests enjoy the peaceful atmosphere, which is perfect for relaxation and rejuvenation. + The calm environment offers the ideal place to meditate and connect with nature. + Even the most stressed individuals find themselves feeling relaxed and at ease.`, + "b": `The tranquil surroundings of the retreat provide a perfect escape from the hustle and bustle of city life. + Guests enjoy the peaceful atmosphere, which is perfect for relaxation and rejuvenation. + The calm environment offers the ideal place to meditate and connect with nature. + Even the most stressed individuals find themselves feeling relaxed and at ease.`, + "c": `The tranquil surroundings of the retreat provide a perfect escape from the hustle and bustle of city life. + Guests enjoy the peaceful atmosphere, which is perfect for relaxation and rejuvenation. + The calm environment offers the ideal place to meditate and connect with nature. 
+ Even the most stressed individuals find themselves feeling relaxed and at ease.`, + } + synDoc1 := &SynonymDefinition{Synonyms: []string{"hardworking", "industrious", "conscientious", "persistent", "focused", "devoted"}} + synDoc2 := &SynonymDefinition{Synonyms: []string{"tranquil", "peaceful", "calm", "relaxed", "unruffled"}} + + batch := index.NewBatch() + err = batch.IndexSynonym("001", synonymCollection, synDoc1) + if err != nil { + t.Fatal(err) + } + err = batch.IndexSynonym("002", synonymCollection, synDoc2) + if err != nil { + t.Fatal(err) + } + err = batch.Index("003", doc1) + if err != nil { + t.Fatal(err) + } + err = batch.Index("004", doc2) + if err != nil { + t.Fatal(err) + } + err = index.Batch(batch) + if err != nil { + t.Fatal(err) + } + err = index.Close() + if err != nil { + t.Fatal(err) + } + + indexMappingAfter := mapping.NewIndexMapping() + indexMappingAfter.DefaultMapping.AddFieldMappingsAt("a", a) + b.Index = false + indexMappingAfter.DefaultMapping.AddFieldMappingsAt("b", b) + err = indexMappingAfter.AddSynonymSource(synonymSourceName, synonymSourceConfig) + if err != nil { + t.Fatal(err) + } + + indexMappingAfter.IndexDynamic = false + indexMappingAfter.StoreDynamic = false + indexMappingAfter.DocValuesDynamic = false + + mappingString, err := json.Marshal(indexMappingAfter) + if err != nil { + t.Fatal(err) + } + index, err = Update(tmpIndexPath, string(mappingString)) + if err != nil { + t.Fatal(err) + } + + q1 := NewSearchRequest(NewQueryStringQuery("a:devoted")) + res1, err := index.Search(q1) + if err != nil { + t.Fatal(err) + } + if len(res1.Hits) != 1 { + t.Fatalf("Expected 1 hit, got %d\n", len(res1.Hits)) + } + + q2 := NewSearchRequest(NewQueryStringQuery("b:devoted")) + res2, err := index.Search(q2) + if err != nil { + t.Fatal(err) + } + if len(res2.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) + } + + q3 := NewSearchRequest(NewQueryStringQuery("c:unruffled")) + res3, err := index.Search(q3) + if err != nil 
{ + t.Fatal(err) + } + if len(res3.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res3.Hits)) + } +} From 2e964455d788bf12b51c815e2d1aba3834fd6c97 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 4 Feb 2025 11:59:37 +0530 Subject: [PATCH 05/25] MB-57888: Minor Fixes - Bug Fixes - Name changes - More test cases --- index/scorch/optimize_knn.go | 2 +- index/scorch/persister.go | 7 +- index/scorch/scorch.go | 21 +- index/scorch/snapshot_index.go | 31 ++- index/scorch/snapshot_segment.go | 24 +- index_update.go | 24 +- index_update_test.go | 400 +++++++++++++++++++++++++++++-- 7 files changed, 468 insertions(+), 41 deletions(-) diff --git a/index/scorch/optimize_knn.go b/index/scorch/optimize_knn.go index cee0edff1..506c3893a 100644 --- a/index/scorch/optimize_knn.go +++ b/index/scorch/optimize_knn.go @@ -79,7 +79,7 @@ func (o *OptimizeVR) Finish() error { wg.Done() }() for field, vrs := range o.vrs { - if info, ok := o.snapshot.updatedFields[field]; ok && info.All || info.Index { + if info, ok := o.snapshot.updatedFields[field]; ok && info.RemoveAll || info.Index { continue } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 8874be89b..886cea15f 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -1055,14 +1055,17 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro } updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) if updatedFieldBytes != nil { - var updatedFields map[string]index.FieldInfo + var updatedFields map[string]index.UpdateFieldInfo err := json.Unmarshal(updatedFieldBytes, &updatedFields) if err != nil { _ = seg.Close() return nil, fmt.Errorf("error reading updated field bytes: %v", err) } - rv.updatedFields = updatedFields + rv.updatedFields = make(map[string]*index.UpdateFieldInfo) + for field, info := range updatedFields { + rv.updatedFields[field] = &info + } } return rv, nil diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 
285f8b0f8..80b9a874c 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -947,9 +947,9 @@ func (s *Scorch) FireIndexEvent() { // Updates bolt db with the given field info. Existing field info already in bolt // will be merged before persisting. The index mapping is also overwritted both // in bolt as well as the index snapshot -func (s *Scorch) UpdateFields(fieldInfo map[string]*index.FieldInfo, mappingBytes []byte) error { +func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error { // Switch from pointer to value to marshal into a json for storage - updatedFields := make(map[string]index.FieldInfo) + updatedFields := make(map[string]index.UpdateFieldInfo) for field, info := range fieldInfo { updatedFields[field] = *info } @@ -958,13 +958,13 @@ func (s *Scorch) UpdateFields(fieldInfo map[string]*index.FieldInfo, mappingByte return err } s.root.m.Lock() - s.root.updatedFields = updatedFields + s.root.UpdateFieldsInfo(fieldInfo) s.root.m.Unlock() return nil } // Merge and update deleted field info and rewrite index mapping -func (s *Scorch) updateBolt(fieldInfo map[string]index.FieldInfo, mappingBytes []byte) error { +func (s *Scorch) updateBolt(fieldInfo map[string]index.UpdateFieldInfo, mappingBytes []byte) error { return s.rootBolt.Update(func(tx *bolt.Tx) error { snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { @@ -995,7 +995,7 @@ func (s *Scorch) updateBolt(fieldInfo map[string]index.FieldInfo, mappingBytes [ if segmentBucket == nil { return fmt.Errorf("segment key, but bucket missing %x", kk) } - var updatedFields map[string]index.FieldInfo + var updatedFields map[string]index.UpdateFieldInfo updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) if updatedFieldBytes != nil { err := json.Unmarshal(updatedFieldBytes, &updatedFields) @@ -1003,7 +1003,16 @@ func (s *Scorch) updateBolt(fieldInfo map[string]index.FieldInfo, mappingBytes [ return fmt.Errorf("error reading updated 
field bytes: %v", err) } for field, info := range fieldInfo { - updatedFields[field] = info + if val, ok := updatedFields[field]; ok { + updatedFields[field] = index.UpdateFieldInfo{ + RemoveAll: info.RemoveAll || val.RemoveAll, + Store: info.Store || val.Store, + DocValues: info.DocValues || val.DocValues, + Index: info.Index || val.Index, + } + } else { + updatedFields[field] = info + } } } else { updatedFields = fieldInfo diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index da630b870..25432c4e6 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -85,7 +85,7 @@ type IndexSnapshot struct { m3 sync.RWMutex // bm25 metrics specific - not to interfere with TFR creation fieldCardinality map[string]int - updatedFields map[string]index.FieldInfo + updatedFields map[string]*index.UpdateFieldInfo } func (i *IndexSnapshot) Segments() []*SegmentSnapshot { @@ -513,7 +513,7 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) { // Skip fields that are supposed to have deleted store values if info, ok := is.updatedFields[name]; ok && - (info.All || info.Store) { + (info.RemoveAll || info.Store) { return true } @@ -648,7 +648,7 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field // Skip fields that are supposed to have no indexing if info, ok := is.updatedFields[field]; ok && - (info.Index || info.All) { + (info.Index || info.RemoveAll) { dict, err = s.segment.Dictionary("") } else { dict, err = s.segment.Dictionary(field) @@ -806,7 +806,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( var filteredFields []string for _, field := range vFields { if info, ok := is.updatedFields[field]; ok && - (info.DocValues || info.All) { + (info.DocValues || info.RemoveAll) { continue } else { filteredFields = append(filteredFields, field) @@ -1191,3 +1191,26 @@ func (is *IndexSnapshot) ThesaurusKeysRegexp(name string, func (is *IndexSnapshot) 
UpdateSynonymSearchCount(delta uint64) { atomic.AddUint64(&is.parent.stats.TotSynonymSearches, delta) } + +func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { + + if is.updatedFields == nil { + is.updatedFields = updatedFields + } else { + for fieldName, info := range updatedFields { + if val, ok := is.updatedFields[fieldName]; ok { + val.RemoveAll = val.RemoveAll || info.RemoveAll + val.Index = val.Index || info.Index + val.DocValues = val.DocValues || info.DocValues + val.Store = val.Store || info.Store + } else { + is.updatedFields[fieldName] = info + } + } + } + + for _, segmentSnapshot := range is.segment { + segmentSnapshot.UpdateFieldsInfo(updatedFields) + } + +} diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index df3cafb2d..4154ad540 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -41,7 +41,7 @@ type SegmentSnapshot struct { deleted *roaring.Bitmap creator string stats *fieldStats - updatedFields map[string]index.FieldInfo + updatedFields map[string]*index.UpdateFieldInfo cachedMeta *cachedMeta @@ -147,6 +147,28 @@ func (s *SegmentSnapshot) Size() (rv int) { return } +func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { + + if s.updatedFields == nil { + s.updatedFields = updatedFields + } else { + for fieldName, info := range updatedFields { + if val, ok := s.updatedFields[fieldName]; ok { + val.RemoveAll = val.RemoveAll || info.RemoveAll + val.Index = val.Index || info.Index + val.DocValues = val.DocValues || info.DocValues + val.Store = val.Store || info.Store + } else { + s.updatedFields[fieldName] = info + } + } + } + + if segment, ok := s.segment.(segment.UpdatableSegment); ok { + segment.PutUpdatedFields(s.updatedFields) + } +} + type cachedFieldDocs struct { m sync.Mutex readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. 
diff --git a/index_update.go b/index_update.go index 0b1de1e16..84f1106b4 100644 --- a/index_update.go +++ b/index_update.go @@ -46,7 +46,7 @@ type defaultInfo struct { } // Compare two index mappings to identify all of the updatable changes -func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldInfo, error) { +func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.UpdateFieldInfo, error) { var err error defaultChanges, err := compareMappings(ori, upd) @@ -90,7 +90,7 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI // Compare both the mappings based on the document paths // and create a list of index, docvalues, store differences // for every single field possible - fieldInfo := make(map[string]*index.FieldInfo) + fieldInfo := make(map[string]*index.UpdateFieldInfo) for path, info := range oriPaths { err = addFieldInfo(fieldInfo, info, updPaths[path], defaultChanges) if err != nil { @@ -101,10 +101,10 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.FieldI // Remove entries from the list with no changes between the // original and the updated mapping for name, info := range fieldInfo { - if !info.All && !info.Index && !info.DocValues && !info.Store { + if !info.RemoveAll && !info.Index && !info.DocValues && !info.Store { delete(fieldInfo, name) } - if info.All { + if info.RemoveAll { if upd.IndexDynamic { return nil, fmt.Errorf("Mapping cannot be removed when index dynamic is true") } @@ -261,9 +261,9 @@ func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMa } // Compare all of the fields at a particular document path and add its field information -func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo, defaultChanges *defaultInfo) error { +func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, defaultChanges *defaultInfo) error { - var info *index.FieldInfo + var info *index.UpdateFieldInfo 
var updated bool var err error @@ -316,13 +316,13 @@ func addFieldInfo(fInfo map[string]*index.FieldInfo, ori, upd *pathInfo, default // second return argument gives a flag indicating whether any changes, if detected, are doable or if // update is impossible // third argument is an error explaining exactly why the change is not possible -func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges *defaultInfo) (*index.FieldInfo, bool, error) { +func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges *defaultInfo) (*index.UpdateFieldInfo, bool, error) { - rv := &index.FieldInfo{} + rv := &index.UpdateFieldInfo{} if updated == nil { if original != nil && !original.IncludeInAll { - rv.All = true + rv.RemoveAll = true return rv, true, nil } else if original == nil { return nil, false, nil @@ -406,7 +406,7 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges } } - if rv.All || rv.Index || rv.Store || rv.DocValues { + if rv.RemoveAll || rv.Index || rv.Store || rv.DocValues { return rv, true, nil } return rv, false, nil @@ -414,7 +414,7 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges // After identifying changes, validate against the existing changes incase of duplicate fields. 
// In such a situation, any conflicting changes found will abort the update process -func validateFieldInfo(newInfo *index.FieldInfo, updated bool, fInfo map[string]*index.FieldInfo, +func validateFieldInfo(newInfo *index.UpdateFieldInfo, updated bool, fInfo map[string]*index.UpdateFieldInfo, ori *pathInfo, oriFMapInfo *fieldMapInfo) error { var name string @@ -437,7 +437,7 @@ func validateFieldInfo(newInfo *index.FieldInfo, updated bool, fInfo map[string] } } if oldInfo, ok := fInfo[name]; ok { - if oldInfo.All != newInfo.All || oldInfo.Index != newInfo.Index || + if oldInfo.RemoveAll != newInfo.RemoveAll || oldInfo.Index != newInfo.Index || oldInfo.DocValues != newInfo.DocValues || oldInfo.Store != newInfo.Store { return fmt.Errorf("updated field impossible to verify because multiple mappings point to the same field name") } diff --git a/index_update_test.go b/index_update_test.go index 83c38e077..9e19b0484 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -15,11 +15,16 @@ package bleve import ( + "context" "encoding/json" + "fmt" + "math/rand" "reflect" "testing" "github.com/blevesearch/bleve/v2/analysis/lang/en" + "github.com/blevesearch/bleve/v2/index/scorch" + "github.com/blevesearch/bleve/v2/index/scorch/mergeplan" "github.com/blevesearch/bleve/v2/mapping" index "github.com/blevesearch/bleve_index_api" ) @@ -29,7 +34,7 @@ func TestCompareFieldMapping(t *testing.T) { original *mapping.FieldMapping updated *mapping.FieldMapping defaultChanges *defaultInfo - indexFieldInfo *index.FieldInfo + indexFieldInfo *index.UpdateFieldInfo changed bool err bool }{ @@ -49,8 +54,8 @@ func TestCompareFieldMapping(t *testing.T) { dateTimeParser: false, synonymSource: false, }, - indexFieldInfo: &index.FieldInfo{ - All: true, + indexFieldInfo: &index.UpdateFieldInfo{ + RemoveAll: true, }, changed: true, err: false, @@ -277,7 +282,7 @@ func TestCompareFieldMapping(t *testing.T) { dateTimeParser: false, synonymSource: false, }, - indexFieldInfo: &index.FieldInfo{ + 
indexFieldInfo: &index.UpdateFieldInfo{ Index: true, DocValues: true, }, @@ -298,7 +303,7 @@ func TestCompareFieldMapping(t *testing.T) { dateTimeParser: false, synonymSource: false, }, - indexFieldInfo: &index.FieldInfo{ + indexFieldInfo: &index.UpdateFieldInfo{ DocValues: true, }, changed: true, @@ -342,7 +347,7 @@ func TestCompareFieldMapping(t *testing.T) { dateTimeParser: false, synonymSource: false, }, - indexFieldInfo: &index.FieldInfo{}, + indexFieldInfo: &index.UpdateFieldInfo{}, changed: false, err: false, }, @@ -498,7 +503,7 @@ func TestDeletedFields(t *testing.T) { tests := []struct { original *mapping.IndexMappingImpl updated *mapping.IndexMappingImpl - fieldInfo map[string]*index.FieldInfo + fieldInfo map[string]*index.UpdateFieldInfo err bool }{ { @@ -650,7 +655,7 @@ func TestDeletedFields(t *testing.T) { StoreDynamic: false, DocValuesDynamic: false, }, - fieldInfo: map[string]*index.FieldInfo{}, + fieldInfo: map[string]*index.UpdateFieldInfo{}, err: false, }, { @@ -802,7 +807,7 @@ func TestDeletedFields(t *testing.T) { StoreDynamic: false, DocValuesDynamic: false, }, - fieldInfo: map[string]*index.FieldInfo{}, + fieldInfo: map[string]*index.UpdateFieldInfo{}, err: false, }, { @@ -1267,7 +1272,7 @@ func TestDeletedFields(t *testing.T) { StoreDynamic: false, DocValuesDynamic: false, }, - fieldInfo: map[string]*index.FieldInfo{ + fieldInfo: map[string]*index.UpdateFieldInfo{ "b": { Index: true, DocValues: true, @@ -1424,7 +1429,7 @@ func TestDeletedFields(t *testing.T) { StoreDynamic: false, DocValuesDynamic: false, }, - fieldInfo: map[string]*index.FieldInfo{ + fieldInfo: map[string]*index.UpdateFieldInfo{ "a": { Index: true, DocValues: true, @@ -1704,7 +1709,7 @@ func TestDeletedFields(t *testing.T) { StoreDynamic: false, DocValuesDynamic: false, }, - fieldInfo: map[string]*index.FieldInfo{ + fieldInfo: map[string]*index.UpdateFieldInfo{ "a": { Index: true, DocValues: true, @@ -1896,7 +1901,7 @@ func TestDeletedFields(t *testing.T) { StoreDynamic: 
false, DocValuesDynamic: false, }, - fieldInfo: map[string]*index.FieldInfo{ + fieldInfo: map[string]*index.UpdateFieldInfo{ "a": { Index: true, DocValues: true, @@ -1908,7 +1913,7 @@ func TestDeletedFields(t *testing.T) { DocValues: true, }, "d": { - All: true, + RemoveAll: true, }, }, err: false, @@ -2128,7 +2133,7 @@ func TestIndexUpdateText(t *testing.T) { t.Fatalf("Expected 3 hits, got %d\n", len(res3.Hits)) } if len(res3.Hits[0].Fields) != 0 { - t.Fatalf("Expected 0 field, got %d\n", len(res3.Hits[0].Fields)) + t.Fatalf("Expected 0 fields, got %d\n", len(res3.Hits[0].Fields)) } q4 := NewSearchRequest(NewQueryStringQuery("d:*")) q4.Fields = append(q4.Fields, "d") @@ -2496,3 +2501,368 @@ func TestIndexUpdateSynonym(t *testing.T) { t.Fatalf("Expected 0 hits, got %d\n", len(res3.Hits)) } } + +func TestIndexUpdateMerge(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + indexMappingBefore := mapping.NewIndexMapping() + indexMappingBefore.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingBefore.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "d": { + Enabled: 
true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + } + indexMappingBefore.IndexDynamic = false + indexMappingBefore.StoreDynamic = false + indexMappingBefore.DocValuesDynamic = false + + index, err := New(tmpIndexPath, indexMappingBefore) + if err != nil { + t.Fatal(err) + } + + numDocsPerBatch := 1000 + numBatches := 3 + + var batch *Batch + doc := make(map[string]interface{}) + const letters = "abcdefghijklmnopqrstuvwxyz" + + randStr := func() string { + result := make([]byte, 3) + for i := 0; i < 3; i++ { + result[i] = letters[rand.Intn(len(letters))] + } + return string(result) + } + for i := 0; i < numBatches; i++ { + batch = index.NewBatch() + for j := 0; j < numDocsPerBatch; j++ { + doc["a"] = randStr() + doc["b"] = randStr() + doc["c"] = randStr() + doc["d"] = randStr() + err = batch.Index(fmt.Sprintf("%d", i*numDocsPerBatch+j), doc) + if err != nil { + t.Fatal(err) + } + } + err = index.Batch(batch) + if err != nil { + t.Fatal(err) + } + } + + err = index.Close() + if err != nil { + t.Fatal(err) + } + + indexMappingAfter := mapping.NewIndexMapping() + indexMappingAfter.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingAfter.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + 
Index: false, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: false, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + } + indexMappingAfter.IndexDynamic = false + indexMappingAfter.StoreDynamic = false + indexMappingAfter.DocValuesDynamic = false + + mappingString, err := json.Marshal(indexMappingAfter) + if err != nil { + t.Fatal(err) + } + index, err = Update(tmpIndexPath, string(mappingString)) + if err != nil { + t.Fatal(err) + } + + impl, ok := index.(*indexImpl) + if !ok { + t.Fatalf("Typecasting index to indexImpl failed") + } + sindex, ok := impl.i.(*scorch.Scorch) + if !ok { + t.Fatalf("Typecasting index to scorch index failed") + } + + err = sindex.ForceMerge(context.Background(), &mergeplan.SingleSegmentMergePlanOptions) + if err != nil { + t.Fatal(err) + } + + q1 := NewSearchRequest(NewQueryStringQuery("a:*")) + q1.Fields = append(q1.Fields, "a") + + res1, err := index.Search(q1) + if err != nil { + t.Fatal(err) + } + if len(res1.Hits) != 10 { + t.Fatalf("Expected 10 hits, got %d\n", len(res1.Hits)) + } + if len(res1.Hits[0].Fields) != 1 { + t.Fatalf("Expected 1 field, got %d\n", len(res1.Hits[0].Fields)) + } + q2 := NewSearchRequest(NewQueryStringQuery("b:*")) + q2.Fields = append(q2.Fields, "b") + res2, err := index.Search(q2) + if err != nil { + t.Fatal(err) + } + if len(res2.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) + } + q3 := NewSearchRequest(NewQueryStringQuery("c:*")) + q3.Fields = append(q3.Fields, "c") + res3, err := index.Search(q3) + if err != nil { + t.Fatal(err) + } + if len(res3.Hits) != 10 { + t.Fatalf("Expected 10 hits, got %d\n", len(res3.Hits)) + } + if 
len(res3.Hits[0].Fields) != 0 { + t.Fatalf("Expected 0 fields, got %d\n", len(res3.Hits[0].Fields)) + } + q4 := NewSearchRequest(NewQueryStringQuery("d:*")) + q4.Fields = append(q4.Fields, "d") + res4, err := index.Search(q4) + if err != nil { + t.Fatal(err) + } + if len(res4.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res4.Hits)) + } + +} + +func BenchmarkIndexUpdateText(b *testing.B) { + + tmpIndexPath := createTmpIndexPath(b) + defer cleanupTmpIndexPath(b, tmpIndexPath) + + indexMappingBefore := mapping.NewIndexMapping() + indexMappingBefore.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingBefore.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: true, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + } + indexMappingBefore.IndexDynamic = false + indexMappingBefore.StoreDynamic = false + indexMappingBefore.DocValuesDynamic = false + + index, err := New(tmpIndexPath, indexMappingBefore) + if err != nil { + b.Fatal(err) + } + + numDocsPerBatch := 1000 + numBatches := 5 + + var batch *Batch + doc := make(map[string]interface{}) + const letters = "abcdefghijklmnopqrstuvwxyz" + + randStr := func() string { + result := make([]byte, 3) + for i := 0; i < 3; i++ { + result[i] = letters[rand.Intn(len(letters))] + } + return string(result) + } + for i := 0; i < numBatches; i++ { + batch = index.NewBatch() + for j := 0; j < numDocsPerBatch; j++ { + doc["a"] = randStr() + err = batch.Index(fmt.Sprintf("%d", i*numDocsPerBatch+j), doc) + if err != nil { + b.Fatal(err) + } + } + err = index.Batch(batch) + if err != nil { + b.Fatal(err) + } + } + + err = index.Close() + if 
err != nil { + b.Fatal(err) + } + + indexMappingAfter := mapping.NewIndexMapping() + indexMappingAfter.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingAfter.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "text", + Index: true, + Store: false, + }, + }, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + }, + }, + Fields: []*mapping.FieldMapping{}, + DefaultAnalyzer: "standard", + DefaultSynonymSource: "", + } + indexMappingAfter.IndexDynamic = false + indexMappingAfter.StoreDynamic = false + indexMappingAfter.DocValuesDynamic = false + + mappingString, err := json.Marshal(indexMappingAfter) + if err != nil { + b.Fatal(err) + } + index, err = Update(tmpIndexPath, string(mappingString)) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + q := NewQueryStringQuery("a:*") + req := NewSearchRequest(q) + if _, err = index.Search(req); err != nil { + b.Fatal(err) + } + } +} From 0410448f185822b418bad7544006ec1dc0ee3081 Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 7 Feb 2025 15:25:13 +0530 Subject: [PATCH 06/25] MB-57888: Naming changes --- index/scorch/optimize_knn.go | 2 +- index/scorch/scorch.go | 2 +- index/scorch/snapshot_index.go | 8 ++++---- index/scorch/snapshot_segment.go | 2 +- index_update.go | 10 +++++----- index_update_test.go | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/index/scorch/optimize_knn.go b/index/scorch/optimize_knn.go index 506c3893a..7fda843d4 100644 --- a/index/scorch/optimize_knn.go +++ b/index/scorch/optimize_knn.go @@ -79,7 +79,7 @@ func (o *OptimizeVR) Finish() error { wg.Done() }() for field, vrs := range o.vrs { - if info, ok := o.snapshot.updatedFields[field]; ok && info.RemoveAll || info.Index { + if info, ok := 
o.snapshot.updatedFields[field]; ok && info.Deleted || info.Index { continue } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 80b9a874c..aacabf4af 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -1005,7 +1005,7 @@ func (s *Scorch) updateBolt(fieldInfo map[string]index.UpdateFieldInfo, mappingB for field, info := range fieldInfo { if val, ok := updatedFields[field]; ok { updatedFields[field] = index.UpdateFieldInfo{ - RemoveAll: info.RemoveAll || val.RemoveAll, + Deleted: info.Deleted || val.Deleted, Store: info.Store || val.Store, DocValues: info.DocValues || val.DocValues, Index: info.Index || val.Index, diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 25432c4e6..3adaab802 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -513,7 +513,7 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) { // Skip fields that are supposed to have deleted store values if info, ok := is.updatedFields[name]; ok && - (info.RemoveAll || info.Store) { + (info.Deleted || info.Store) { return true } @@ -648,7 +648,7 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field // Skip fields that are supposed to have no indexing if info, ok := is.updatedFields[field]; ok && - (info.Index || info.RemoveAll) { + (info.Index || info.Deleted) { dict, err = s.segment.Dictionary("") } else { dict, err = s.segment.Dictionary(field) @@ -806,7 +806,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( var filteredFields []string for _, field := range vFields { if info, ok := is.updatedFields[field]; ok && - (info.DocValues || info.RemoveAll) { + (info.DocValues || info.Deleted) { continue } else { filteredFields = append(filteredFields, field) @@ -1199,7 +1199,7 @@ func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.Update } else { for fieldName, info := range updatedFields { if val, ok := 
is.updatedFields[fieldName]; ok { - val.RemoveAll = val.RemoveAll || info.RemoveAll + val.Deleted = val.Deleted || info.Deleted val.Index = val.Index || info.Index val.DocValues = val.DocValues || info.DocValues val.Store = val.Store || info.Store diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 4154ad540..a4bd3deb5 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -154,7 +154,7 @@ func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.Updat } else { for fieldName, info := range updatedFields { if val, ok := s.updatedFields[fieldName]; ok { - val.RemoveAll = val.RemoveAll || info.RemoveAll + val.Deleted = val.Deleted || info.Deleted val.Index = val.Index || info.Index val.DocValues = val.DocValues || info.DocValues val.Store = val.Store || info.Store diff --git a/index_update.go b/index_update.go index 84f1106b4..769239b8e 100644 --- a/index_update.go +++ b/index_update.go @@ -101,10 +101,10 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update // Remove entries from the list with no changes between the // original and the updated mapping for name, info := range fieldInfo { - if !info.RemoveAll && !info.Index && !info.DocValues && !info.Store { + if !info.Deleted && !info.Index && !info.DocValues && !info.Store { delete(fieldInfo, name) } - if info.RemoveAll { + if info.Deleted { if upd.IndexDynamic { return nil, fmt.Errorf("Mapping cannot be removed when index dynamic is true") } @@ -322,7 +322,7 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges if updated == nil { if original != nil && !original.IncludeInAll { - rv.RemoveAll = true + rv.Deleted = true return rv, true, nil } else if original == nil { return nil, false, nil @@ -406,7 +406,7 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges } } - if rv.RemoveAll || rv.Index || rv.Store || rv.DocValues { + if rv.Deleted || 
rv.Index || rv.Store || rv.DocValues { return rv, true, nil } return rv, false, nil @@ -437,7 +437,7 @@ func validateFieldInfo(newInfo *index.UpdateFieldInfo, updated bool, fInfo map[s } } if oldInfo, ok := fInfo[name]; ok { - if oldInfo.RemoveAll != newInfo.RemoveAll || oldInfo.Index != newInfo.Index || + if oldInfo.Deleted != newInfo.Deleted || oldInfo.Index != newInfo.Index || oldInfo.DocValues != newInfo.DocValues || oldInfo.Store != newInfo.Store { return fmt.Errorf("updated field impossible to verify because multiple mappings point to the same field name") } diff --git a/index_update_test.go b/index_update_test.go index 9e19b0484..1eb645747 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -55,7 +55,7 @@ func TestCompareFieldMapping(t *testing.T) { synonymSource: false, }, indexFieldInfo: &index.UpdateFieldInfo{ - RemoveAll: true, + Deleted: true, }, changed: true, err: false, @@ -1913,7 +1913,7 @@ func TestDeletedFields(t *testing.T) { DocValues: true, }, "d": { - RemoveAll: true, + Deleted: true, }, }, err: false, From 4c5dd0be238d4a3aea069693259cb0430de0514f Mon Sep 17 00:00:00 2001 From: Likith B Date: Wed, 19 Feb 2025 18:53:04 +0530 Subject: [PATCH 07/25] MB-57888: Added analyser and datetime parser checks - Test case coverage for the same - Better loading and storing from bolt --- index/scorch/persister.go | 3 +- index/scorch/snapshot_index.go | 12 +- index_update.go | 155 ++++++++++-- index_update_test.go | 436 ++++++++++++++++++++++++++------- 4 files changed, 496 insertions(+), 110 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 886cea15f..d65bcf4ff 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -1004,7 +1004,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { rv.segment = append(rv.segment, segmentSnapshot) rv.offsets = append(rv.offsets, running) if segmentSnapshot.updatedFields != nil { - rv.updatedFields = segmentSnapshot.updatedFields + 
rv.MergeUpdateFieldsInfo(segmentSnapshot.updatedFields) } running += segmentSnapshot.segment.Count() } @@ -1066,6 +1066,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro for field, info := range updatedFields { rv.updatedFields[field] = &info } + rv.UpdateFieldsInfo(rv.updatedFields) } return rv, nil diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 3adaab802..74e48eb14 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -1193,7 +1193,14 @@ func (is *IndexSnapshot) UpdateSynonymSearchCount(delta uint64) { } func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { + is.MergeUpdateFieldsInfo(updatedFields) + for _, segmentSnapshot := range is.segment { + segmentSnapshot.UpdateFieldsInfo(is.updatedFields) + } +} + +func (is *IndexSnapshot) MergeUpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { if is.updatedFields == nil { is.updatedFields = updatedFields } else { @@ -1208,9 +1215,4 @@ func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.Update } } } - - for _, segmentSnapshot := range is.segment { - segmentSnapshot.UpdateFieldsInfo(updatedFields) - } - } diff --git a/index_update.go b/index_update.go index 769239b8e..1a56d2432 100644 --- a/index_update.go +++ b/index_update.go @@ -16,7 +16,9 @@ package bleve import ( "fmt" + "reflect" + "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/mapping" index "github.com/blevesearch/bleve_index_api" ) @@ -33,16 +35,16 @@ type pathInfo struct { // Store the field information with respect to the // document paths type fieldMapInfo struct { - fieldMapping *mapping.FieldMapping - rootName string - parent *pathInfo + fieldMapping *mapping.FieldMapping + analyzer string + datetimeParser string + rootName string + parent *pathInfo } // Store all of the changes to defaults type defaultInfo struct { - analyzer bool - dateTimeParser bool 
- synonymSource bool + synonymSource bool } // Compare two index mappings to identify all of the updatable changes @@ -87,6 +89,18 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update } addPathInfo(updPaths, "", upd.DefaultMapping, ori, nil, "") + // Compare all analysers currently in use + err = compareAnalysers(oriPaths, updPaths, ori, upd) + if err != nil { + return nil, err + } + + // Compare all datetime parsers currently in use + err = compareDateTimeParsers(oriPaths, updPaths, ori, upd) + if err != nil { + return nil, err + } + // Compare both the mappings based on the document paths // and create a list of index, docvalues, store differences // for every single field possible @@ -131,14 +145,6 @@ func compareMappings(ori, upd *mapping.IndexMappingImpl) (*defaultInfo, error) { return nil, fmt.Errorf("default type cannot be changed") } - if ori.DefaultAnalyzer != upd.DefaultAnalyzer { - rv.analyzer = true - } - - if ori.DefaultDateTimeParser != upd.DefaultDateTimeParser { - rv.dateTimeParser = true - } - if ori.DefaultSynonymSource != upd.DefaultSynonymSource { rv.synonymSource = true } @@ -260,6 +266,107 @@ func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMa paths[name] = pInfo } +func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { + + oriAnalyzers := make(map[string]interface{}) + updAnalyzers := make(map[string]interface{}) + oriCustomAnalysers := ori.CustomAnalysis.Analyzers + updCustomAnalysers := upd.CustomAnalysis.Analyzers + + for path, info := range oriPaths { + if len(info.fieldMapInfo) == 0 { + continue + } + for _, fInfo := range info.fieldMapInfo { + if fInfo.fieldMapping.Type == "text" { + analyzerName := ori.AnalyzerNameForPath(path) + fInfo.analyzer = analyzerName + if val, ok := oriCustomAnalysers[analyzerName]; ok { + oriAnalyzers[analyzerName] = val + } + } + } + } + + for path, info := range updPaths { + if 
len(info.fieldMapInfo) == 0 { + continue + } + for _, fInfo := range info.fieldMapInfo { + if fInfo.fieldMapping.Type == "text" { + analyzerName := upd.AnalyzerNameForPath(path) + fInfo.analyzer = analyzerName + if val, ok := updCustomAnalysers[analyzerName]; ok { + updAnalyzers[analyzerName] = val + } + } + } + } + + for name, anUpd := range updAnalyzers { + if anOri, ok := oriAnalyzers[name]; ok { + if !reflect.DeepEqual(anUpd, anOri) { + return fmt.Errorf("analyser %s changed while being used by fields", name) + } + } else { + return fmt.Errorf("analyser %s newly added to an existing field", name) + } + } + + return nil +} + +func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { + + oriDateTimeParsers := make(map[string]analysis.DateTimeParser) + updDateTimeParsers := make(map[string]analysis.DateTimeParser) + + for _, info := range oriPaths { + if len(info.fieldMapInfo) == 0 { + continue + } + for _, fInfo := range info.fieldMapInfo { + if fInfo.fieldMapping.Type == "datetime" { + if fInfo.fieldMapping.DateFormat == "" { + fInfo.datetimeParser = ori.DefaultDateTimeParser + oriDateTimeParsers[ori.DefaultDateTimeParser] = ori.DateTimeParserNamed(ori.DefaultDateTimeParser) + } else { + oriDateTimeParsers[fInfo.fieldMapping.DateFormat] = ori.DateTimeParserNamed(fInfo.fieldMapping.DateFormat) + } + } + } + } + + for _, info := range updPaths { + if len(info.fieldMapInfo) == 0 { + continue + } + for _, fInfo := range info.fieldMapInfo { + if fInfo.fieldMapping.Type == "datetime" { + if fInfo.fieldMapping.DateFormat == "" { + fInfo.datetimeParser = upd.DefaultDateTimeParser + updDateTimeParsers[upd.DefaultDateTimeParser] = upd.DateTimeParserNamed(upd.DefaultDateTimeParser) + } else { + fInfo.datetimeParser = fInfo.fieldMapping.DateFormat + updDateTimeParsers[fInfo.fieldMapping.DateFormat] = upd.DateTimeParserNamed(fInfo.fieldMapping.DateFormat) + } + } + } + } + + for name, dtUpd := range updDateTimeParsers { 
+ if dtOri, ok := oriDateTimeParsers[name]; ok { + if !reflect.DeepEqual(dtUpd, dtOri) { + return fmt.Errorf("datetime parser %s changed while being used by fields", name) + } + } else { + return fmt.Errorf("datetime parser %s added to an existing field", name) + } + } + + return nil +} + // Compare all of the fields at a particular document path and add its field information func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, defaultChanges *defaultInfo) error { @@ -283,14 +390,28 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, d } else { for _, oriFMapInfo := range ori.fieldMapInfo { var updFMap *mapping.FieldMapping + var updAnalyser string + var updDatetimeParser string + // For multiple fields at a single document path, compare // only with the matching ones for _, updFMapInfo := range upd.fieldMapInfo { if oriFMapInfo.rootName == updFMapInfo.rootName && oriFMapInfo.fieldMapping.Name == updFMapInfo.fieldMapping.Name { updFMap = updFMapInfo.fieldMapping + if updFMap.Type == "text" { + updAnalyser = updFMapInfo.analyzer + } else if updFMap.Type == "datetime" { + updDatetimeParser = updFMapInfo.datetimeParser + } } } + if updAnalyser != "" && oriFMapInfo.analyzer != updAnalyser { + return fmt.Errorf("analyser has been changed for a text field") + } + if updDatetimeParser != "" && oriFMapInfo.datetimeParser != updDatetimeParser { + return fmt.Errorf("datetime parser has been changed for a text field") + } info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap, defaultChanges) if err != nil { @@ -338,20 +459,16 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges if original.Type == "text" { if original.SynonymSource != updated.SynonymSource { return nil, false, fmt.Errorf("synonym source cannot be changed for text field") - } else if original.SynonymSource == "inherit" && defaultChanges.synonymSource { + } else if original.SynonymSource == "" && 
defaultChanges.synonymSource { return nil, false, fmt.Errorf("synonym source cannot be changed for possible inherited text field") } if original.Analyzer != updated.Analyzer { return nil, false, fmt.Errorf("analyzer cannot be updated for text fields") - } else if original.Analyzer == "inherit" && defaultChanges.analyzer { - return nil, false, fmt.Errorf("default analyzer changed for possible inherited text field") } } if original.Type == "datetime" { if original.DateFormat != updated.DateFormat { return nil, false, fmt.Errorf("dateFormat cannot be updated for datetime fields") - } else if original.DateFormat == "inherit" && defaultChanges.dateTimeParser { - return nil, false, fmt.Errorf("default analyzer changed for possible inherited text field") } } if original.Type == "vector" || original.Type == "vector_base64" { diff --git a/index_update_test.go b/index_update_test.go index 1eb645747..9c84e6f73 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -22,7 +22,14 @@ import ( "reflect" "testing" + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" + "github.com/blevesearch/bleve/v2/analysis/analyzer/simple" + "github.com/blevesearch/bleve/v2/analysis/datetime/percent" + "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized" "github.com/blevesearch/bleve/v2/analysis/lang/en" + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/whitespace" "github.com/blevesearch/bleve/v2/index/scorch" "github.com/blevesearch/bleve/v2/index/scorch/mergeplan" "github.com/blevesearch/bleve/v2/mapping" @@ -50,9 +57,7 @@ func TestCompareFieldMapping(t *testing.T) { original: &mapping.FieldMapping{}, updated: nil, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: &index.UpdateFieldInfo{ Deleted: true, @@ -68,9 +73,7 @@ func TestCompareFieldMapping(t 
*testing.T) { Type: "datetime", }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: nil, changed: false, @@ -86,9 +89,7 @@ func TestCompareFieldMapping(t *testing.T) { SynonymSource: "b", }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: nil, changed: false, @@ -104,45 +105,7 @@ func TestCompareFieldMapping(t *testing.T) { Analyzer: "b", }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, - }, - indexFieldInfo: nil, - changed: false, - err: true, - }, - { // default analyser changed when inherited => not updatable - original: &mapping.FieldMapping{ - Type: "text", - Analyzer: "inherit", - }, - updated: &mapping.FieldMapping{ - Type: "text", - Analyzer: "inherit", - }, - defaultChanges: &defaultInfo{ - analyzer: true, - dateTimeParser: false, - synonymSource: false, - }, - indexFieldInfo: nil, - changed: false, - err: true, - }, - { // default datetimeparser changed for inherited datetime field => not updatable - original: &mapping.FieldMapping{ - Type: "datetime", - DateFormat: "inherit", - }, - updated: &mapping.FieldMapping{ - Type: "datetime", - DateFormat: "inherit", - }, - defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: true, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: nil, changed: false, @@ -162,9 +125,7 @@ func TestCompareFieldMapping(t *testing.T) { VectorIndexOptimizedFor: "memory-efficient", }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: nil, changed: false, @@ -184,9 +145,7 @@ func TestCompareFieldMapping(t *testing.T) { VectorIndexOptimizedFor: "memory-efficient", }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: 
false, }, indexFieldInfo: nil, changed: false, @@ -206,9 +165,7 @@ func TestCompareFieldMapping(t *testing.T) { VectorIndexOptimizedFor: "latency", }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: nil, changed: false, @@ -224,9 +181,7 @@ func TestCompareFieldMapping(t *testing.T) { IncludeInAll: false, }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: nil, changed: false, @@ -242,9 +197,7 @@ func TestCompareFieldMapping(t *testing.T) { IncludeTermVectors: true, }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: nil, changed: false, @@ -260,9 +213,7 @@ func TestCompareFieldMapping(t *testing.T) { SkipFreqNorm: false, }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: nil, changed: false, @@ -278,9 +229,7 @@ func TestCompareFieldMapping(t *testing.T) { Index: false, }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: &index.UpdateFieldInfo{ Index: true, @@ -299,9 +248,7 @@ func TestCompareFieldMapping(t *testing.T) { DocValues: false, }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: &index.UpdateFieldInfo{ DocValues: true, @@ -343,9 +290,7 @@ func TestCompareFieldMapping(t *testing.T) { SynonymSource: "b", }, defaultChanges: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, indexFieldInfo: &index.UpdateFieldInfo{}, changed: false, @@ -411,9 +356,7 @@ func TestCompareMappings(t *testing.T) { DefaultAnalyzer: "b", }, info: &defaultInfo{ - analyzer: true, - 
dateTimeParser: false, - synonymSource: false, + synonymSource: false, }, err: false, }, @@ -425,9 +368,7 @@ func TestCompareMappings(t *testing.T) { DefaultDateTimeParser: "b", }, info: &defaultInfo{ - analyzer: false, - dateTimeParser: true, - synonymSource: false, + synonymSource: false, }, err: false, }, @@ -439,9 +380,7 @@ func TestCompareMappings(t *testing.T) { DefaultSynonymSource: "b", }, info: &defaultInfo{ - analyzer: false, - dateTimeParser: false, - synonymSource: true, + synonymSource: true, }, err: false, }, @@ -499,6 +438,314 @@ func TestCompareMappings(t *testing.T) { } } +func TestCompareAnalysers(t *testing.T) { + + ori := mapping.NewIndexMapping() + ori.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping()) + ori.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping()) + ori.DefaultMapping.AddFieldMappingsAt("c", NewTextFieldMapping()) + ori.DefaultMapping.Properties["b"].DefaultAnalyzer = "3xbla" + ori.DefaultMapping.Properties["c"].DefaultAnalyzer = simple.Name + + upd := mapping.NewIndexMapping() + upd.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping()) + upd.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping()) + upd.DefaultMapping.AddFieldMappingsAt("c", NewTextFieldMapping()) + upd.DefaultMapping.Properties["b"].DefaultAnalyzer = "3xbla" + upd.DefaultMapping.Properties["c"].DefaultAnalyzer = simple.Name + + if err := ori.AddCustomAnalyzer("3xbla", map[string]interface{}{ + "type": custom.Name, + "tokenizer": whitespace.Name, + "token_filters": []interface{}{lowercase.Name, "stop_en"}, + }); err != nil { + t.Fatal(err) + } + + if err := upd.AddCustomAnalyzer("3xbla", map[string]interface{}{ + "type": custom.Name, + "tokenizer": whitespace.Name, + "token_filters": []interface{}{lowercase.Name, "stop_en"}, + }); err != nil { + t.Fatal(err) + } + + oriPaths := map[string]*pathInfo{ + "a": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, 
+ path: "a", + parentPath: "", + }, + "b": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "b", + parentPath: "", + }, + "c": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "c", + parentPath: "", + }, + } + + updPaths := map[string]*pathInfo{ + "a": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "a", + parentPath: "", + }, + "b": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "b", + parentPath: "", + }, + "c": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "c", + parentPath: "", + }, + } + + // Test case has identical analysers for text fields + err := compareAnalysers(oriPaths, updPaths, ori, upd) + if err != nil { + t.Errorf("Expected error to be nil, got %v", err) + } + + ori2 := mapping.NewIndexMapping() + ori2.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping()) + ori2.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping()) + ori2.DefaultMapping.AddFieldMappingsAt("c", NewTextFieldMapping()) + ori2.DefaultMapping.Properties["b"].DefaultAnalyzer = "3xbla" + ori2.DefaultMapping.Properties["c"].DefaultAnalyzer = simple.Name + + upd2 := mapping.NewIndexMapping() + upd2.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping()) + upd2.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping()) + upd2.DefaultMapping.AddFieldMappingsAt("c", NewTextFieldMapping()) + upd2.DefaultMapping.Properties["b"].DefaultAnalyzer = "3xbla" + upd2.DefaultMapping.Properties["c"].DefaultAnalyzer = simple.Name + + if err := ori2.AddCustomAnalyzer("3xbla", map[string]interface{}{ + "type": custom.Name, + "tokenizer": whitespace.Name, + 
"token_filters": []interface{}{lowercase.Name, "stop_en"}, + }); err != nil { + t.Fatal(err) + } + + if err := upd2.AddCustomAnalyzer("3xbla", map[string]interface{}{ + "type": custom.Name, + "tokenizer": letter.Name, + "token_filters": []interface{}{lowercase.Name, "stop_en"}, + }); err != nil { + t.Fatal(err) + } + + // Test case has different custom analyser for field "b" + err = compareAnalysers(oriPaths, updPaths, ori2, upd2) + if err == nil { + t.Errorf("Expected error, got nil") + } +} + +func TestCompareDatetimeParsers(t *testing.T) { + + ori := mapping.NewIndexMapping() + ori.DefaultMapping.AddFieldMappingsAt("a", NewDateTimeFieldMapping()) + ori.DefaultMapping.AddFieldMappingsAt("b", NewDateTimeFieldMapping()) + ori.DefaultMapping.AddFieldMappingsAt("c", NewDateTimeFieldMapping()) + ori.DefaultMapping.Properties["b"].Fields[0].DateFormat = "customDT" + ori.DefaultMapping.Properties["c"].Fields[0].DateFormat = percent.Name + + upd := mapping.NewIndexMapping() + upd.DefaultMapping.AddFieldMappingsAt("a", NewDateTimeFieldMapping()) + upd.DefaultMapping.AddFieldMappingsAt("b", NewDateTimeFieldMapping()) + upd.DefaultMapping.AddFieldMappingsAt("c", NewDateTimeFieldMapping()) + upd.DefaultMapping.Properties["b"].Fields[0].DateFormat = "customDT" + upd.DefaultMapping.Properties["c"].Fields[0].DateFormat = percent.Name + + err := ori.AddCustomDateTimeParser("customDT", map[string]interface{}{ + "type": sanitized.Name, + "layouts": []interface{}{ + "02/01/2006 15:04:05", + "2006/01/02 3:04PM", + }, + }) + + err = upd.AddCustomDateTimeParser("customDT", map[string]interface{}{ + "type": sanitized.Name, + "layouts": []interface{}{ + "02/01/2006 15:04:05", + "2006/01/02 3:04PM", + }, + }) + + oriPaths := map[string]*pathInfo{ + "a": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "datetime", + }, + }, + }, + dynamic: false, + path: "a", + parentPath: "", + }, + "b": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: 
&mapping.FieldMapping{ + Type: "datetime", + DateFormat: "customDT", + }, + }, + }, + dynamic: false, + path: "b", + parentPath: "", + }, + "c": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "datetime", + }, + }, + }, + dynamic: false, + path: "c", + parentPath: "", + }, + } + + updPaths := map[string]*pathInfo{ + "a": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "datetime", + }, + }, + }, + dynamic: false, + path: "a", + parentPath: "", + }, + "b": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "datetime", + DateFormat: "customDT", + }, + }, + }, + dynamic: false, + path: "b", + parentPath: "", + }, + "c": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "datetime", + }, + }, + }, + dynamic: false, + path: "c", + parentPath: "", + }, + } + + // Test case has identical datetime parsers for all fields + err = compareDateTimeParsers(oriPaths, updPaths, ori, upd) + if err != nil { + t.Errorf("Expected error to be nil, got %v", err) + } + + ori2 := mapping.NewIndexMapping() + ori2.DefaultMapping.AddFieldMappingsAt("a", NewDateTimeFieldMapping()) + ori2.DefaultMapping.AddFieldMappingsAt("b", NewDateTimeFieldMapping()) + ori2.DefaultMapping.AddFieldMappingsAt("c", NewDateTimeFieldMapping()) + ori2.DefaultMapping.Properties["b"].Fields[0].DateFormat = "customDT" + ori2.DefaultMapping.Properties["c"].Fields[0].DateFormat = percent.Name + + upd2 := mapping.NewIndexMapping() + upd2.DefaultMapping.AddFieldMappingsAt("a", NewDateTimeFieldMapping()) + upd2.DefaultMapping.AddFieldMappingsAt("b", NewDateTimeFieldMapping()) + upd2.DefaultMapping.AddFieldMappingsAt("c", NewDateTimeFieldMapping()) + upd2.DefaultMapping.Properties["b"].Fields[0].DateFormat = "customDT" + upd2.DefaultMapping.Properties["c"].Fields[0].DateFormat = percent.Name + + err = ori2.AddCustomDateTimeParser("customDT", map[string]interface{}{ + "type": 
sanitized.Name, + "layouts": []interface{}{ + "02/01/2006 15:04:05", + "2006/01/02 3:04PM", + }, + }) + + err = upd2.AddCustomDateTimeParser("customDT", map[string]interface{}{ + "type": sanitized.Name, + "layouts": []interface{}{ + "02/01/2006 15:04:05", + "2006/01/02", + }, + }) + + // test case has different custom datetime parser for field "b" + err = compareDateTimeParsers(oriPaths, updPaths, ori2, upd2) + if err == nil { + t.Errorf("Expected error, got nil") + } +} + func TestDeletedFields(t *testing.T) { tests := []struct { original *mapping.IndexMappingImpl @@ -581,6 +828,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -654,6 +902,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: map[string]*index.UpdateFieldInfo{}, err: false, @@ -733,6 +982,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -806,6 +1056,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: map[string]*index.UpdateFieldInfo{}, err: false, @@ -870,6 +1121,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -929,6 +1181,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + 
CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: nil, err: true, @@ -962,6 +1215,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{}, @@ -990,6 +1244,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: nil, err: true, @@ -1068,6 +1323,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -1119,6 +1375,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: true, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: nil, err: true, @@ -1198,6 +1455,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -1271,6 +1529,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: map[string]*index.UpdateFieldInfo{ "b": { @@ -1355,6 +1614,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -1428,6 +1688,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, 
fieldInfo: map[string]*index.UpdateFieldInfo{ "a": { @@ -1512,6 +1773,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -1585,6 +1847,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: nil, err: true, @@ -1649,6 +1912,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -1708,6 +1972,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: map[string]*index.UpdateFieldInfo{ "a": { @@ -1819,6 +2084,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, updated: &mapping.IndexMappingImpl{ TypeMapping: map[string]*mapping.DocumentMapping{ @@ -1900,6 +2166,7 @@ func TestDeletedFields(t *testing.T) { IndexDynamic: false, StoreDynamic: false, DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, }, fieldInfo: map[string]*index.UpdateFieldInfo{ "a": { @@ -2742,7 +3009,6 @@ func TestIndexUpdateMerge(t *testing.T) { if len(res4.Hits) != 0 { t.Fatalf("Expected 0 hits, got %d\n", len(res4.Hits)) } - } func BenchmarkIndexUpdateText(b *testing.B) { From 53eebc8d230f0b65f8307f0ed030bd91b6a59023 Mon Sep 17 00:00:00 2001 From: Likith B Date: Thu, 20 Feb 2025 17:19:15 +0530 Subject: [PATCH 08/25] MB-57888: Additional checks for synonyms --- index/scorch/optimize_knn.go | 2 +- index/scorch/persister.go | 2 + 
index/scorch/scorch.go | 6 +- index/scorch/snapshot_index.go | 2 + index/scorch/snapshot_segment.go | 1 + index_impl.go | 1 + index_update.go | 136 +++++++++++++----- index_update_test.go | 230 ++++++++++++++++++++++--------- 8 files changed, 270 insertions(+), 110 deletions(-) diff --git a/index/scorch/optimize_knn.go b/index/scorch/optimize_knn.go index 7fda843d4..020300592 100644 --- a/index/scorch/optimize_knn.go +++ b/index/scorch/optimize_knn.go @@ -79,7 +79,7 @@ func (o *OptimizeVR) Finish() error { wg.Done() }() for field, vrs := range o.vrs { - if info, ok := o.snapshot.updatedFields[field]; ok && info.Deleted || info.Index { + if info, ok := o.snapshot.updatedFields[field]; ok && (info.Deleted || info.Index) { continue } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index d65bcf4ff..f5f5509a0 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -1003,6 +1003,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { } rv.segment = append(rv.segment, segmentSnapshot) rv.offsets = append(rv.offsets, running) + // Merge all segment level updated field info for use during queries if segmentSnapshot.updatedFields != nil { rv.MergeUpdateFieldsInfo(segmentSnapshot.updatedFields) } @@ -1066,6 +1067,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro for field, info := range updatedFields { rv.updatedFields[field] = &info } + // Set the value within the segment base for use during merge rv.UpdateFieldsInfo(rv.updatedFields) } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index aacabf4af..074f32025 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -17,7 +17,6 @@ package scorch import ( "encoding/json" "fmt" - "log" "os" "path/filepath" "sync" @@ -948,7 +947,7 @@ func (s *Scorch) FireIndexEvent() { // will be merged before persisting. 
The index mapping is also overwritten both // in bolt as well as the index snapshot func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error { - // Switch from pointer to value to marshal into a json for storage + // Switch from pointer to value so we can marshal into a json for storage updatedFields := make(map[string]index.UpdateFieldInfo) for field, info := range fieldInfo { updatedFields[field] = *info @@ -957,6 +956,7 @@ func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappi if err != nil { return err } + // Pass the update field info to all snapshots and segment bases s.root.m.Lock() s.root.UpdateFieldsInfo(fieldInfo) s.root.m.Unlock() @@ -975,7 +975,7 @@ func (s *Scorch) updateBolt(fieldInfo map[string]index.UpdateFieldInfo, mappingB for k, _ := c.Last(); k != nil; k, _ = c.Prev() { _, _, err := decodeUvarintAscending(k) if err != nil { - log.Printf("unable to parse segment epoch %x, continuing", k) + fmt.Printf("unable to parse segment epoch %x, continuing", k) continue } snapshot := snapshots.Bucket(k) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 74e48eb14..8c198f3be 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -1192,6 +1192,7 @@ func (is *IndexSnapshot) UpdateSynonymSearchCount(delta uint64) { atomic.AddUint64(&is.parent.stats.TotSynonymSearches, delta) } +// Update current snapshot updated field data as well as pass it on to all segments and segment bases func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { is.MergeUpdateFieldsInfo(updatedFields) @@ -1200,6 +1201,7 @@ func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.Update } } +// Merge given updated field information with existing updated field information func (is *IndexSnapshot) MergeUpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { if is.updatedFields == nil { 
is.updatedFields = updatedFields diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index a4bd3deb5..d13db344e 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -147,6 +147,7 @@ func (s *SegmentSnapshot) Size() (rv int) { return } +// Merge given updated field information with existing and pass it on to the segment base func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { if s.updatedFields == nil { diff --git a/index_impl.go b/index_impl.go index a94b59980..46da47e70 100644 --- a/index_impl.go +++ b/index_impl.go @@ -332,6 +332,7 @@ func updateIndexUsing(path string, runtimeConfig map[string]interface{}, newPara } // Validate and update the index with the new mapping + // return usable index with error as to why update failed for any error if um != nil { ui, ok := rv.i.(index.UpdateIndex) if !ok { diff --git a/index_update.go b/index_update.go index 1a56d2432..1953de7a5 100644 --- a/index_update.go +++ b/index_update.go @@ -38,20 +38,15 @@ type fieldMapInfo struct { fieldMapping *mapping.FieldMapping analyzer string datetimeParser string + synonymSource string rootName string parent *pathInfo } -// Store all of the changes to defaults -type defaultInfo struct { - synonymSource bool -} - // Compare two index mappings to identify all of the updatable changes func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.UpdateFieldInfo, error) { - var err error - - defaultChanges, err := compareMappings(ori, upd) + // Compare all of the top level fields in an index mapping + err := compareMappings(ori, upd) if err != nil { return nil, err } @@ -89,14 +84,8 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update } addPathInfo(updPaths, "", upd.DefaultMapping, ori, nil, "") - // Compare all analysers currently in use - err = compareAnalysers(oriPaths, updPaths, ori, upd) - if err != nil { - return nil, err - } - - // 
Compare all datetime parsers currently in use - err = compareDateTimeParsers(oriPaths, updPaths, ori, upd) + // Compare all components of custom analysis currently in use + err = compareCustomComponents(oriPaths, updPaths, ori, upd) if err != nil { return nil, err } @@ -106,7 +95,7 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update // for every single field possible fieldInfo := make(map[string]*index.UpdateFieldInfo) for path, info := range oriPaths { - err = addFieldInfo(fieldInfo, info, updPaths[path], defaultChanges) + err = addFieldInfo(fieldInfo, info, updPaths[path]) if err != nil { return nil, err } @@ -118,6 +107,7 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update if !info.Deleted && !info.Index && !info.DocValues && !info.Store { delete(fieldInfo, name) } + // A field cannot be completely deleted with any dynamic value turned on if info.Deleted { if upd.IndexDynamic { return nil, fmt.Errorf("Mapping cannot be removed when index dynamic is true") @@ -133,39 +123,34 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update return fieldInfo, nil } -func compareMappings(ori, upd *mapping.IndexMappingImpl) (*defaultInfo, error) { - rv := &defaultInfo{} - +// Ensures none of the top level index mapping fields have changed +func compareMappings(ori, upd *mapping.IndexMappingImpl) error { if ori.TypeField != upd.TypeField && (len(ori.TypeMapping) != 0 || len(upd.TypeMapping) != 0) { - return nil, fmt.Errorf("type field cannot be changed when type mappings are present") + return fmt.Errorf("type field cannot be changed when type mappings are present") } if ori.DefaultType != upd.DefaultType { - return nil, fmt.Errorf("default type cannot be changed") - } - - if ori.DefaultSynonymSource != upd.DefaultSynonymSource { - rv.synonymSource = true + return fmt.Errorf("default type cannot be changed") } if ori.DefaultField != upd.DefaultField { - return nil, fmt.Errorf("default 
field cannot be changed") + } if ori.IndexDynamic != upd.IndexDynamic { - return nil, fmt.Errorf("index dynamic cannot be changed") + return fmt.Errorf("index dynamic cannot be changed") } if ori.StoreDynamic != upd.StoreDynamic { - return nil, fmt.Errorf(("store dynamic cannot be changed")) + return fmt.Errorf(("store dynamic cannot be changed")) } if ori.DocValuesDynamic != upd.DocValuesDynamic { - return nil, fmt.Errorf(("docvalues dynamic cannot be changed")) + return fmt.Errorf(("docvalues dynamic cannot be changed")) } - return rv, nil + return nil } // Ensures updated document mapping does not contain new @@ -266,6 +251,32 @@ func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMa paths[name] = pInfo } +// Compares all of the custom analysis components in use +func compareCustomComponents(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { + // Compare all analysers currently in use + err := compareAnalysers(oriPaths, updPaths, ori, upd) + if err != nil { + return err + } + + // Compare all datetime parsers currently in use + err = compareDateTimeParsers(oriPaths, updPaths, ori, upd) + if err != nil { + return err + } + + // Compare all synonym sources + err = compareSynonymSources(oriPaths, updPaths, ori, upd) + if err != nil { + return err + } + + return nil +} + +// Compares all analysers currently in use +// Standard analysers not in custom analysis are not compared +// Analysers in custom analysis but not in use are not compared func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { oriAnalyzers := make(map[string]interface{}) @@ -316,6 +327,8 @@ func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping return nil } +// Compares all date time parsers currently in use +// Date time parsers in custom analysis but not in use are not compared func 
compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { oriDateTimeParsers := make(map[string]analysis.DateTimeParser) @@ -367,8 +380,48 @@ func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *m return nil } +// Compares all synonym sources +// Synonym sources currently not in use are also compared +func compareSynonymSources(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { + + oriSynonymSources := make(map[string]analysis.SynonymSource) + updSynonymSources := make(map[string]analysis.SynonymSource) + + for path, info := range oriPaths { + if len(info.fieldMapInfo) == 0 { + continue + } + for _, fInfo := range info.fieldMapInfo { + if fInfo.fieldMapping.Type == "text" { + synonymSourceName := ori.SynonymSourceForPath(path) + fInfo.synonymSource = synonymSourceName + oriSynonymSources[synonymSourceName] = ori.SynonymSourceNamed(synonymSourceName) + } + } + } + + for path, info := range updPaths { + if len(info.fieldMapInfo) == 0 { + continue + } + for _, fInfo := range info.fieldMapInfo { + if fInfo.fieldMapping.Type == "text" { + synonymSourceName := upd.SynonymSourceForPath(path) + fInfo.synonymSource = synonymSourceName + updSynonymSources[synonymSourceName] = upd.SynonymSourceNamed(synonymSourceName) + } + } + } + + if !reflect.DeepEqual(ori.CustomAnalysis.SynonymSources, upd.CustomAnalysis.SynonymSources) { + return fmt.Errorf("synonym sources cannot be changed") + } + + return nil +} + // Compare all of the fields at a particular document path and add its field information -func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, defaultChanges *defaultInfo) error { +func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) error { var info *index.UpdateFieldInfo var updated bool @@ -378,7 +431,7 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, d // or upd having mappings 
not in orihave already been done before this stage if upd == nil { for _, oriFMapInfo := range ori.fieldMapInfo { - info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil, defaultChanges) + info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil) if err != nil { return err } @@ -392,6 +445,7 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, d var updFMap *mapping.FieldMapping var updAnalyser string var updDatetimeParser string + var updSynonymSource string // For multiple fields at a single document path, compare // only with the matching ones @@ -401,22 +455,30 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, d updFMap = updFMapInfo.fieldMapping if updFMap.Type == "text" { updAnalyser = updFMapInfo.analyzer + updSynonymSource = updFMapInfo.synonymSource } else if updFMap.Type == "datetime" { updDatetimeParser = updFMapInfo.datetimeParser } } } + // Compare analyser, datetime parser and synonym source before comparing + // the field mapping as it might not have this information if updAnalyser != "" && oriFMapInfo.analyzer != updAnalyser { return fmt.Errorf("analyser has been changed for a text field") } if updDatetimeParser != "" && oriFMapInfo.datetimeParser != updDatetimeParser { return fmt.Errorf("datetime parser has been changed for a text field") } - - info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap, defaultChanges) + if updSynonymSource != "" && oriFMapInfo.synonymSource != updSynonymSource { + return fmt.Errorf("synonym source has been changed for a text field") + } + info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap) if err != nil { return err } + + // Validate to ensure change is possible + // Needed if multiple mappings are aliased to the same field err = validateFieldInfo(info, updated, fInfo, ori, oriFMapInfo) if err != nil { return err @@ -437,7 +499,7 @@ func addFieldInfo(fInfo 
map[string]*index.UpdateFieldInfo, ori, upd *pathInfo, d // second return argument gives a flag indicating whether any changes, if detected, are doable or if // update is impossible // third argument is an error explaining exactly why the change is not possible -func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges *defaultInfo) (*index.UpdateFieldInfo, bool, error) { +func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.UpdateFieldInfo, bool, error) { rv := &index.UpdateFieldInfo{} @@ -459,8 +521,6 @@ func compareFieldMapping(original, updated *mapping.FieldMapping, defaultChanges if original.Type == "text" { if original.SynonymSource != updated.SynonymSource { return nil, false, fmt.Errorf("synonym source cannot be changed for text field") - } else if original.SynonymSource == "" && defaultChanges.synonymSource { - return nil, false, fmt.Errorf("synonym source cannot be changed for possible inherited text field") } if original.Analyzer != updated.Analyzer { return nil, false, fmt.Errorf("analyzer cannot be updated for text fields") diff --git a/index_update_test.go b/index_update_test.go index 9c84e6f73..0377f217d 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -24,6 +24,7 @@ import ( "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" "github.com/blevesearch/bleve/v2/analysis/analyzer/simple" + "github.com/blevesearch/bleve/v2/analysis/analyzer/standard" "github.com/blevesearch/bleve/v2/analysis/datetime/percent" "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized" "github.com/blevesearch/bleve/v2/analysis/lang/en" @@ -40,7 +41,6 @@ func TestCompareFieldMapping(t *testing.T) { tests := []struct { original *mapping.FieldMapping updated *mapping.FieldMapping - defaultChanges *defaultInfo indexFieldInfo *index.UpdateFieldInfo changed bool err bool @@ -48,7 +48,6 @@ func TestCompareFieldMapping(t *testing.T) { { // both nil => no op original: nil, updated: nil, - defaultChanges: nil, 
indexFieldInfo: nil, changed: false, err: false, @@ -56,9 +55,6 @@ func TestCompareFieldMapping(t *testing.T) { { // updated nil => delete all original: &mapping.FieldMapping{}, updated: nil, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: &index.UpdateFieldInfo{ Deleted: true, }, @@ -72,9 +68,6 @@ func TestCompareFieldMapping(t *testing.T) { updated: &mapping.FieldMapping{ Type: "datetime", }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -88,9 +81,6 @@ func TestCompareFieldMapping(t *testing.T) { Type: "text", SynonymSource: "b", }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -104,9 +94,6 @@ func TestCompareFieldMapping(t *testing.T) { Type: "text", Analyzer: "b", }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -124,9 +111,6 @@ func TestCompareFieldMapping(t *testing.T) { Similarity: "l2_norm", VectorIndexOptimizedFor: "memory-efficient", }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -144,9 +128,6 @@ func TestCompareFieldMapping(t *testing.T) { Dims: 128, VectorIndexOptimizedFor: "memory-efficient", }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -164,9 +145,6 @@ func TestCompareFieldMapping(t *testing.T) { Dims: 128, VectorIndexOptimizedFor: "latency", }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -180,9 +158,6 @@ func TestCompareFieldMapping(t *testing.T) { Type: "numeric", IncludeInAll: false, }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -196,9 +171,6 @@ func TestCompareFieldMapping(t *testing.T) { Type: "numeric", IncludeTermVectors: true, }, - defaultChanges: 
&defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -212,9 +184,6 @@ func TestCompareFieldMapping(t *testing.T) { Type: "numeric", SkipFreqNorm: false, }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: nil, changed: false, err: true, @@ -228,9 +197,6 @@ func TestCompareFieldMapping(t *testing.T) { Type: "geopoint", Index: false, }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: &index.UpdateFieldInfo{ Index: true, DocValues: true, @@ -247,9 +213,6 @@ func TestCompareFieldMapping(t *testing.T) { Type: "numeric", DocValues: false, }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: &index.UpdateFieldInfo{ DocValues: true, }, @@ -289,9 +252,6 @@ func TestCompareFieldMapping(t *testing.T) { VectorIndexOptimizedFor: "latency", SynonymSource: "b", }, - defaultChanges: &defaultInfo{ - synonymSource: false, - }, indexFieldInfo: &index.UpdateFieldInfo{}, changed: false, err: false, @@ -299,7 +259,7 @@ func TestCompareFieldMapping(t *testing.T) { } for i, test := range tests { - rv, changed, err := compareFieldMapping(test.original, test.updated, test.defaultChanges) + rv, changed, err := compareFieldMapping(test.original, test.updated) if err == nil && test.err || err != nil && !test.err { t.Errorf("Unexpected error value for test %d, expecting %t, got %v\n", i, test.err, err) @@ -317,7 +277,6 @@ func TestCompareMappings(t *testing.T) { tests := []struct { original *mapping.IndexMappingImpl updated *mapping.IndexMappingImpl - info *defaultInfo err bool }{ { // changed type field when non empty mappings are present => error @@ -335,8 +294,7 @@ func TestCompareMappings(t *testing.T) { "b": {}, }, }, - info: nil, - err: true, + err: true, }, { // changed default type => error original: &mapping.IndexMappingImpl{ @@ -345,8 +303,7 @@ func TestCompareMappings(t *testing.T) { updated: &mapping.IndexMappingImpl{ DefaultType: "b", }, - info: nil, - err: 
true, + err: true, }, { // changed default analyzer => analyser true original: &mapping.IndexMappingImpl{ @@ -355,9 +312,6 @@ func TestCompareMappings(t *testing.T) { updated: &mapping.IndexMappingImpl{ DefaultAnalyzer: "b", }, - info: &defaultInfo{ - synonymSource: false, - }, err: false, }, { // changed default datetimeparser => datetimeparser true @@ -367,9 +321,6 @@ func TestCompareMappings(t *testing.T) { updated: &mapping.IndexMappingImpl{ DefaultDateTimeParser: "b", }, - info: &defaultInfo{ - synonymSource: false, - }, err: false, }, { // changed default synonym source => synonym source true @@ -379,9 +330,6 @@ func TestCompareMappings(t *testing.T) { updated: &mapping.IndexMappingImpl{ DefaultSynonymSource: "b", }, - info: &defaultInfo{ - synonymSource: true, - }, err: false, }, { // changed default field => error @@ -391,8 +339,7 @@ func TestCompareMappings(t *testing.T) { updated: &mapping.IndexMappingImpl{ DefaultField: "b", }, - info: nil, - err: true, + err: true, }, { // changed index dynamic => error original: &mapping.IndexMappingImpl{ @@ -401,8 +348,7 @@ func TestCompareMappings(t *testing.T) { updated: &mapping.IndexMappingImpl{ IndexDynamic: false, }, - info: nil, - err: true, + err: true, }, { // changed store dynamic => error original: &mapping.IndexMappingImpl{ @@ -411,8 +357,7 @@ func TestCompareMappings(t *testing.T) { updated: &mapping.IndexMappingImpl{ StoreDynamic: true, }, - info: nil, - err: true, + err: true, }, { // changed docvalues dynamic => error original: &mapping.IndexMappingImpl{ @@ -421,20 +366,16 @@ func TestCompareMappings(t *testing.T) { updated: &mapping.IndexMappingImpl{ DocValuesDynamic: false, }, - info: nil, - err: true, + err: true, }, } for i, test := range tests { - info, err := compareMappings(test.original, test.updated) + err := compareMappings(test.original, test.updated) if err == nil && test.err || err != nil && !test.err { t.Errorf("Unexpected error value for test %d, expecting %t, got %v\n", i, test.err, 
err) } - if info == nil && test.info != nil || info != nil && test.info == nil || !reflect.DeepEqual(info, test.info) { - t.Errorf("Unexpected default info value for test %d, expecting %+v, got %+v, err %v", i, test.info, info, err) - } } } @@ -746,6 +687,159 @@ func TestCompareDatetimeParsers(t *testing.T) { } } +func TestCompareSynonymSources(t *testing.T) { + + ori := mapping.NewIndexMapping() + ori.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping()) + ori.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping()) + ori.DefaultMapping.DefaultSynonymSource = "syn1" + ori.DefaultMapping.Properties["b"].Fields[0].SynonymSource = "syn2" + + upd := mapping.NewIndexMapping() + upd.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping()) + upd.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping()) + upd.DefaultMapping.DefaultSynonymSource = "syn1" + upd.DefaultMapping.Properties["b"].Fields[0].SynonymSource = "syn2" + + err := ori.AddSynonymSource("syn1", map[string]interface{}{ + "collection": "col1", + "analyzer": simple.Name, + }) + if err != nil { + t.Fatal(err) + } + err = ori.AddSynonymSource("syn2", map[string]interface{}{ + "collection": "col2", + "analyzer": standard.Name, + }) + if err != nil { + t.Fatal(err) + } + + err = upd.AddSynonymSource("syn1", map[string]interface{}{ + "collection": "col1", + "analyzer": simple.Name, + }) + if err != nil { + t.Fatal(err) + } + err = upd.AddSynonymSource("syn2", map[string]interface{}{ + "collection": "col2", + "analyzer": standard.Name, + }) + if err != nil { + t.Fatal(err) + } + + oriPaths := map[string]*pathInfo{ + "a": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "a", + parentPath: "", + }, + "b": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "b", + parentPath: "", + }, + } + + updPaths := map[string]*pathInfo{ + 
"a": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "a", + parentPath: "", + }, + "b": { + fieldMapInfo: []*fieldMapInfo{ + { + fieldMapping: &mapping.FieldMapping{ + Type: "text", + }, + }, + }, + dynamic: false, + path: "b", + parentPath: "", + }, + } + + // Test case has identical synonym sources for all fields + err = compareSynonymSources(oriPaths, updPaths, ori, upd) + if err != nil { + t.Errorf("Expected error to be nil, got %v", err) + } + + ori2 := mapping.NewIndexMapping() + ori2.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping()) + ori2.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping()) + ori2.DefaultMapping.DefaultSynonymSource = "syn1" + ori2.DefaultMapping.Properties["b"].Fields[0].SynonymSource = "syn2" + + upd2 := mapping.NewIndexMapping() + upd2.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping()) + upd2.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping()) + upd2.DefaultMapping.DefaultSynonymSource = "syn1" + upd2.DefaultMapping.Properties["b"].Fields[0].SynonymSource = "syn2" + + err = ori2.AddSynonymSource("syn1", map[string]interface{}{ + "collection": "col1", + "analyzer": simple.Name, + }) + if err != nil { + t.Fatal(err) + } + err = ori2.AddSynonymSource("syn2", map[string]interface{}{ + "collection": "col2", + "analyzer": standard.Name, + }) + if err != nil { + t.Fatal(err) + } + + err = upd2.AddSynonymSource("syn1", map[string]interface{}{ + "collection": "col1", + "analyzer": simple.Name, + }) + if err != nil { + t.Fatal(err) + } + err = upd2.AddSynonymSource("syn2", map[string]interface{}{ + "collection": "col3", + "analyzer": standard.Name, + }) + if err != nil { + t.Fatal(err) + } + + // test case has different synonym source for field "b" + err = compareSynonymSources(oriPaths, updPaths, ori2, upd2) + if err == nil { + t.Errorf("Expected error, got nil") + } +} + func TestDeletedFields(t *testing.T) { tests := 
[]struct { original *mapping.IndexMappingImpl From d5c4489583b7da85e37424d0ca651bd08d9a9f0a Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 21 Feb 2025 19:24:17 +0530 Subject: [PATCH 09/25] MB-57888: Api changes --- index.go | 20 +------- index_impl.go | 117 +++++++------------------------------------ index_update_test.go | 31 ++++++++++-- 3 files changed, 45 insertions(+), 123 deletions(-) diff --git a/index.go b/index.go index 6ab7ccd2a..a4f2fb5b6 100644 --- a/index.go +++ b/index.go @@ -325,28 +325,12 @@ func Open(path string) (Index, error) { // The mapping used when it was created will be used for all Index/Search operations. // The provided runtimeConfig can override settings // persisted when the kvstore was created. +// If runtimeConfig has updated mapping, then an index update is attempted +// Throws an error without any changes to the index if an unupdatable mapping is provided func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) { return openIndexUsing(path, runtimeConfig) } -// Update index at the specified path, must exist. -// The mapping used when created will be overwritten by the mapping provided -// for all Index/Search operations. -// Throws an error without any changes to the index if an unupdatable mapping is provided -func Update(path string, newParams string) (Index, error) { - return updateIndexUsing(path, nil, newParams) -} - -// UpdateUsing index at the specified path, must exist. -// The mapping used when created will be overwritten by the mapping provided -// for all Index/Search operations. -// The provided runtimeConfig can override settings -// persisted when the kvstore was created. 
-// Throws an error without any changes to the index if an unupdatable mapping is provided -func UpdateUsing(path string, runtimeConfig map[string]interface{}, newParams string) (Index, error) { - return updateIndexUsing(path, runtimeConfig, newParams) -} - // Builder is a limited interface, used to build indexes in an offline mode. // Items cannot be updated or deleted, and the caller MUST ensure a document is // indexed only once. diff --git a/index_impl.go b/index_impl.go index 46da47e70..514a4c93d 100644 --- a/index_impl.go +++ b/index_impl.go @@ -163,6 +163,9 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde rv.meta.IndexType = upsidedown.Name } + var um *mapping.IndexMappingImpl + var umBytes []byte + storeConfig := rv.meta.Config if storeConfig == nil { storeConfig = map[string]interface{}{} @@ -173,106 +176,21 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde storeConfig["error_if_exists"] = false for rck, rcv := range runtimeConfig { storeConfig[rck] = rcv - } - - // open the index - indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) - if indexTypeConstructor == nil { - return nil, ErrorUnknownIndexType - } - - rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue) - if err != nil { - return nil, err - } - err = rv.i.Open() - if err != nil { - return nil, err - } - defer func(rv *indexImpl) { - if !rv.open { - rv.i.Close() - } - }(rv) + if rck == "updated_mapping" { + if val, ok := rcv.(string); ok { + if len(val) == 0 { + return nil, fmt.Errorf(("updated_mapping is empty")) + } + umBytes = []byte(val) - // now load the mapping - indexReader, err := rv.i.Reader() - if err != nil { - return nil, err - } - defer func() { - if cerr := indexReader.Close(); cerr != nil && err == nil { - err = cerr + err = util.UnmarshalJSON(umBytes, &um) + if err != nil { + return nil, fmt.Errorf("error parsing updated_mapping into JSON: %v\nmapping 
contents:\n%v", err, rck) + } + } else { + return nil, fmt.Errorf("updated_mapping not of type string") + } } - }() - - mappingBytes, err := indexReader.GetInternal(scorch.MappingInternalKey) - if err != nil { - return nil, err - } - - var im *mapping.IndexMappingImpl - err = util.UnmarshalJSON(mappingBytes, &im) - if err != nil { - return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes)) - } - - // mark the index as open - rv.mutex.Lock() - defer rv.mutex.Unlock() - rv.open = true - - // validate the mapping - err = im.Validate() - if err != nil { - // note even if the mapping is invalid - // we still return an open usable index - return rv, err - } - - rv.m = im - indexStats.Register(rv) - return rv, err -} - -func updateIndexUsing(path string, runtimeConfig map[string]interface{}, newParams string) (rv *indexImpl, err error) { - rv = &indexImpl{ - path: path, - name: path, - } - rv.stats = &IndexStat{i: rv} - - rv.meta, err = openIndexMeta(path) - if err != nil { - return nil, err - } - - // backwards compatibility if index type is missing - if rv.meta.IndexType == "" { - rv.meta.IndexType = upsidedown.Name - } - - storeConfig := rv.meta.Config - if storeConfig == nil { - storeConfig = map[string]interface{}{} - } - - var um *mapping.IndexMappingImpl - - if len(newParams) == 0 { - return nil, fmt.Errorf(("updated mapping is empty")) - } - - err = util.UnmarshalJSON([]byte(newParams), &um) - if err != nil { - return nil, fmt.Errorf("error parsing updated mapping JSON: %v\nmapping contents:\n%s", err, newParams) - } - - storeConfig["path"] = indexStorePath(path) - storeConfig["create_if_missing"] = false - storeConfig["error_if_exists"] = false - for rck, rcv := range runtimeConfig { - storeConfig[rck] = rcv } // open the index @@ -285,7 +203,6 @@ func updateIndexUsing(path string, runtimeConfig map[string]interface{}, newPara if err != nil { return nil, err } - err = rv.i.Open() if err != nil { return nil, err @@ 
-349,7 +266,7 @@ func updateIndexUsing(path string, runtimeConfig map[string]interface{}, newPara return rv, err } - err = ui.UpdateFields(fieldInfo, []byte(newParams)) + err = ui.UpdateFields(fieldInfo, umBytes) if err != nil { return rv, err } diff --git a/index_update_test.go b/index_update_test.go index 0377f217d..36506d9d4 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -2458,7 +2458,12 @@ func TestIndexUpdateText(t *testing.T) { if err != nil { t.Fatal(err) } - index, err = Update(tmpIndexPath, string(mappingString)) + + config := map[string]interface{}{ + "updated_mapping": string(mappingString), + } + + index, err = OpenUsing(tmpIndexPath, config) if err != nil { t.Fatal(err) } @@ -2668,7 +2673,11 @@ func TestIndexUpdateVector(t *testing.T) { if err != nil { t.Fatal(err) } - index, err = Update(tmpIndexPath, string(mappingString)) + config := map[string]interface{}{ + "updated_mapping": string(mappingString), + } + + index, err = OpenUsing(tmpIndexPath, config) if err != nil { t.Fatal(err) } @@ -2830,7 +2839,11 @@ func TestIndexUpdateSynonym(t *testing.T) { if err != nil { t.Fatal(err) } - index, err = Update(tmpIndexPath, string(mappingString)) + config := map[string]interface{}{ + "updated_mapping": string(mappingString), + } + + index, err = OpenUsing(tmpIndexPath, config) if err != nil { t.Fatal(err) } @@ -3041,7 +3054,11 @@ func TestIndexUpdateMerge(t *testing.T) { if err != nil { t.Fatal(err) } - index, err = Update(tmpIndexPath, string(mappingString)) + config := map[string]interface{}{ + "updated_mapping": string(mappingString), + } + + index, err = OpenUsing(tmpIndexPath, config) if err != nil { t.Fatal(err) } @@ -3211,7 +3228,11 @@ func BenchmarkIndexUpdateText(b *testing.B) { if err != nil { b.Fatal(err) } - index, err = Update(tmpIndexPath, string(mappingString)) + config := map[string]interface{}{ + "updated_mapping": string(mappingString), + } + + index, err = OpenUsing(tmpIndexPath, config) if err != nil { b.Fatal(err) } From 
41ffb3b3f06c09fe2e9ed36eb0c1c9f2bfa31b37 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 11 Mar 2025 11:33:22 +0530 Subject: [PATCH 10/25] MB-57888: Minor Code Refactoring --- index/scorch/optimize_knn.go | 1 + index_update.go | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/index/scorch/optimize_knn.go b/index/scorch/optimize_knn.go index 020300592..afe7bacad 100644 --- a/index/scorch/optimize_knn.go +++ b/index/scorch/optimize_knn.go @@ -79,6 +79,7 @@ func (o *OptimizeVR) Finish() error { wg.Done() }() for field, vrs := range o.vrs { + // Noop if the vector field or its index data is supposed to be deleted if info, ok := o.snapshot.updatedFields[field]; ok && (info.Deleted || info.Index) { continue } diff --git a/index_update.go b/index_update.go index 1953de7a5..dff5fbc88 100644 --- a/index_update.go +++ b/index_update.go @@ -156,7 +156,6 @@ func compareMappings(ori, upd *mapping.IndexMappingImpl) error { // Ensures updated document mapping does not contain new // field mappings or document mappings func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { - // Check to verify both original and updated are not nil // and are enabled before proceeding if ori == nil { @@ -183,7 +182,6 @@ func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { // in updated for _, updFMapping := range upd.Fields { var oriFMapping *mapping.FieldMapping - for _, fMapping := range ori.Fields { if updFMapping.Name == fMapping.Name { oriFMapping = fMapping @@ -202,7 +200,6 @@ func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error { // for a single field or multiple document fields' data getting written to a single zapx field func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMapping, im *mapping.IndexMappingImpl, parent *pathInfo, rootName string) { - // Early exit if mapping has been disabled // Comparisions later on will be done with a nil object if !mp.Enabled { @@ -278,7 +275,6 @@ func 
compareCustomComponents(oriPaths, updPaths map[string]*pathInfo, ori, upd * // Standard analysers not in custom analysis are not compared // Analysers in custom analysis but not in use are not compared func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { - oriAnalyzers := make(map[string]interface{}) updAnalyzers := make(map[string]interface{}) oriCustomAnalysers := ori.CustomAnalysis.Analyzers @@ -330,7 +326,6 @@ func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping // Compares all date time parsers currently in use // Date time parsers in custom analysis but not in use are not compared func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { - oriDateTimeParsers := make(map[string]analysis.DateTimeParser) updDateTimeParsers := make(map[string]analysis.DateTimeParser) @@ -383,7 +378,6 @@ func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *m // Compares all synonym sources // Synonym sources currently not in use are also compared func compareSynonymSources(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { - oriSynonymSources := make(map[string]analysis.SynonymSource) updSynonymSources := make(map[string]analysis.SynonymSource) @@ -422,7 +416,6 @@ func compareSynonymSources(oriPaths, updPaths map[string]*pathInfo, ori, upd *ma // Compare all of the fields at a particular document path and add its field information func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) error { - var info *index.UpdateFieldInfo var updated bool var err error @@ -500,7 +493,6 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) e // update is impossible // third argument is an error explaining exactly why the change is not possible func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.UpdateFieldInfo, bool, error) { - 
rv := &index.UpdateFieldInfo{} if updated == nil { @@ -593,7 +585,6 @@ func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.Update // In such a situation, any conflicting changes found will abort the update process func validateFieldInfo(newInfo *index.UpdateFieldInfo, updated bool, fInfo map[string]*index.UpdateFieldInfo, ori *pathInfo, oriFMapInfo *fieldMapInfo) error { - var name string if oriFMapInfo.parent.parentPath == "" { if oriFMapInfo.fieldMapping.Name == "" { From 1611ac52ac61ca3cf9cc3130e35d535184f784c6 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 11 Mar 2025 11:58:08 +0530 Subject: [PATCH 11/25] MB-57888: Addressing review comments --- index/scorch/scorch.go | 13 +-- index_impl.go | 2 +- index_update_test.go | 208 ----------------------------------------- search_knn_test.go | 208 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 213 insertions(+), 218 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 074f32025..4ef1a5458 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -947,12 +947,7 @@ func (s *Scorch) FireIndexEvent() { // will be merged before persisting. 
The index mapping is also overwritted both // in bolt as well as the index snapshot func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error { - // Switch from pointer to value so we can marshal into a json for storage - updatedFields := make(map[string]index.UpdateFieldInfo) - for field, info := range fieldInfo { - updatedFields[field] = *info - } - err := s.updateBolt(updatedFields, mappingBytes) + err := s.updateBolt(fieldInfo, mappingBytes) if err != nil { return err } @@ -964,7 +959,7 @@ func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappi } // Merge and update deleted field info and rewrite index mapping -func (s *Scorch) updateBolt(fieldInfo map[string]index.UpdateFieldInfo, mappingBytes []byte) error { +func (s *Scorch) updateBolt(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error { return s.rootBolt.Update(func(tx *bolt.Tx) error { snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { @@ -995,7 +990,7 @@ func (s *Scorch) updateBolt(fieldInfo map[string]index.UpdateFieldInfo, mappingB if segmentBucket == nil { return fmt.Errorf("segment key, but bucket missing %x", kk) } - var updatedFields map[string]index.UpdateFieldInfo + var updatedFields map[string]*index.UpdateFieldInfo updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) if updatedFieldBytes != nil { err := json.Unmarshal(updatedFieldBytes, &updatedFields) @@ -1004,7 +999,7 @@ func (s *Scorch) updateBolt(fieldInfo map[string]index.UpdateFieldInfo, mappingB } for field, info := range fieldInfo { if val, ok := updatedFields[field]; ok { - updatedFields[field] = index.UpdateFieldInfo{ + updatedFields[field] = &index.UpdateFieldInfo{ Deleted: info.Deleted || val.Deleted, Store: info.Store || val.Store, DocValues: info.DocValues || val.DocValues, diff --git a/index_impl.go b/index_impl.go index 514a4c93d..cff909f42 100644 --- a/index_impl.go +++ b/index_impl.go @@ -179,7 +179,7 @@ func 
openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde if rck == "updated_mapping" { if val, ok := rcv.(string); ok { if len(val) == 0 { - return nil, fmt.Errorf(("updated_mapping is empty")) + return nil, fmt.Errorf("updated_mapping is empty") } umBytes = []byte(val) diff --git a/index_update_test.go b/index_update_test.go index 36506d9d4..e202e9319 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -2512,214 +2512,6 @@ func TestIndexUpdateText(t *testing.T) { } } -func TestIndexUpdateVector(t *testing.T) { - tmpIndexPath := createTmpIndexPath(t) - defer cleanupTmpIndexPath(t, tmpIndexPath) - - indexMappingBefore := mapping.NewIndexMapping() - indexMappingBefore.TypeMapping = map[string]*mapping.DocumentMapping{} - indexMappingBefore.DefaultMapping = &mapping.DocumentMapping{ - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{ - "a": { - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{}, - Fields: []*mapping.FieldMapping{ - { - Type: "vector", - Index: true, - Dims: 4, - Similarity: "l2_norm", - VectorIndexOptimizedFor: "latency", - }, - }, - }, - "b": { - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{}, - Fields: []*mapping.FieldMapping{ - { - Type: "vector", - Index: true, - Dims: 4, - Similarity: "l2_norm", - VectorIndexOptimizedFor: "latency", - }, - }, - }, - "c": { - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{}, - Fields: []*mapping.FieldMapping{ - { - Type: "vector_base64", - Index: true, - Dims: 4, - Similarity: "l2_norm", - VectorIndexOptimizedFor: "latency", - }, - }, - }, - "d": { - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{}, - Fields: []*mapping.FieldMapping{ - { - Type: "vector_base64", - Index: true, - Dims: 4, - Similarity: "l2_norm", - VectorIndexOptimizedFor: "latency", - }, - }, - }, - }, - Fields: []*mapping.FieldMapping{}, - } 
- indexMappingBefore.IndexDynamic = false - indexMappingBefore.StoreDynamic = false - indexMappingBefore.DocValuesDynamic = false - - index, err := New(tmpIndexPath, indexMappingBefore) - if err != nil { - t.Fatal(err) - } - doc1 := map[string]interface{}{"a": []float32{0.32894259691238403, 0.6973215341567993, 0.6835201978683472, 0.38296082615852356}, "b": []float32{0.32894259691238403, 0.6973215341567993, 0.6835201978683472, 0.38296082615852356}, "c": "L5MOPw7NID5SQMU9pHUoPw==", "d": "L5MOPw7NID5SQMU9pHUoPw=="} - doc2 := map[string]interface{}{"a": []float32{0.0018692062003538013, 0.41076546907424927, 0.5675257444381714, 0.45832985639572144}, "b": []float32{0.0018692062003538013, 0.41076546907424927, 0.5675257444381714, 0.45832985639572144}, "c": "czloP94ZCD71ldY+GbAOPw==", "d": "czloP94ZCD71ldY+GbAOPw=="} - doc3 := map[string]interface{}{"a": []float32{0.7853356599807739, 0.6904757618904114, 0.5643226504325867, 0.682637631893158}, "b": []float32{0.7853356599807739, 0.6904757618904114, 0.5643226504325867, 0.682637631893158}, "c": "Chh6P2lOqT47mjg/0odlPg==", "d": "Chh6P2lOqT47mjg/0odlPg=="} - batch := index.NewBatch() - err = batch.Index("001", doc1) - if err != nil { - t.Fatal(err) - } - err = batch.Index("002", doc2) - if err != nil { - t.Fatal(err) - } - err = batch.Index("003", doc3) - if err != nil { - t.Fatal(err) - } - err = index.Batch(batch) - if err != nil { - t.Fatal(err) - } - err = index.Close() - if err != nil { - t.Fatal(err) - } - - indexMappingAfter := mapping.NewIndexMapping() - indexMappingAfter.TypeMapping = map[string]*mapping.DocumentMapping{} - indexMappingAfter.DefaultMapping = &mapping.DocumentMapping{ - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{ - "a": { - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{}, - Fields: []*mapping.FieldMapping{ - { - Type: "vector", - Index: true, - Dims: 4, - Similarity: "l2_norm", - VectorIndexOptimizedFor: "latency", - }, - }, - }, 
- "c": { - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{}, - Fields: []*mapping.FieldMapping{ - { - Type: "vector_base64", - Index: true, - Dims: 4, - Similarity: "l2_norm", - VectorIndexOptimizedFor: "latency", - }, - }, - }, - "d": { - Enabled: true, - Dynamic: false, - Properties: map[string]*mapping.DocumentMapping{}, - Fields: []*mapping.FieldMapping{ - { - Type: "vector_base64", - Index: false, - Dims: 4, - Similarity: "l2_norm", - VectorIndexOptimizedFor: "latency", - }, - }, - }, - }, - Fields: []*mapping.FieldMapping{}, - } - indexMappingAfter.IndexDynamic = false - indexMappingAfter.StoreDynamic = false - indexMappingAfter.DocValuesDynamic = false - - mappingString, err := json.Marshal(indexMappingAfter) - if err != nil { - t.Fatal(err) - } - config := map[string]interface{}{ - "updated_mapping": string(mappingString), - } - - index, err = OpenUsing(tmpIndexPath, config) - if err != nil { - t.Fatal(err) - } - - q1 := NewSearchRequest(NewMatchNoneQuery()) - q1.AddKNN("a", []float32{1, 2, 3, 4}, 3, 1.0) - res1, err := index.Search(q1) - if err != nil { - t.Fatal(err) - } - if len(res1.Hits) != 3 { - t.Fatalf("Expected 3 hits, got %d\n", len(res1.Hits)) - } - q2 := NewSearchRequest(NewMatchNoneQuery()) - q2.AddKNN("e", []float32{1, 2, 3, 4}, 3, 1.0) - res2, err := index.Search(q2) - if err != nil { - t.Fatal(err) - } - if len(res2.Hits) != 0 { - t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) - } - q3 := NewSearchRequest(NewMatchNoneQuery()) - q3.AddKNN("c", []float32{1, 2, 3, 4}, 3, 1.0) - res3, err := index.Search(q3) - if err != nil { - t.Fatal(err) - } - if len(res3.Hits) != 3 { - t.Fatalf("Expected 3 hits, got %d\n", len(res3.Hits)) - } - q4 := NewSearchRequest(NewMatchNoneQuery()) - q4.AddKNN("d", []float32{1, 2, 3, 4}, 3, 1.0) - res4, err := index.Search(q4) - if err != nil { - t.Fatal(err) - } - if len(res4.Hits) != 0 { - t.Fatalf("Expected 0 hits, got %d\n", len(res4.Hits)) - } -} - func 
TestIndexUpdateSynonym(t *testing.T) { tmpIndexPath := createTmpIndexPath(t) defer cleanupTmpIndexPath(t, tmpIndexPath) diff --git a/search_knn_test.go b/search_knn_test.go index a2d207bfc..84cebd398 100644 --- a/search_knn_test.go +++ b/search_knn_test.go @@ -1701,3 +1701,211 @@ func TestNumVecsStat(t *testing.T) { } } } + +func TestIndexUpdateVector(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + indexMappingBefore := mapping.NewIndexMapping() + indexMappingBefore.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingBefore.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "b": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector_base64", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "d": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector_base64", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + }, + Fields: []*mapping.FieldMapping{}, + } + indexMappingBefore.IndexDynamic = false + indexMappingBefore.StoreDynamic = false + indexMappingBefore.DocValuesDynamic = false + + index, err := New(tmpIndexPath, 
indexMappingBefore) + if err != nil { + t.Fatal(err) + } + doc1 := map[string]interface{}{"a": []float32{0.32894259691238403, 0.6973215341567993, 0.6835201978683472, 0.38296082615852356}, "b": []float32{0.32894259691238403, 0.6973215341567993, 0.6835201978683472, 0.38296082615852356}, "c": "L5MOPw7NID5SQMU9pHUoPw==", "d": "L5MOPw7NID5SQMU9pHUoPw=="} + doc2 := map[string]interface{}{"a": []float32{0.0018692062003538013, 0.41076546907424927, 0.5675257444381714, 0.45832985639572144}, "b": []float32{0.0018692062003538013, 0.41076546907424927, 0.5675257444381714, 0.45832985639572144}, "c": "czloP94ZCD71ldY+GbAOPw==", "d": "czloP94ZCD71ldY+GbAOPw=="} + doc3 := map[string]interface{}{"a": []float32{0.7853356599807739, 0.6904757618904114, 0.5643226504325867, 0.682637631893158}, "b": []float32{0.7853356599807739, 0.6904757618904114, 0.5643226504325867, 0.682637631893158}, "c": "Chh6P2lOqT47mjg/0odlPg==", "d": "Chh6P2lOqT47mjg/0odlPg=="} + batch := index.NewBatch() + err = batch.Index("001", doc1) + if err != nil { + t.Fatal(err) + } + err = batch.Index("002", doc2) + if err != nil { + t.Fatal(err) + } + err = batch.Index("003", doc3) + if err != nil { + t.Fatal(err) + } + err = index.Batch(batch) + if err != nil { + t.Fatal(err) + } + err = index.Close() + if err != nil { + t.Fatal(err) + } + + indexMappingAfter := mapping.NewIndexMapping() + indexMappingAfter.TypeMapping = map[string]*mapping.DocumentMapping{} + indexMappingAfter.DefaultMapping = &mapping.DocumentMapping{ + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{ + "a": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector", + Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "c": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector_base64", + 
Index: true, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + "d": { + Enabled: true, + Dynamic: false, + Properties: map[string]*mapping.DocumentMapping{}, + Fields: []*mapping.FieldMapping{ + { + Type: "vector_base64", + Index: false, + Dims: 4, + Similarity: "l2_norm", + VectorIndexOptimizedFor: "latency", + }, + }, + }, + }, + Fields: []*mapping.FieldMapping{}, + } + indexMappingAfter.IndexDynamic = false + indexMappingAfter.StoreDynamic = false + indexMappingAfter.DocValuesDynamic = false + + mappingString, err := json.Marshal(indexMappingAfter) + if err != nil { + t.Fatal(err) + } + config := map[string]interface{}{ + "updated_mapping": string(mappingString), + } + + index, err = OpenUsing(tmpIndexPath, config) + if err != nil { + t.Fatal(err) + } + + q1 := NewSearchRequest(NewMatchNoneQuery()) + q1.AddKNN("a", []float32{1, 2, 3, 4}, 3, 1.0) + res1, err := index.Search(q1) + if err != nil { + t.Fatal(err) + } + if len(res1.Hits) != 3 { + t.Fatalf("Expected 3 hits, got %d\n", len(res1.Hits)) + } + q2 := NewSearchRequest(NewMatchNoneQuery()) + q2.AddKNN("e", []float32{1, 2, 3, 4}, 3, 1.0) + res2, err := index.Search(q2) + if err != nil { + t.Fatal(err) + } + if len(res2.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) + } + q3 := NewSearchRequest(NewMatchNoneQuery()) + q3.AddKNN("c", []float32{1, 2, 3, 4}, 3, 1.0) + res3, err := index.Search(q3) + if err != nil { + t.Fatal(err) + } + if len(res3.Hits) != 3 { + t.Fatalf("Expected 3 hits, got %d\n", len(res3.Hits)) + } + q4 := NewSearchRequest(NewMatchNoneQuery()) + q4.AddKNN("d", []float32{1, 2, 3, 4}, 3, 1.0) + res4, err := index.Search(q4) + if err != nil { + t.Fatal(err) + } + if len(res4.Hits) != 0 { + t.Fatalf("Expected 0 hits, got %d\n", len(res4.Hits)) + } +} From a18fe6f55cc3e05f7c2c14f97f11897fd3e3a3d3 Mon Sep 17 00:00:00 2001 From: Likith B Date: Wed, 12 Mar 2025 18:28:01 +0530 Subject: [PATCH 12/25] MB-57888: Fixed typo --- index_update.go 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index_update.go b/index_update.go index dff5fbc88..8319b2a5f 100644 --- a/index_update.go +++ b/index_update.go @@ -80,9 +80,9 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update // Go through each mapping present in the updated // and consolidate according to the document paths for name, updDMapping := range upd.TypeMapping { - addPathInfo(updPaths, "", updDMapping, ori, nil, name) + addPathInfo(updPaths, "", updDMapping, upd, nil, name) } - addPathInfo(updPaths, "", upd.DefaultMapping, ori, nil, "") + addPathInfo(updPaths, "", upd.DefaultMapping, upd, nil, "") // Compare all components of custom analysis currently in use err = compareCustomComponents(oriPaths, updPaths, ori, upd) From 829d50255363515155889c67b5c70b1b6c6ef682 Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 14 Mar 2025 16:56:54 +0530 Subject: [PATCH 13/25] MB-57888: Addressing review comments --- index/scorch/persister.go | 7 +- index/scorch/snapshot_index.go | 13 +- index_update.go | 226 ++++++++++++++------------------- index_update_test.go | 146 +++++++-------------- 4 files changed, 144 insertions(+), 248 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index f5f5509a0..4edd4a66b 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -1056,17 +1056,14 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro } updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) if updatedFieldBytes != nil { - var updatedFields map[string]index.UpdateFieldInfo + var updatedFields map[string]*index.UpdateFieldInfo err := json.Unmarshal(updatedFieldBytes, &updatedFields) if err != nil { _ = seg.Close() return nil, fmt.Errorf("error reading updated field bytes: %v", err) } - rv.updatedFields = make(map[string]*index.UpdateFieldInfo) - for field, info := range updatedFields { - rv.updatedFields[field] = &info - } + 
rv.updatedFields = updatedFields // Set the value within the segment base for use during merge rv.UpdateFieldsInfo(rv.updatedFields) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 8c198f3be..60a7f367e 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -803,15 +803,16 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } // Filter out fields that are supposed to have no doc values - var filteredFields []string + idx := 0 for _, field := range vFields { if info, ok := is.updatedFields[field]; ok && (info.DocValues || info.Deleted) { continue - } else { - filteredFields = append(filteredFields, field) } + vFields[idx] = field + idx++ } + vFields = vFields[:idx] var errCh chan error @@ -820,7 +821,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( // if the caller happens to know we're on the same segmentIndex // from a previous invocation if cFields == nil { - cFields = subtractStrings(fields, filteredFields) + cFields = subtractStrings(fields, vFields) if !ss.cachedDocs.hasFields(cFields) { errCh = make(chan error, 1) @@ -835,8 +836,8 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } } - if ssvOk && ssv != nil && len(filteredFields) > 0 { - dvs, err = ssv.VisitDocValues(localDocNum, filteredFields, visitor, dvs) + if ssvOk && ssv != nil && len(vFields) > 0 { + dvs, err = ssv.VisitDocValues(localDocNum, vFields, visitor, dvs) if err != nil { return nil, nil, err } diff --git a/index_update.go b/index_update.go index 8319b2a5f..469ba0244 100644 --- a/index_update.go +++ b/index_update.go @@ -18,7 +18,6 @@ import ( "fmt" "reflect" - "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/mapping" index "github.com/blevesearch/bleve_index_api" ) @@ -38,7 +37,6 @@ type fieldMapInfo struct { fieldMapping *mapping.FieldMapping analyzer string datetimeParser string - synonymSource string rootName string parent *pathInfo } @@ -134,10 +132,6 @@ func 
compareMappings(ori, upd *mapping.IndexMappingImpl) error { return fmt.Errorf("default type cannot be changed") } - if ori.DefaultField != upd.DefaultField { - return fmt.Errorf("default field cannot be changed") - } - if ori.IndexDynamic != upd.IndexDynamic { return fmt.Errorf("index dynamic cannot be changed") } @@ -263,7 +257,13 @@ func compareCustomComponents(oriPaths, updPaths map[string]*pathInfo, ori, upd * } // Compare all synonum sources - err = compareSynonymSources(oriPaths, updPaths, ori, upd) + err = compareSynonymSources(ori, upd) + if err != nil { + return err + } + + // Compare all char filters, tokenizers, token filters and token maps + err = compareAnalyserSubcomponents(ori, upd) if err != nil { return err } @@ -277,38 +277,24 @@ func compareCustomComponents(oriPaths, updPaths map[string]*pathInfo, ori, upd * func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { oriAnalyzers := make(map[string]interface{}) updAnalyzers := make(map[string]interface{}) - oriCustomAnalysers := ori.CustomAnalysis.Analyzers - updCustomAnalysers := upd.CustomAnalysis.Analyzers - for path, info := range oriPaths { - if len(info.fieldMapInfo) == 0 { - continue - } - for _, fInfo := range info.fieldMapInfo { - if fInfo.fieldMapping.Type == "text" { - analyzerName := ori.AnalyzerNameForPath(path) - fInfo.analyzer = analyzerName - if val, ok := oriCustomAnalysers[analyzerName]; ok { - oriAnalyzers[analyzerName] = val + extractAnalyzers := func(paths map[string]*pathInfo, customAnalyzers map[string]map[string]interface{}, + analyzers map[string]interface{}, indexMapping *mapping.IndexMappingImpl) { + for path, info := range paths { + for _, fInfo := range info.fieldMapInfo { + if fInfo.fieldMapping.Type == "text" { + analyzerName := indexMapping.AnalyzerNameForPath(path) + fInfo.analyzer = analyzerName + if val, ok := customAnalyzers[analyzerName]; ok { + analyzers[analyzerName] = val + } } } } } - for path, info := range 
updPaths { - if len(info.fieldMapInfo) == 0 { - continue - } - for _, fInfo := range info.fieldMapInfo { - if fInfo.fieldMapping.Type == "text" { - analyzerName := upd.AnalyzerNameForPath(path) - fInfo.analyzer = analyzerName - if val, ok := updCustomAnalysers[analyzerName]; ok { - updAnalyzers[analyzerName] = val - } - } - } - } + extractAnalyzers(oriPaths, ori.CustomAnalysis.Analyzers, oriAnalyzers, ori) + extractAnalyzers(updPaths, upd.CustomAnalysis.Analyzers, updAnalyzers, upd) for name, anUpd := range updAnalyzers { if anOri, ok := oriAnalyzers[name]; ok { @@ -326,41 +312,29 @@ func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping // Compares all date time parsers currently in use // Date time parsers in custom analysis but not in use are not compared func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { - oriDateTimeParsers := make(map[string]analysis.DateTimeParser) - updDateTimeParsers := make(map[string]analysis.DateTimeParser) - - for _, info := range oriPaths { - if len(info.fieldMapInfo) == 0 { - continue - } - for _, fInfo := range info.fieldMapInfo { - if fInfo.fieldMapping.Type == "datetime" { - if fInfo.fieldMapping.DateFormat == "" { - fInfo.datetimeParser = ori.DefaultDateTimeParser - oriDateTimeParsers[ori.DefaultDateTimeParser] = ori.DateTimeParserNamed(ori.DefaultDateTimeParser) - } else { - oriDateTimeParsers[fInfo.fieldMapping.DateFormat] = ori.DateTimeParserNamed(fInfo.fieldMapping.DateFormat) + oriDateTimeParsers := make(map[string]interface{}) + updDateTimeParsers := make(map[string]interface{}) + + extractDateTimeParsers := func(paths map[string]*pathInfo, customParsers map[string]map[string]interface{}, + parsers map[string]interface{}, indexMapping *mapping.IndexMappingImpl) { + for _, info := range paths { + for _, fInfo := range info.fieldMapInfo { + if fInfo.fieldMapping.Type == "datetime" { + parserName := fInfo.fieldMapping.DateFormat + if 
parserName == "" { + parserName = indexMapping.DefaultDateTimeParser + } + fInfo.datetimeParser = parserName + if val, ok := customParsers[parserName]; ok { + parsers[parserName] = val + } } } } } - for _, info := range updPaths { - if len(info.fieldMapInfo) == 0 { - continue - } - for _, fInfo := range info.fieldMapInfo { - if fInfo.fieldMapping.Type == "datetime" { - if fInfo.fieldMapping.DateFormat == "" { - fInfo.datetimeParser = upd.DefaultDateTimeParser - updDateTimeParsers[upd.DefaultDateTimeParser] = upd.DateTimeParserNamed(upd.DefaultDateTimeParser) - } else { - fInfo.datetimeParser = fInfo.fieldMapping.DateFormat - updDateTimeParsers[fInfo.fieldMapping.DateFormat] = upd.DateTimeParserNamed(fInfo.fieldMapping.DateFormat) - } - } - } - } + extractDateTimeParsers(oriPaths, ori.CustomAnalysis.DateTimeParsers, oriDateTimeParsers, ori) + extractDateTimeParsers(updPaths, upd.CustomAnalysis.DateTimeParsers, updDateTimeParsers, upd) for name, dtUpd := range updDateTimeParsers { if dtOri, ok := oriDateTimeParsers[name]; ok { @@ -377,38 +351,31 @@ func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *m // Compares all synonym sources // Synonym sources currently not in use are also compared -func compareSynonymSources(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error { - oriSynonymSources := make(map[string]analysis.SynonymSource) - updSynonymSources := make(map[string]analysis.SynonymSource) +func compareSynonymSources(ori, upd *mapping.IndexMappingImpl) error { + if !reflect.DeepEqual(ori.CustomAnalysis.SynonymSources, upd.CustomAnalysis.SynonymSources) { + return fmt.Errorf("synonym sources cannot be changed") + } - for path, info := range oriPaths { - if len(info.fieldMapInfo) == 0 { - continue - } - for _, fInfo := range info.fieldMapInfo { - if fInfo.fieldMapping.Type == "text" { - synonymSourceName := ori.SynonymSourceForPath(path) - fInfo.synonymSource = synonymSourceName - 
oriSynonymSources[synonymSourceName] = ori.SynonymSourceNamed(synonymSourceName) - } - } + return nil +} + +// Compares all char filters, tokenizers, token filters and token maps +// Components not currently in use are also compared +func compareAnalyserSubcomponents(ori, upd *mapping.IndexMappingImpl) error { + if !reflect.DeepEqual(ori.CustomAnalysis.CharFilters, upd.CustomAnalysis.CharFilters) { + return fmt.Errorf("char filters cannot be changed") } - for path, info := range updPaths { - if len(info.fieldMapInfo) == 0 { - continue - } - for _, fInfo := range info.fieldMapInfo { - if fInfo.fieldMapping.Type == "text" { - synonymSourceName := upd.SynonymSourceForPath(path) - fInfo.synonymSource = synonymSourceName - updSynonymSources[synonymSourceName] = upd.SynonymSourceNamed(synonymSourceName) - } - } + if !reflect.DeepEqual(ori.CustomAnalysis.TokenFilters, upd.CustomAnalysis.TokenFilters) { + return fmt.Errorf("token filters cannot be changed") } - if !reflect.DeepEqual(ori.CustomAnalysis.SynonymSources, upd.CustomAnalysis.SynonymSources) { - return fmt.Errorf("synonym sources cannot be changed") + if !reflect.DeepEqual(ori.CustomAnalysis.TokenMaps, upd.CustomAnalysis.TokenMaps) { + return fmt.Errorf("token maps cannot be changed") + } + + if !reflect.DeepEqual(ori.CustomAnalysis.Tokenizers, upd.CustomAnalysis.Tokenizers) { + return fmt.Errorf("tokenizers cannot be changed") } return nil @@ -417,18 +384,17 @@ func compareSynonymSources(oriPaths, updPaths map[string]*pathInfo, ori, upd *ma // Compare all of the fields at a particular document path and add its field information func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) error { var info *index.UpdateFieldInfo - var updated bool var err error // Assume deleted or disabled mapping if upd is nil. 
Checks for ori being nil // or upd having mappings not in orihave already been done before this stage if upd == nil { for _, oriFMapInfo := range ori.fieldMapInfo { - info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil) + info, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil) if err != nil { return err } - err = validateFieldInfo(info, updated, fInfo, ori, oriFMapInfo) + err = validateFieldInfo(info, fInfo, ori, oriFMapInfo) if err != nil { return err } @@ -438,7 +404,6 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) e var updFMap *mapping.FieldMapping var updAnalyser string var updDatetimeParser string - var updSynonymSource string // For multiple fields at a single document path, compare // only with the matching ones @@ -448,7 +413,6 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) e updFMap = updFMapInfo.fieldMapping if updFMap.Type == "text" { updAnalyser = updFMapInfo.analyzer - updSynonymSource = updFMapInfo.synonymSource } else if updFMap.Type == "datetime" { updDatetimeParser = updFMapInfo.datetimeParser } @@ -462,17 +426,14 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) e if updDatetimeParser != "" && oriFMapInfo.datetimeParser != updDatetimeParser { return fmt.Errorf("datetime parser has been changed for a text field") } - if updSynonymSource != "" && oriFMapInfo.synonymSource != updSynonymSource { - return fmt.Errorf("synonym source has been changed for a text field") - } - info, updated, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap) + info, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap) if err != nil { return err } // Validate to ensure change is possible // Needed if multiple mappings are aliased to the same field - err = validateFieldInfo(info, updated, fInfo, ori, oriFMapInfo) + err = validateFieldInfo(info, fInfo, ori, oriFMapInfo) if err != nil { return err } @@ -492,63 +453,62 @@ func 
addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) e // second return argument gives a flag indicating whether any changes, if detected, are doable or if // update is impossible // third argument is an error explaining exactly why the change is not possible -func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.UpdateFieldInfo, bool, error) { +func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.UpdateFieldInfo, error) { rv := &index.UpdateFieldInfo{} if updated == nil { if original != nil && !original.IncludeInAll { rv.Deleted = true - return rv, true, nil + return rv, nil } else if original == nil { - return nil, false, nil + return nil, fmt.Errorf("both field mappings cannot be nil") } - return nil, false, fmt.Errorf("deleted field present in '_all' field") + return nil, fmt.Errorf("deleted field present in '_all' field") } else if original == nil { - return nil, false, fmt.Errorf("matching field not found in original index mapping") + return nil, fmt.Errorf("matching field not found in original index mapping") } if original.Type != updated.Type { - return nil, false, fmt.Errorf("field type cannot be updated") + return nil, fmt.Errorf("field type cannot be updated") } if original.Type == "text" { - if original.SynonymSource != updated.SynonymSource { - return nil, false, fmt.Errorf("synonym source cannot be changed for text field") - } if original.Analyzer != updated.Analyzer { - return nil, false, fmt.Errorf("analyzer cannot be updated for text fields") + return nil, fmt.Errorf("analyzer cannot be updated for text fields") } } if original.Type == "datetime" { if original.DateFormat != updated.DateFormat { - return nil, false, fmt.Errorf("dateFormat cannot be updated for datetime fields") + return nil, fmt.Errorf("dateFormat cannot be updated for datetime fields") } } if original.Type == "vector" || original.Type == "vector_base64" { if original.Dims != updated.Dims { - return nil, false, 
fmt.Errorf("dimensions cannot be updated for vector and vector_base64 fields") + return nil, fmt.Errorf("dimensions cannot be updated for vector and vector_base64 fields") } if original.Similarity != updated.Similarity { - return nil, false, fmt.Errorf("similarity cannot be updated for vector and vector_base64 fields") + return nil, fmt.Errorf("similarity cannot be updated for vector and vector_base64 fields") } if original.VectorIndexOptimizedFor != updated.VectorIndexOptimizedFor { - return nil, false, fmt.Errorf("vectorIndexOptimizedFor cannot be updated for vector and vector_base64 fields") + return nil, fmt.Errorf("vectorIndexOptimizedFor cannot be updated for vector and vector_base64 fields") } } if original.IncludeInAll != updated.IncludeInAll { - return nil, false, fmt.Errorf("includeInAll cannot be changed") + return nil, fmt.Errorf("includeInAll cannot be changed") } if original.IncludeTermVectors != updated.IncludeTermVectors { - return nil, false, fmt.Errorf("includeTermVectors cannot be changed") + return nil, fmt.Errorf("includeTermVectors cannot be changed") } if original.SkipFreqNorm != updated.SkipFreqNorm { - return nil, false, fmt.Errorf("skipFreqNorm cannot be changed") + return nil, fmt.Errorf("skipFreqNorm cannot be changed") } // Updating is not possible if store changes from true // to false when the field is included in _all if original.Store != updated.Store { - if updated.Store || updated.IncludeInAll { - return nil, false, fmt.Errorf("store cannot be changed if field present in `_all' field") + if updated.Store { + return nil, fmt.Errorf("store cannot be changed from false to true") + } else if updated.IncludeInAll { + return nil, fmt.Errorf("store cannot be changed if field present in `_all' field") } else { rv.Store = true } @@ -557,8 +517,10 @@ func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.Update // Updating is not possible if index changes from true // to false when the field is included in _all if 
original.Index != updated.Index { - if updated.Index || updated.IncludeInAll { - return nil, false, fmt.Errorf("index cannot be changed if field present in `_all' field") + if updated.Index { + return nil, fmt.Errorf("index cannot be changed from false to true") + } else if updated.IncludeInAll { + return nil, fmt.Errorf("index cannot be changed if field present in `_all' field") } else { rv.Index = true rv.DocValues = true @@ -568,22 +530,21 @@ func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.Update // Updating is not possible if docvalues changes from true // to false when the field is included in _all if original.DocValues != updated.DocValues { - if updated.DocValues || updated.IncludeInAll { - return nil, false, fmt.Errorf("docvalues cannot be changed if field present in `_all' field") + if updated.DocValues { + return nil, fmt.Errorf("docvalues cannot be changed from false to true") + } else if updated.IncludeInAll { + return nil, fmt.Errorf("docvalues cannot be changed if field present in `_all' field") } else { rv.DocValues = true } } - if rv.Deleted || rv.Index || rv.Store || rv.DocValues { - return rv, true, nil - } - return rv, false, nil + return rv, nil } // After identifying changes, validate against the existing changes incase of duplicate fields. // In such a situation, any conflicting changes found will abort the update process -func validateFieldInfo(newInfo *index.UpdateFieldInfo, updated bool, fInfo map[string]*index.UpdateFieldInfo, +func validateFieldInfo(newInfo *index.UpdateFieldInfo, fInfo map[string]*index.UpdateFieldInfo, ori *pathInfo, oriFMapInfo *fieldMapInfo) error { var name string if oriFMapInfo.parent.parentPath == "" { @@ -599,14 +560,11 @@ func validateFieldInfo(newInfo *index.UpdateFieldInfo, updated bool, fInfo map[s name = oriFMapInfo.parent.parentPath + "." 
+ oriFMapInfo.fieldMapping.Name } } - if updated { - if ori.dynamic { - return fmt.Errorf("updated field is under a dynamic property") - } + if (newInfo.Deleted || newInfo.Index || newInfo.DocValues || newInfo.Store) && ori.dynamic { + return fmt.Errorf("updated field is under a dynamic property") } if oldInfo, ok := fInfo[name]; ok { - if oldInfo.Deleted != newInfo.Deleted || oldInfo.Index != newInfo.Index || - oldInfo.DocValues != newInfo.DocValues || oldInfo.Store != newInfo.Store { + if !reflect.DeepEqual(oldInfo, newInfo) { return fmt.Errorf("updated field impossible to verify because multiple mappings point to the same field name") } } else { diff --git a/index_update_test.go b/index_update_test.go index e202e9319..85e8ab1b9 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -42,15 +42,13 @@ func TestCompareFieldMapping(t *testing.T) { original *mapping.FieldMapping updated *mapping.FieldMapping indexFieldInfo *index.UpdateFieldInfo - changed bool err bool }{ - { // both nil => no op + { // both nil => error original: nil, updated: nil, indexFieldInfo: nil, - changed: false, - err: false, + err: true, }, { // updated nil => delete all original: &mapping.FieldMapping{}, @@ -58,8 +56,7 @@ func TestCompareFieldMapping(t *testing.T) { indexFieldInfo: &index.UpdateFieldInfo{ Deleted: true, }, - changed: true, - err: false, + err: false, }, { // type changed => not updatable original: &mapping.FieldMapping{ @@ -69,10 +66,9 @@ func TestCompareFieldMapping(t *testing.T) { Type: "datetime", }, indexFieldInfo: nil, - changed: false, err: true, }, - { // synonym source changed for text => not updatable + { // synonym source changed for text => updatable original: &mapping.FieldMapping{ Type: "text", SynonymSource: "a", @@ -81,9 +77,8 @@ func TestCompareFieldMapping(t *testing.T) { Type: "text", SynonymSource: "b", }, - indexFieldInfo: nil, - changed: false, - err: true, + indexFieldInfo: &index.UpdateFieldInfo{}, + err: false, }, { // analyser changed for 
text => not updatable original: &mapping.FieldMapping{ @@ -95,7 +90,6 @@ func TestCompareFieldMapping(t *testing.T) { Analyzer: "b", }, indexFieldInfo: nil, - changed: false, err: true, }, { // dims changed for vector => not updatable @@ -112,7 +106,6 @@ func TestCompareFieldMapping(t *testing.T) { VectorIndexOptimizedFor: "memory-efficient", }, indexFieldInfo: nil, - changed: false, err: true, }, { // similarity changed for vectorbase64 => not updatable @@ -129,7 +122,6 @@ func TestCompareFieldMapping(t *testing.T) { VectorIndexOptimizedFor: "memory-efficient", }, indexFieldInfo: nil, - changed: false, err: true, }, { // vectorindexoptimizedfor chagned for vector => not updatable @@ -146,7 +138,6 @@ func TestCompareFieldMapping(t *testing.T) { VectorIndexOptimizedFor: "latency", }, indexFieldInfo: nil, - changed: false, err: true, }, { // includeinall changed => not updatable @@ -159,7 +150,6 @@ func TestCompareFieldMapping(t *testing.T) { IncludeInAll: false, }, indexFieldInfo: nil, - changed: false, err: true, }, { //includetermvectors changed => not updatable @@ -172,7 +162,6 @@ func TestCompareFieldMapping(t *testing.T) { IncludeTermVectors: true, }, indexFieldInfo: nil, - changed: false, err: true, }, { // store changed after all checks => updatable with store delete @@ -185,7 +174,6 @@ func TestCompareFieldMapping(t *testing.T) { SkipFreqNorm: false, }, indexFieldInfo: nil, - changed: false, err: true, }, { // index changed after all checks => updatable with index and docvalues delete @@ -201,8 +189,7 @@ func TestCompareFieldMapping(t *testing.T) { Index: true, DocValues: true, }, - changed: true, - err: false, + err: false, }, { // docvalues changed after all checks => docvalues delete original: &mapping.FieldMapping{ @@ -216,8 +203,7 @@ func TestCompareFieldMapping(t *testing.T) { indexFieldInfo: &index.UpdateFieldInfo{ DocValues: true, }, - changed: true, - err: false, + err: false, }, { // no relavent changes => continue but no op original: 
&mapping.FieldMapping{ @@ -253,20 +239,16 @@ func TestCompareFieldMapping(t *testing.T) { SynonymSource: "b", }, indexFieldInfo: &index.UpdateFieldInfo{}, - changed: false, err: false, }, } for i, test := range tests { - rv, changed, err := compareFieldMapping(test.original, test.updated) + rv, err := compareFieldMapping(test.original, test.updated) if err == nil && test.err || err != nil && !test.err { t.Errorf("Unexpected error value for test %d, expecting %t, got %v\n", i, test.err, err) } - if changed != test.changed { - t.Errorf("Unexpected changed value for test %d, expecting %t, got %t, err %v\n", i, test.changed, changed, err) - } if rv == nil && test.indexFieldInfo != nil || rv != nil && test.indexFieldInfo == nil || !reflect.DeepEqual(rv, test.indexFieldInfo) { t.Errorf("Unexpected index field info value for test %d, expecting %+v, got %+v, err %v", i, test.indexFieldInfo, rv, err) } @@ -332,14 +314,14 @@ func TestCompareMappings(t *testing.T) { }, err: false, }, - { // changed default field => error + { // changed default field => false original: &mapping.IndexMappingImpl{ DefaultField: "a", }, updated: &mapping.IndexMappingImpl{ DefaultField: "b", }, - err: true, + err: false, }, { // changed index dynamic => error original: &mapping.IndexMappingImpl{ @@ -555,6 +537,9 @@ func TestCompareDatetimeParsers(t *testing.T) { "2006/01/02 3:04PM", }, }) + if err != nil { + t.Fatal(err) + } err = upd.AddCustomDateTimeParser("customDT", map[string]interface{}{ "type": sanitized.Name, @@ -563,6 +548,9 @@ func TestCompareDatetimeParsers(t *testing.T) { "2006/01/02 3:04PM", }, }) + if err != nil { + t.Fatal(err) + } oriPaths := map[string]*pathInfo{ "a": { @@ -647,7 +635,7 @@ func TestCompareDatetimeParsers(t *testing.T) { // Test case has identical datetime parsers for all fields err = compareDateTimeParsers(oriPaths, updPaths, ori, upd) if err != nil { - t.Errorf("Expected error to be nil, got %v", err) + t.Fatalf("Expected error to be nil, got %v", err) } ori2 := 
mapping.NewIndexMapping() @@ -671,6 +659,9 @@ func TestCompareDatetimeParsers(t *testing.T) { "2006/01/02 3:04PM", }, }) + if err != nil { + t.Fatal(err) + } err = upd2.AddCustomDateTimeParser("customDT", map[string]interface{}{ "type": sanitized.Name, @@ -679,6 +670,9 @@ func TestCompareDatetimeParsers(t *testing.T) { "2006/01/02", }, }) + if err != nil { + t.Fatal(err) + } // test case has different custom datetime parser for field "b" err = compareDateTimeParsers(oriPaths, updPaths, ori2, upd2) @@ -731,62 +725,8 @@ func TestCompareSynonymSources(t *testing.T) { t.Fatal(err) } - oriPaths := map[string]*pathInfo{ - "a": { - fieldMapInfo: []*fieldMapInfo{ - { - fieldMapping: &mapping.FieldMapping{ - Type: "text", - }, - }, - }, - dynamic: false, - path: "a", - parentPath: "", - }, - "b": { - fieldMapInfo: []*fieldMapInfo{ - { - fieldMapping: &mapping.FieldMapping{ - Type: "text", - }, - }, - }, - dynamic: false, - path: "b", - parentPath: "", - }, - } - - updPaths := map[string]*pathInfo{ - "a": { - fieldMapInfo: []*fieldMapInfo{ - { - fieldMapping: &mapping.FieldMapping{ - Type: "text", - }, - }, - }, - dynamic: false, - path: "a", - parentPath: "", - }, - "b": { - fieldMapInfo: []*fieldMapInfo{ - { - fieldMapping: &mapping.FieldMapping{ - Type: "text", - }, - }, - }, - dynamic: false, - path: "b", - parentPath: "", - }, - } - - // Test case has identical synonym sources for all fields - err = compareSynonymSources(oriPaths, updPaths, ori, upd) + // Test case has identical synonym sources + err = compareSynonymSources(ori, upd) if err != nil { t.Errorf("Expected error to be nil, got %v", err) } @@ -833,8 +773,8 @@ func TestCompareSynonymSources(t *testing.T) { t.Fatal(err) } - // test case has different synonym source for field "b" - err = compareSynonymSources(oriPaths, updPaths, ori2, upd2) + // test case has different synonym sources + err = compareSynonymSources(ori2, upd2) if err == nil { t.Errorf("Expected error, got nil") } @@ -2475,10 +2415,10 @@ func 
TestIndexUpdateText(t *testing.T) { t.Fatal(err) } if len(res1.Hits) != 3 { - t.Fatalf("Expected 3 hits, got %d\n", len(res1.Hits)) + t.Errorf("Expected 3 hits, got %d\n", len(res1.Hits)) } if len(res1.Hits[0].Fields) != 1 { - t.Fatalf("Expected 1 field, got %d\n", len(res1.Hits[0].Fields)) + t.Errorf("Expected 1 field, got %d\n", len(res1.Hits[0].Fields)) } q2 := NewSearchRequest(NewQueryStringQuery("b:*")) q2.Fields = append(q2.Fields, "b") @@ -2487,7 +2427,7 @@ func TestIndexUpdateText(t *testing.T) { t.Fatal(err) } if len(res2.Hits) != 0 { - t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) + t.Errorf("Expected 0 hits, got %d\n", len(res2.Hits)) } q3 := NewSearchRequest(NewQueryStringQuery("c:*")) q3.Fields = append(q3.Fields, "c") @@ -2496,10 +2436,10 @@ func TestIndexUpdateText(t *testing.T) { t.Fatal(err) } if len(res3.Hits) != 3 { - t.Fatalf("Expected 3 hits, got %d\n", len(res3.Hits)) + t.Errorf("Expected 3 hits, got %d\n", len(res3.Hits)) } if len(res3.Hits[0].Fields) != 0 { - t.Fatalf("Expected 0 fields, got %d\n", len(res3.Hits[0].Fields)) + t.Errorf("Expected 0 fields, got %d\n", len(res3.Hits[0].Fields)) } q4 := NewSearchRequest(NewQueryStringQuery("d:*")) q4.Fields = append(q4.Fields, "d") @@ -2508,7 +2448,7 @@ func TestIndexUpdateText(t *testing.T) { t.Fatal(err) } if len(res4.Hits) != 0 { - t.Fatalf("Expected 0 hits, got %d\n", len(res4.Hits)) + t.Errorf("Expected 0 hits, got %d\n", len(res4.Hits)) } } @@ -2646,7 +2586,7 @@ func TestIndexUpdateSynonym(t *testing.T) { t.Fatal(err) } if len(res1.Hits) != 1 { - t.Fatalf("Expected 1 hit, got %d\n", len(res1.Hits)) + t.Errorf("Expected 1 hit, got %d\n", len(res1.Hits)) } q2 := NewSearchRequest(NewQueryStringQuery("b:devoted")) @@ -2655,7 +2595,7 @@ func TestIndexUpdateSynonym(t *testing.T) { t.Fatal(err) } if len(res2.Hits) != 0 { - t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) + t.Errorf("Expected 0 hits, got %d\n", len(res2.Hits)) } q3 := 
NewSearchRequest(NewQueryStringQuery("c:unruffled")) @@ -2664,7 +2604,7 @@ func TestIndexUpdateSynonym(t *testing.T) { t.Fatal(err) } if len(res3.Hits) != 0 { - t.Fatalf("Expected 0 hits, got %d\n", len(res3.Hits)) + t.Errorf("Expected 0 hits, got %d\n", len(res3.Hits)) } } @@ -2877,10 +2817,10 @@ func TestIndexUpdateMerge(t *testing.T) { t.Fatal(err) } if len(res1.Hits) != 10 { - t.Fatalf("Expected 10 hits, got %d\n", len(res1.Hits)) + t.Errorf("Expected 10 hits, got %d\n", len(res1.Hits)) } if len(res1.Hits[0].Fields) != 1 { - t.Fatalf("Expected 1 field, got %d\n", len(res1.Hits[0].Fields)) + t.Errorf("Expected 1 field, got %d\n", len(res1.Hits[0].Fields)) } q2 := NewSearchRequest(NewQueryStringQuery("b:*")) q2.Fields = append(q2.Fields, "b") @@ -2889,7 +2829,7 @@ func TestIndexUpdateMerge(t *testing.T) { t.Fatal(err) } if len(res2.Hits) != 0 { - t.Fatalf("Expected 0 hits, got %d\n", len(res2.Hits)) + t.Errorf("Expected 0 hits, got %d\n", len(res2.Hits)) } q3 := NewSearchRequest(NewQueryStringQuery("c:*")) q3.Fields = append(q3.Fields, "c") @@ -2898,10 +2838,10 @@ func TestIndexUpdateMerge(t *testing.T) { t.Fatal(err) } if len(res3.Hits) != 10 { - t.Fatalf("Expected 10 hits, got %d\n", len(res3.Hits)) + t.Errorf("Expected 10 hits, got %d\n", len(res3.Hits)) } if len(res3.Hits[0].Fields) != 0 { - t.Fatalf("Expected 0 fields, got %d\n", len(res3.Hits[0].Fields)) + t.Errorf("Expected 0 fields, got %d\n", len(res3.Hits[0].Fields)) } q4 := NewSearchRequest(NewQueryStringQuery("d:*")) q4.Fields = append(q4.Fields, "d") @@ -2910,7 +2850,7 @@ func TestIndexUpdateMerge(t *testing.T) { t.Fatal(err) } if len(res4.Hits) != 0 { - t.Fatalf("Expected 0 hits, got %d\n", len(res4.Hits)) + t.Errorf("Expected 0 hits, got %d\n", len(res4.Hits)) } } From b65b4aa5d4382e608512a246b527d65c0e30ddf3 Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 14 Mar 2025 17:27:32 +0530 Subject: [PATCH 14/25] MB-57888: Added checks for all dynamic mappings' analysers and datetime parsers --- 
index_update.go | 21 +++++++++--- index_update_test.go | 79 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 4 deletions(-) diff --git a/index_update.go b/index_update.go index 469ba0244..5eb02ba49 100644 --- a/index_update.go +++ b/index_update.go @@ -28,6 +28,7 @@ type pathInfo struct { fieldMapInfo []*fieldMapInfo dynamic bool path string + analyser string parentPath string } @@ -121,7 +122,7 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update return fieldInfo, nil } -// Ensures non of the top level index mapping fields have changed +// Ensures none of the top level index mapping fields have changed func compareMappings(ori, upd *mapping.IndexMappingImpl) error { if ori.TypeField != upd.TypeField && (len(ori.TypeMapping) != 0 || len(upd.TypeMapping) != 0) { @@ -137,11 +138,19 @@ func compareMappings(ori, upd *mapping.IndexMappingImpl) error { } if ori.StoreDynamic != upd.StoreDynamic { - return fmt.Errorf(("store dynamic cannot be changed")) + return fmt.Errorf("store dynamic cannot be changed") } if ori.DocValuesDynamic != upd.DocValuesDynamic { - return fmt.Errorf(("docvalues dynamic cannot be changed")) + return fmt.Errorf("docvalues dynamic cannot be changed") + } + + if ori.DefaultAnalyzer != upd.DefaultAnalyzer && upd.IndexDynamic { + return fmt.Errorf("default analyser cannot be changed if index dynamic is true") + } + + if ori.DefaultDateTimeParser != upd.DefaultDateTimeParser && upd.IndexDynamic { + return fmt.Errorf("default datetime parser cannot be changed if index dynamic is true") } return nil @@ -210,6 +219,7 @@ func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMa fieldMapInfo: make([]*fieldMapInfo, 0), } pInfo.dynamic = mp.Dynamic && im.IndexDynamic + pInfo.analyser = im.AnalyzerNameForPath(name) } pInfo.dynamic = (pInfo.dynamic || mp.Dynamic) && im.IndexDynamic @@ -400,6 +410,9 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd 
*pathInfo) e } } } else { + if upd.dynamic && ori.analyser != upd.analyser { + return fmt.Errorf("analyser has been changed for a dynamic mapping") + } for _, oriFMapInfo := range ori.fieldMapInfo { var updFMap *mapping.FieldMapping var updAnalyser string @@ -424,7 +437,7 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) e return fmt.Errorf("analyser has been changed for a text field") } if updDatetimeParser != "" && oriFMapInfo.datetimeParser != updDatetimeParser { - return fmt.Errorf("datetime parser has been changed for a text field") + return fmt.Errorf("datetime parser has been changed for a date time field") } info, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap) if err != nil { diff --git a/index_update_test.go b/index_update_test.go index 85e8ab1b9..fef5ff73f 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -787,6 +787,85 @@ func TestDeletedFields(t *testing.T) { fieldInfo map[string]*index.UpdateFieldInfo err bool }{ + { + // changed default analyzer with index dynamic + // => error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{}, + DefaultMapping: &mapping.DocumentMapping{}, + DefaultAnalyzer: standard.Name, + IndexDynamic: true, + StoreDynamic: false, + DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{}, + DefaultMapping: &mapping.DocumentMapping{}, + DefaultAnalyzer: simple.Name, + IndexDynamic: true, + StoreDynamic: false, + DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, + }, + fieldInfo: nil, + err: true, + }, + { + // changed default analyzer within a mapping with index dynamic + // => error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{}, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: true, + DefaultAnalyzer: standard.Name, + }, + 
DefaultAnalyzer: "", + IndexDynamic: true, + StoreDynamic: false, + DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{}, + DefaultMapping: &mapping.DocumentMapping{ + Enabled: true, + Dynamic: true, + DefaultAnalyzer: simple.Name, + }, + IndexDynamic: true, + StoreDynamic: false, + DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, + }, + fieldInfo: nil, + err: true, + }, + { + // changed default datetime parser with index dynamic + // => error + original: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{}, + DefaultMapping: &mapping.DocumentMapping{}, + DefaultDateTimeParser: percent.Name, + IndexDynamic: true, + StoreDynamic: false, + DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, + }, + updated: &mapping.IndexMappingImpl{ + TypeMapping: map[string]*mapping.DocumentMapping{}, + DefaultMapping: &mapping.DocumentMapping{}, + DefaultDateTimeParser: sanitized.Name, + IndexDynamic: true, + StoreDynamic: false, + DocValuesDynamic: false, + CustomAnalysis: NewIndexMapping().CustomAnalysis, + }, + fieldInfo: nil, + err: true, + }, { // no change between original and updated having type and default mapping // => empty fieldInfo with no error From 2ff54174db0938af3966c252668dd9e9ac3c97de Mon Sep 17 00:00:00 2001 From: Likith B Date: Thu, 20 Mar 2025 11:11:54 +0530 Subject: [PATCH 15/25] MB-57888: Adding documentation and addressing review comments --- docs/index_update.md | 29 +++++++++++++++++++++++++++++ index/scorch/optimize_knn.go | 3 ++- index/scorch/snapshot_index.go | 14 +++++++++++--- index/scorch/snapshot_segment.go | 1 - 4 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 docs/index_update.md diff --git a/docs/index_update.md b/docs/index_update.md new file mode 100644 index 000000000..1de2ffb43 --- /dev/null +++ b/docs/index_update.md @@ -0,0 
+1,29 @@ +# Ability to reduce downtime during index mapping updates + +* *v2.5.0* (and after) will come with support to delete certain fields or parts of the fields without requiring a full rebuild of the index +* We do this by storing which portions of the field has to be deleted within zap and then lazily executing the deletion during subsequent merging of the segments + +## Usage + +While opening an index, if an updated mapping is provided as a string under the key `updated_mapping` within the `runtimeConfig` parameter of `openIndexUsing`, then we open the index and try to update it to use the new mapping provided. + +On failure, we still return a usable index with an error explaining why the update failed. + +## What can be deleted and what can't be deleted? + +* Non updatable changes + * Any additional fields or enabled document mappings in the new index mapping + * Any changes to IncludeInAll, type, IncludeTermvECTORS AND SkipFreqNorm + * Any document mapping having it's enabled value changing from false to true + * Text fields with a different analyser or date time fields with a different date time format + * Vector and VectorBase64 fields changing dims, similarity or vectorIndexOptimizedFor + * Any changes when field is part of `_all` + * Full field deletions when it is covered by any dynamic setting (Index, Store or DocValues Dynamic) + * Any changes to dynamic settings at the top level or any enabled document mapping + * If multiple fields sharing the same field name either from different type mappings or aliases are present, then any non compatible changes across all of these fields +* Updatable changes provided non of the other contitions are hit + * Index, DocValues, Store of a field changing from true to false + * Document mapping being disabled or completely removed + +## How to enforce immediate deletion? 
+Since the deletion is only done during merging, a [force merge](https://github.com/blevesearch/bleve/blob/b82baf10b205511cf12da5cb24330abd9f5b1b74/index/scorch/merge.go#L164) may be used to completely remove the stale data. \ No newline at end of file diff --git a/index/scorch/optimize_knn.go b/index/scorch/optimize_knn.go index afe7bacad..a5ddb7a9d 100644 --- a/index/scorch/optimize_knn.go +++ b/index/scorch/optimize_knn.go @@ -79,7 +79,8 @@ func (o *OptimizeVR) Finish() error { wg.Done() }() for field, vrs := range o.vrs { - // Noop if the vector field or its index data is supposed to be deleted + // Early exit if the field is supposed to be completely deleted or + // if its index data has been deleted if info, ok := o.snapshot.updatedFields[field]; ok && (info.Deleted || info.Index) { continue } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 60a7f367e..72ebf7bdc 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -85,6 +85,11 @@ type IndexSnapshot struct { m3 sync.RWMutex // bm25 metrics specific - not to interfere with TFR creation fieldCardinality map[string]int + // Stores information about zapx fields that have been + // fully deleted (indicated by UpdateFieldInfo.Deleted) or + // partially deleted index, store or docvalues (indicated by + // UpdateFieldInfo.Index or .Store or .DocValues). + // Used to short circuit queries trying to read stale data updatedFields map[string]*index.UpdateFieldInfo } @@ -511,7 +516,8 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) { // Keeping that TODO for now until we have a cleaner way.
rvd.StoredFieldsSize += uint64(len(val)) - // Skip fields that are supposed to have deleted store values + // Skip fields that have been completely deleted or had their + // store data deleted if info, ok := is.updatedFields[name]; ok && (info.Deleted || info.Store) { return true @@ -646,7 +652,8 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field var dict segment.TermDictionary var err error - // Skip fields that are supposed to have no indexing + // Skip fields that have been completely deleted or had their + // index data deleted if info, ok := is.updatedFields[field]; ok && (info.Index || info.Deleted) { dict, err = s.segment.Dictionary("") @@ -802,7 +809,8 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } } - // Filter out fields that are supposed to have no doc values + // Filter out fields that have been completely deleted or had their + // docvalues data deleted idx := 0 for _, field := range vFields { if info, ok := is.updatedFields[field]; ok && diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index d13db344e..17d107497 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -149,7 +149,6 @@ func (s *SegmentSnapshot) Size() (rv int) { // Merge given updated field information with existing and pass it on to the segment base func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { - if s.updatedFields == nil { s.updatedFields = updatedFields } else { From 3d6cf2f23f23afa41593f604e761d443ab77fcde Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 21 Mar 2025 17:31:34 +0530 Subject: [PATCH 16/25] MB-57888: Addressing review comments and race conditions --- builder.go | 2 +- docs/index_update.md | 46 ++++++++++++-------- index/scorch/persister.go | 76 ++++++++++++++------------------- index/scorch/rollback.go | 9 ++-- index/scorch/scorch.go | 34 ++++++++++----- index/scorch/snapshot_index.go | 28 +++++++------ 
index_impl.go | 77 +++++++++++++++++++++------------- index_update_test.go | 2 +- util/keys.go | 32 ++++++++++++++ 9 files changed, 187 insertions(+), 119 deletions(-) create mode 100644 util/keys.go diff --git a/builder.go b/builder.go index c92920301..f170317ee 100644 --- a/builder.go +++ b/builder.go @@ -68,7 +68,7 @@ func newBuilder(path string, mapping mapping.IndexMapping, config map[string]int return nil, err } config["internal"] = map[string][]byte{ - string(scorch.MappingInternalKey): mappingBytes, + string(util.MappingInternalKey): mappingBytes, } // do not use real config, as these are options for the builder, diff --git a/docs/index_update.md b/docs/index_update.md index 1de2ffb43..4afd40345 100644 --- a/docs/index_update.md +++ b/docs/index_update.md @@ -1,29 +1,41 @@ # Ability to reduce downtime during index mapping updates -* *v2.5.0* (and after) will come with support to delete certain fields or parts of the fields without requiring a full rebuild of the index +* *v2.5.0* (and after) will come with support to delete or modify any field mapping in the index mapping without requiring a full rebuild of the index * We do this by storing which portions of the field has to be deleted within zap and then lazily executing the deletion during subsequent merging of the segments ## Usage -While opening an index, if an updated mapping is provided as a string under the key `updated_mapping` within the `runtimeConfig` parameter of `openIndexUsing`, then we open the index and try to update it to use the new mapping provided. +While opening an index, if an updated mapping is provided as a string under the key `updated_mapping` within the `runtimeConfig` parameter of `OpenUsing`, then we open the index and try to update it to use the new mapping provided. -On failure, we still return a usable index with an error explaining why the update failed. +If the update fails, the index is unchanged and an error is returned explaining why the update was unsuccessful. 
## What can be deleted and what can't be deleted? +Fields can be partially deleted by changing their Index, Store, and DocValues parameters from true to false, or completely removed by deleting the field itself. -* Non updatable changes - * Any additional fields or enabled document mappings in the new index mapping - * Any changes to IncludeInAll, type, IncludeTermvECTORS AND SkipFreqNorm - * Any document mapping having it's enabled value changing from false to true - * Text fields with a different analyser or date time fields with a different date time format - * Vector and VectorBase64 fields changing dims, similarity or vectorIndexOptimizedFor - * Any changes when field is part of `_all` - * Full field deletions when it is covered by any dynamic setting (Index, Store or DocValues Dynamic) - * Any changes to dynamic settings at the top level or any enabled document mapping - * If multiple fields sharing the same field name either from different type mappings or aliases are present, then any non compatible changes across all of these fields -* Updatable changes provided non of the other contitions are hit - * Index, DocValues, Store of a field changing from true to false - * Document mapping being disabled or completely removed +Additionally, document mappings can be deleted either by fully removing them from the index mapping or by setting the Enabled value to false, which deletes all fields defined within that mapping. + +However, if any of the following conditions are met, the index is considered non-updatable. 
+* Any additional fields or enabled document mappings in the new index mapping
+* Any changes to IncludeInAll, type, IncludeTermVectors and SkipFreqNorm
+* Any document mapping having its enabled value changing from false to true
+* Text fields with a different analyser or date time fields with a different date time format
+* Vector and VectorBase64 fields changing dims, similarity or vectorIndexOptimizedFor
+* Any changes when field is part of `_all`
+* Full field deletions when it is covered by any dynamic setting (Index, Store or DocValues Dynamic)
+* Any changes to dynamic settings at the top level or any enabled document mapping
+* If multiple fields sharing the same field name either from different type mappings or aliases are present, then any non-compatible changes across all of these fields
 
 ## How to enforce immediate deletion?
 
-Since the deletion is only done during merging, a [force merge](https://github.com/blevesearch/bleve/blob/b82baf10b205511cf12da5cb24330abd9f5b1b74/index/scorch/merge.go#L164) may be used to completely remove the stale data.
\ No newline at end of file
+ +## Sample code to update an existing index +``` +newMapping := `` +config := map[string]interface{}{ + "updated_mapping": newMapping +} +index, err := OpenUsing("", config) +if err != nil { + return err +} +``` diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 4edd4a66b..0576884e4 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -608,7 +608,7 @@ func persistToDirectory(seg segment.UnpersistedSegment, d index.Directory, func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, segPlugin SegmentPlugin, exclude map[uint64]struct{}, d index.Directory) ( []string, map[uint64]string, error) { - snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket) + snapshotsBucket, err := tx.CreateBucketIfNotExists(util.BoltSnapshotsBucket) if err != nil { return nil, nil, err } @@ -619,17 +619,17 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, } // persist meta values - metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey) + metaBucket, err := snapshotBucket.CreateBucketIfNotExists(util.BoltMetaDataKey) if err != nil { return nil, nil, err } - err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(segPlugin.Type())) + err = metaBucket.Put(util.BoltMetaDataSegmentTypeKey, []byte(segPlugin.Type())) if err != nil { return nil, nil, err } buf := make([]byte, binary.MaxVarintLen32) binary.BigEndian.PutUint32(buf, segPlugin.Version()) - err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf) + err = metaBucket.Put(util.BoltMetaDataSegmentVersionKey, buf) if err != nil { return nil, nil, err } @@ -643,13 +643,13 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, if err != nil { return nil, nil, err } - err = metaBucket.Put(boltMetaDataTimeStamp, timeStampBinary) + err = metaBucket.Put(util.BoltMetaDataTimeStamp, timeStampBinary) if err != nil { return nil, nil, err } // persist internal values - internalBucket, err := 
snapshotBucket.CreateBucketIfNotExists(boltInternalKey) + internalBucket, err := snapshotBucket.CreateBucketIfNotExists(util.BoltInternalKey) if err != nil { return nil, nil, err } @@ -665,7 +665,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, val := make([]byte, 8) bytesWritten := atomic.LoadUint64(&snapshot.parent.stats.TotBytesWrittenAtIndexTime) binary.LittleEndian.PutUint64(val, bytesWritten) - err = internalBucket.Put(TotBytesWrittenKey, val) + err = internalBucket.Put(util.TotBytesWrittenKey, val) if err != nil { return nil, nil, err } @@ -689,7 +689,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, return nil, nil, fmt.Errorf("segment: %s copy err: %v", segPath, err) } filename := filepath.Base(segPath) - err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename)) if err != nil { return nil, nil, err } @@ -705,7 +705,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, return nil, nil, fmt.Errorf("segment: %s persist err: %v", path, err) } newSegmentPaths[segmentSnapshot.id] = path - err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename)) if err != nil { return nil, nil, err } @@ -721,7 +721,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, if err != nil { return nil, nil, fmt.Errorf("error persisting roaring bytes: %v", err) } - err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes()) + err = snapshotSegmentBucket.Put(util.BoltDeletedKey, roaringBuf.Bytes()) if err != nil { return nil, nil, err } @@ -733,7 +733,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, if err != nil { return nil, nil, err } - err = snapshotSegmentBucket.Put(boltStatsKey, b) + err = snapshotSegmentBucket.Put(util.BoltStatsKey, b) if err != nil { return nil, nil, err } @@ 
-745,7 +745,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, if err != nil { return nil, nil, err } - err = snapshotSegmentBucket.Put(boltUpdatedFieldsKey, b) + err = snapshotSegmentBucket.Put(util.BoltUpdatedFieldsKey, b) if err != nil { return nil, nil, err } @@ -844,23 +844,9 @@ func zapFileName(epoch uint64) string { // bolt snapshot code -var ( - boltSnapshotsBucket = []byte{'s'} - boltPathKey = []byte{'p'} - boltDeletedKey = []byte{'d'} - boltInternalKey = []byte{'i'} - boltMetaDataKey = []byte{'m'} - boltMetaDataSegmentTypeKey = []byte("type") - boltMetaDataSegmentVersionKey = []byte("version") - boltMetaDataTimeStamp = []byte("timeStamp") - boltStatsKey = []byte("stats") - boltUpdatedFieldsKey = []byte("fields") - TotBytesWrittenKey = []byte("TotBytesWritten") -) - func (s *Scorch) loadFromBolt() error { err := s.rootBolt.View(func(tx *bolt.Tx) error { - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil } @@ -925,7 +911,7 @@ func (s *Scorch) loadFromBolt() error { // NOTE: this is currently ONLY intended to be used by the command-line tool func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { err = s.rootBolt.View(func(tx *bolt.Tx) error { - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil } @@ -953,14 +939,14 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { // first we look for the meta-data bucket, this will tell us // which segment type/version was used for this snapshot // all operations for this scorch will use this type/version - metaBucket := snapshot.Bucket(boltMetaDataKey) + metaBucket := snapshot.Bucket(util.BoltMetaDataKey) if metaBucket == nil { _ = rv.DecRef() return nil, fmt.Errorf("meta-data bucket missing") } - segmentType := string(metaBucket.Get(boltMetaDataSegmentTypeKey)) + segmentType := 
string(metaBucket.Get(util.BoltMetaDataSegmentTypeKey)) segmentVersion := binary.BigEndian.Uint32( - metaBucket.Get(boltMetaDataSegmentVersionKey)) + metaBucket.Get(util.BoltMetaDataSegmentVersionKey)) err := s.loadSegmentPlugin(segmentType, segmentVersion) if err != nil { _ = rv.DecRef() @@ -970,7 +956,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { var running uint64 c := snapshot.Cursor() for k, _ := c.First(); k != nil; k, _ = c.Next() { - if k[0] == boltInternalKey[0] { + if k[0] == util.BoltInternalKey[0] { internalBucket := snapshot.Bucket(k) if internalBucket == nil { _ = rv.DecRef() @@ -985,7 +971,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { _ = rv.DecRef() return nil, err } - } else if k[0] != boltMetaDataKey[0] { + } else if k[0] != util.BoltMetaDataKey[0] { segmentBucket := snapshot.Bucket(k) if segmentBucket == nil { _ = rv.DecRef() @@ -1014,7 +1000,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { } func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) { - pathBytes := segmentBucket.Get(boltPathKey) + pathBytes := segmentBucket.Get(util.BoltPathKey) if pathBytes == nil { return nil, fmt.Errorf("segment path missing") } @@ -1029,7 +1015,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro cachedDocs: &cachedDocs{cache: nil}, cachedMeta: &cachedMeta{meta: nil}, } - deletedBytes := segmentBucket.Get(boltDeletedKey) + deletedBytes := segmentBucket.Get(util.BoltDeletedKey) if deletedBytes != nil { deletedBitmap := roaring.NewBitmap() r := bytes.NewReader(deletedBytes) @@ -1042,7 +1028,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro rv.deleted = deletedBitmap } } - statBytes := segmentBucket.Get(boltStatsKey) + statBytes := segmentBucket.Get(util.BoltStatsKey) if statBytes != nil { var statsMap map[string]map[string]uint64 @@ -1054,7 +1040,7 @@ func (s 
*Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro } rv.stats = stats } - updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) + updatedFieldBytes := segmentBucket.Get(util.BoltUpdatedFieldsKey) if updatedFieldBytes != nil { var updatedFields map[string]*index.UpdateFieldInfo @@ -1245,7 +1231,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { } }() - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return 0, nil } @@ -1355,7 +1341,7 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) { expirationDuration := time.Duration(s.numSnapshotsToKeep-1) * s.rollbackSamplingInterval err := s.rootBolt.View(func(tx *bolt.Tx) error { - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil } @@ -1379,11 +1365,11 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) { if snapshot == nil { continue } - metaBucket := snapshot.Bucket(boltMetaDataKey) + metaBucket := snapshot.Bucket(util.BoltMetaDataKey) if metaBucket == nil { continue } - timeStampBytes := metaBucket.Get(boltMetaDataTimeStamp) + timeStampBytes := metaBucket.Get(util.BoltMetaDataTimeStamp) var timeStamp time.Time err = timeStamp.UnmarshalText(timeStampBytes) if err != nil { @@ -1420,7 +1406,7 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) { func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) { var rv []uint64 err := s.rootBolt.View(func(tx *bolt.Tx) error { - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil } @@ -1441,7 +1427,7 @@ func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) { func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) { rv := map[string]struct{}{} err := s.rootBolt.View(func(tx *bolt.Tx) error { - snapshots := 
tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil } @@ -1453,14 +1439,14 @@ func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) { } segc := snapshot.Cursor() for segk, _ := segc.First(); segk != nil; segk, _ = segc.Next() { - if segk[0] == boltInternalKey[0] { + if segk[0] == util.BoltInternalKey[0] { continue } segmentBucket := snapshot.Bucket(segk) if segmentBucket == nil { continue } - pathBytes := segmentBucket.Get(boltPathKey) + pathBytes := segmentBucket.Get(util.BoltPathKey) if pathBytes == nil { continue } diff --git a/index/scorch/rollback.go b/index/scorch/rollback.go index 895f939dd..f047762fa 100644 --- a/index/scorch/rollback.go +++ b/index/scorch/rollback.go @@ -19,6 +19,7 @@ import ( "log" "os" + "github.com/blevesearch/bleve/v2/util" bolt "go.etcd.io/bbolt" ) @@ -61,7 +62,7 @@ func RollbackPoints(path string) ([]*RollbackPoint, error) { _ = rootBolt.Close() }() - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil, nil } @@ -87,7 +88,7 @@ func RollbackPoints(path string) ([]*RollbackPoint, error) { meta := map[string][]byte{} c2 := snapshot.Cursor() for j, _ := c2.First(); j != nil; j, _ = c2.Next() { - if j[0] == boltInternalKey[0] { + if j[0] == util.BoltInternalKey[0] { internalBucket := snapshot.Bucket(j) if internalBucket == nil { err = fmt.Errorf("internal bucket missing") @@ -151,7 +152,7 @@ func Rollback(path string, to *RollbackPoint) error { var found bool var eligibleEpochs []uint64 err = rootBolt.View(func(tx *bolt.Tx) error { - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil } @@ -193,7 +194,7 @@ func Rollback(path string, to *RollbackPoint) error { } }() - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil } diff --git 
a/index/scorch/scorch.go b/index/scorch/scorch.go index 4ef1a5458..a84b25a08 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -25,6 +25,7 @@ import ( "github.com/RoaringBitmap/roaring/v2" "github.com/blevesearch/bleve/v2/registry" + "github.com/blevesearch/bleve/v2/util" index "github.com/blevesearch/bleve_index_api" segment "github.com/blevesearch/scorch_segment_api/v2" bolt "go.etcd.io/bbolt" @@ -36,8 +37,6 @@ const Version uint8 = 2 var ErrClosed = fmt.Errorf("scorch closed") -var MappingInternalKey = []byte("_mapping") - type Scorch struct { nextSegmentID uint64 stats Stats @@ -76,6 +75,7 @@ type Scorch struct { merges chan *segmentMerge introducerNotifier chan *epochWatcher persisterNotifier chan *epochWatcher + loadedBolt bool rootBolt *bolt.DB asyncTasks sync.WaitGroup @@ -124,6 +124,7 @@ func NewScorch(storeName string, forceMergeRequestCh: make(chan *mergerCtrl, 1), segPlugin: defaultSegmentPlugin, copyScheduled: map[string]int{}, + loadedBolt: false, } forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config) @@ -219,9 +220,11 @@ func (s *Scorch) fireAsyncError(err error) { } func (s *Scorch) Open() error { - err := s.openBolt() - if err != nil { - return err + if !s.loadedBolt { + err := s.openBolt() + if err != nil { + return err + } } s.asyncTasks.Add(1) @@ -958,10 +961,19 @@ func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappi return nil } +func (s *Scorch) OpenMeta() error { + err := s.openBolt() + if err != nil { + return err + } + s.loadedBolt = true + return nil +} + // Merge and update deleted field info and rewrite index mapping func (s *Scorch) updateBolt(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error { return s.rootBolt.Update(func(tx *bolt.Tx) error { - snapshots := tx.Bucket(boltSnapshotsBucket) + snapshots := tx.Bucket(util.BoltSnapshotsBucket) if snapshots == nil { return nil } @@ -976,22 +988,22 @@ func (s *Scorch) updateBolt(fieldInfo 
map[string]*index.UpdateFieldInfo, mapping snapshot := snapshots.Bucket(k) cc := snapshot.Cursor() for kk, _ := cc.First(); kk != nil; kk, _ = cc.Next() { - if kk[0] == boltInternalKey[0] { + if kk[0] == util.BoltInternalKey[0] { internalBucket := snapshot.Bucket(kk) if internalBucket == nil { return fmt.Errorf("segment key, but bucket missing %x", kk) } - err = internalBucket.Put(MappingInternalKey, mappingBytes) + err = internalBucket.Put(util.MappingInternalKey, mappingBytes) if err != nil { return err } - } else if kk[0] != boltMetaDataKey[0] { + } else if kk[0] != util.BoltMetaDataKey[0] { segmentBucket := snapshot.Bucket(kk) if segmentBucket == nil { return fmt.Errorf("segment key, but bucket missing %x", kk) } var updatedFields map[string]*index.UpdateFieldInfo - updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey) + updatedFieldBytes := segmentBucket.Get(util.BoltUpdatedFieldsKey) if updatedFieldBytes != nil { err := json.Unmarshal(updatedFieldBytes, &updatedFields) if err != nil { @@ -1016,7 +1028,7 @@ func (s *Scorch) updateBolt(fieldInfo map[string]*index.UpdateFieldInfo, mapping if err != nil { return err } - err = segmentBucket.Put(boltUpdatedFieldsKey, b) + err = segmentBucket.Put(util.BoltUpdatedFieldsKey, b) if err != nil { return err } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 72ebf7bdc..e70107a36 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -810,17 +810,21 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } // Filter out fields that have been completely deleted or had their - // docvalues data deleted - idx := 0 - for _, field := range vFields { - if info, ok := is.updatedFields[field]; ok && - (info.DocValues || info.Deleted) { - continue + // docvalues data deleted from both visitable fields and required fields + filterUpdatedFields := func(fields []string) []string { + filteredFields := make([]string, 0) + for _, field := range fields { + if 
info, ok := is.updatedFields[field]; ok && + (info.DocValues || info.Deleted) { + continue + } + filteredFields = append(filteredFields, field) } - vFields[idx] = field - idx++ + return filteredFields } - vFields = vFields[:idx] + + fieldsFiltered := filterUpdatedFields(fields) + vFieldsFiltered := filterUpdatedFields(vFields) var errCh chan error @@ -829,7 +833,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( // if the caller happens to know we're on the same segmentIndex // from a previous invocation if cFields == nil { - cFields = subtractStrings(fields, vFields) + cFields = subtractStrings(fieldsFiltered, vFieldsFiltered) if !ss.cachedDocs.hasFields(cFields) { errCh = make(chan error, 1) @@ -844,8 +848,8 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } } - if ssvOk && ssv != nil && len(vFields) > 0 { - dvs, err = ssv.VisitDocValues(localDocNum, vFields, visitor, dvs) + if ssvOk && ssv != nil && len(vFieldsFiltered) > 0 { + dvs, err = ssv.VisitDocValues(localDocNum, fieldsFiltered, visitor, dvs) if err != nil { return nil, nil, err } diff --git a/index_impl.go b/index_impl.go index cff909f42..303a0dcec 100644 --- a/index_impl.go +++ b/index_impl.go @@ -133,7 +133,7 @@ func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, if err != nil { return nil, err } - err = rv.i.SetInternal(scorch.MappingInternalKey, mappingBytes) + err = rv.i.SetInternal(util.MappingInternalKey, mappingBytes) if err != nil { return nil, err } @@ -203,15 +203,32 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde if err != nil { return nil, err } - err = rv.i.Open() - if err != nil { - return nil, err - } - defer func(rv *indexImpl) { - if !rv.open { - rv.i.Close() + + var ui index.UpdateIndex + if um != nil { + var ok bool + ui, ok = rv.i.(index.UpdateIndex) + if !ok { + return nil, fmt.Errorf("updated mapping present for unupdatable index") + } + + // Load the meta data from bolt so that we can read 
the current index + // mapping to compare with + err = ui.OpenMeta() + if err != nil { + return nil, err } - }(rv) + } else { + err = rv.i.Open() + if err != nil { + return nil, err + } + defer func(rv *indexImpl) { + if !rv.open { + rv.i.Close() + } + }(rv) + } // now load the mapping indexReader, err := rv.i.Reader() @@ -224,7 +241,7 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde } }() - mappingBytes, err := indexReader.GetInternal(scorch.MappingInternalKey) + mappingBytes, err := indexReader.GetInternal(util.MappingInternalKey) if err != nil { return nil, err } @@ -235,44 +252,48 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes)) } - // mark the index as open - rv.mutex.Lock() - defer rv.mutex.Unlock() - rv.open = true - // validate the mapping err = im.Validate() if err != nil { - // note even if the mapping is invalid - // we still return an open usable index - return rv, err + // no longer return usable index on error because there + // is a chance the index is not open at this stage + return nil, err } // Validate and update the index with the new mapping - // return usable index with error as to why update failed for any error - if um != nil { - ui, ok := rv.i.(index.UpdateIndex) - if !ok { - return rv, fmt.Errorf("updated mapping present for unupdatable index") - } - + if um != nil && ui != nil { err = um.Validate() if err != nil { - return rv, err + return nil, err } fieldInfo, err := DeletedFields(im, um) if err != nil { - return rv, err + return nil, err } err = ui.UpdateFields(fieldInfo, umBytes) if err != nil { - return rv, err + return nil, err } im = um + + err = rv.i.Open() + if err != nil { + return nil, err + } + defer func(rv *indexImpl) { + if !rv.open { + rv.i.Close() + } + }(rv) } + // mark the index as open + rv.mutex.Lock() + defer rv.mutex.Unlock() + rv.open = true 
+ rv.m = im indexStats.Register(rv) return rv, err diff --git a/index_update_test.go b/index_update_test.go index fef5ff73f..d7bd58f99 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -2768,7 +2768,7 @@ func TestIndexUpdateMerge(t *testing.T) { } numDocsPerBatch := 1000 - numBatches := 3 + numBatches := 10 var batch *Batch doc := make(map[string]interface{}) diff --git a/util/keys.go b/util/keys.go new file mode 100644 index 000000000..b71a7f48b --- /dev/null +++ b/util/keys.go @@ -0,0 +1,32 @@ +// Copyright (c) 2025 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package util + +var ( + // Bolt keys + BoltSnapshotsBucket = []byte{'s'} + BoltPathKey = []byte{'p'} + BoltDeletedKey = []byte{'d'} + BoltInternalKey = []byte{'i'} + BoltMetaDataKey = []byte{'m'} + BoltMetaDataSegmentTypeKey = []byte("type") + BoltMetaDataSegmentVersionKey = []byte("version") + BoltMetaDataTimeStamp = []byte("timeStamp") + BoltStatsKey = []byte("stats") + BoltUpdatedFieldsKey = []byte("fields") + TotBytesWrittenKey = []byte("TotBytesWritten") + + MappingInternalKey = []byte("_mapping") +) From 00d75b23176a926a42337d7b6b680d8815787451 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 12 Sep 2025 14:51:08 -0600 Subject: [PATCH 17/25] Absorb dependent zapx, scorch_segment_api, bleve_index_api commits --- go.mod | 6 +++--- go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 13715a3a7..af86a7dee 100644 --- a/go.mod +++ b/go.mod @@ -7,14 +7,14 @@ toolchain go1.23.9 require ( github.com/RoaringBitmap/roaring/v2 v2.4.5 github.com/bits-and-blooms/bitset v1.22.0 - github.com/blevesearch/bleve_index_api v1.2.8 + github.com/blevesearch/bleve_index_api v1.2.9 github.com/blevesearch/geo v0.2.3 github.com/blevesearch/go-faiss v1.0.25 github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/goleveldb v1.0.1 github.com/blevesearch/gtreap v0.1.1 - github.com/blevesearch/scorch_segment_api/v2 v2.3.10 + github.com/blevesearch/scorch_segment_api/v2 v2.3.11 github.com/blevesearch/segment v0.9.1 github.com/blevesearch/snowball v0.6.1 github.com/blevesearch/snowballstem v0.9.0 @@ -26,7 +26,7 @@ require ( github.com/blevesearch/zapx/v13 v13.4.2 github.com/blevesearch/zapx/v14 v14.4.2 github.com/blevesearch/zapx/v15 v15.4.2 - github.com/blevesearch/zapx/v16 v16.2.4 + github.com/blevesearch/zapx/v16 v16.2.5-0.20250912204702-6a1c8f089c73 github.com/couchbase/moss v0.2.0 github.com/spf13/cobra v1.8.1 go.etcd.io/bbolt v1.4.0 diff 
--git a/go.sum b/go.sum index 8f748628e..75317c836 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,8 @@ github.com/RoaringBitmap/roaring/v2 v2.4.5/go.mod h1:FiJcsfkGje/nZBZgCu0ZxCPOKD/ github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4= github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= -github.com/blevesearch/bleve_index_api v1.2.8 h1:Y98Pu5/MdlkRyLM0qDHostYo7i+Vv1cDNhqTeR4Sy6Y= -github.com/blevesearch/bleve_index_api v1.2.8/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0= +github.com/blevesearch/bleve_index_api v1.2.9 h1:WqD3kvYwnlYLv8sTdH+AF7n/L4v969Cek68+wZnYj4Q= +github.com/blevesearch/bleve_index_api v1.2.9/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0= github.com/blevesearch/geo v0.2.3 h1:K9/vbGI9ehlXdxjxDRJtoAMt7zGAsMIzc6n8zWcwnhg= github.com/blevesearch/geo v0.2.3/go.mod h1:K56Q33AzXt2YExVHGObtmRSFYZKYGv0JEN5mdacJJR8= github.com/blevesearch/go-faiss v1.0.25 h1:lel1rkOUGbT1CJ0YgzKwC7k+XH0XVBHnCVWahdCXk4U= @@ -20,8 +20,8 @@ github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgY github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= -github.com/blevesearch/scorch_segment_api/v2 v2.3.10 h1:Yqk0XD1mE0fDZAJXTjawJ8If/85JxnLd8v5vG/jWE/s= -github.com/blevesearch/scorch_segment_api/v2 v2.3.10/go.mod h1:Z3e6ChN3qyN35yaQpl00MfI5s8AxUJbpTR/DL8QOQ+8= +github.com/blevesearch/scorch_segment_api/v2 v2.3.11 h1:bYuEgsyGqgU/gy0/Vk6g1eCUqGBs2r+3bRCv+Cnq2kc= +github.com/blevesearch/scorch_segment_api/v2 v2.3.11/go.mod h1:aAWoeQ3DdoZ3Z5138jXVSd1T/klGwvg11z0pSxrJSEk= github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= 
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= github.com/blevesearch/snowball v0.6.1 h1:cDYjn/NCH+wwt2UdehaLpr2e4BwLIjN4V/TdLsL+B5A= @@ -44,8 +44,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k= github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= -github.com/blevesearch/zapx/v16 v16.2.4 h1:tGgfvleXTAkwsD5mEzgM3zCS/7pgocTCnO1oyAUjlww= -github.com/blevesearch/zapx/v16 v16.2.4/go.mod h1:Rti/REtuuMmzwsI8/C/qIzRaEoSK/wiFYw5e5ctUKKs= +github.com/blevesearch/zapx/v16 v16.2.5-0.20250912204702-6a1c8f089c73 h1:aebhv0dk0sjTGsBe2IMxURfy7L0DaFOW7v4rskxTCUQ= +github.com/blevesearch/zapx/v16 v16.2.5-0.20250912204702-6a1c8f089c73/go.mod h1:T4ydQDpsyQxB5LM04lJN0vP+pzjTgVJH5MmNIPqN0ZA= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o= From 96663d4c36703139212ccdbf42b31279161430d8 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 12 Sep 2025 14:53:14 -0600 Subject: [PATCH 18/25] Fix missing refactor for PutUpdatedFields -> SetUpdatedFields --- index/scorch/snapshot_segment.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 17d107497..c6f3584cc 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -165,7 +165,7 @@ func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.Updat } if segment, ok := s.segment.(segment.UpdatableSegment); ok { - segment.PutUpdatedFields(s.updatedFields) + segment.SetUpdatedFields(s.updatedFields) 
} } From 989127636598cbdf429d1bd23adbbcbb4edb6346 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 12 Sep 2025 14:55:36 -0600 Subject: [PATCH 19/25] Use zapx/v16's v16.2.5 --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 04fb88dc8..b7fc7c424 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/blevesearch/zapx/v13 v13.4.2 github.com/blevesearch/zapx/v14 v14.4.2 github.com/blevesearch/zapx/v15 v15.4.2 - github.com/blevesearch/zapx/v16 v16.2.5-0.20250912204702-6a1c8f089c73 + github.com/blevesearch/zapx/v16 v16.2.5 github.com/couchbase/moss v0.2.0 github.com/spf13/cobra v1.8.1 go.etcd.io/bbolt v1.4.0 diff --git a/go.sum b/go.sum index 2e14d3203..d403cca78 100644 --- a/go.sum +++ b/go.sum @@ -44,8 +44,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k= github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= -github.com/blevesearch/zapx/v16 v16.2.5-0.20250912204702-6a1c8f089c73 h1:aebhv0dk0sjTGsBe2IMxURfy7L0DaFOW7v4rskxTCUQ= -github.com/blevesearch/zapx/v16 v16.2.5-0.20250912204702-6a1c8f089c73/go.mod h1:T4ydQDpsyQxB5LM04lJN0vP+pzjTgVJH5MmNIPqN0ZA= +github.com/blevesearch/zapx/v16 v16.2.5 h1:xfMrpzYIpAL6JEzLXUQZVXcLrvHe3w7+/YoATZPq6i0= +github.com/blevesearch/zapx/v16 v16.2.5/go.mod h1:T4ydQDpsyQxB5LM04lJN0vP+pzjTgVJH5MmNIPqN0ZA= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o= From a4fbb0c3c0875a21e2a93262cbec99c3fa9e5d6e Mon Sep 17 00:00:00 2001 From: Likith B Date: Wed, 17 Sep 2025 14:53:56 +0530 Subject: [PATCH 
20/25] Fixing few test issues --- index/scorch/snapshot_index_vr.go | 4 ++++ index_update_test.go | 24 ++++++++++++++++++++++++ search_knn_test.go | 2 +- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/index/scorch/snapshot_index_vr.go b/index/scorch/snapshot_index_vr.go index 7c6741125..3f2a43a12 100644 --- a/index/scorch/snapshot_index_vr.go +++ b/index/scorch/snapshot_index_vr.go @@ -83,6 +83,10 @@ func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) ( } for i.segmentOffset < len(i.iterators) { + if i.iterators[i.segmentOffset] == nil { + i.segmentOffset++ + continue + } next, err := i.iterators[i.segmentOffset].Next() if err != nil { return nil, err diff --git a/index_update_test.go b/index_update_test.go index d7bd58f99..5d6326576 100644 --- a/index_update_test.go +++ b/index_update_test.go @@ -2486,6 +2486,12 @@ func TestIndexUpdateText(t *testing.T) { if err != nil { t.Fatal(err) } + defer func() { + err := index.Close() + if err != nil { + t.Fatal(err) + } + }() q1 := NewSearchRequest(NewQueryStringQuery("a:*")) q1.Fields = append(q1.Fields, "a") @@ -2658,6 +2664,12 @@ func TestIndexUpdateSynonym(t *testing.T) { if err != nil { t.Fatal(err) } + defer func() { + err := index.Close() + if err != nil { + t.Fatal(err) + } + }() q1 := NewSearchRequest(NewQueryStringQuery("a:devoted")) res1, err := index.Search(q1) @@ -2873,6 +2885,12 @@ func TestIndexUpdateMerge(t *testing.T) { if err != nil { t.Fatal(err) } + defer func() { + err := index.Close() + if err != nil { + t.Fatal(err) + } + }() impl, ok := index.(*indexImpl) if !ok { @@ -3047,6 +3065,12 @@ func BenchmarkIndexUpdateText(b *testing.B) { if err != nil { b.Fatal(err) } + defer func() { + err := index.Close() + if err != nil { + b.Fatal(err) + } + }() b.ResetTimer() diff --git a/search_knn_test.go b/search_knn_test.go index 84cebd398..4dbe25744 100644 --- a/search_knn_test.go +++ b/search_knn_test.go @@ -1882,7 +1882,7 @@ func TestIndexUpdateVector(t *testing.T) { 
t.Fatalf("Expected 3 hits, got %d\n", len(res1.Hits)) } q2 := NewSearchRequest(NewMatchNoneQuery()) - q2.AddKNN("e", []float32{1, 2, 3, 4}, 3, 1.0) + q2.AddKNN("b", []float32{1, 2, 3, 4}, 3, 1.0) res2, err := index.Search(q2) if err != nil { t.Fatal(err) From 7fcbf77de63110784bca3283dc9e3eb4ac4424d3 Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 19 Sep 2025 13:30:55 +0530 Subject: [PATCH 21/25] Minor Changes --- docs/index_update.md | 2 +- index/scorch/scorch.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/index_update.md b/docs/index_update.md index 4afd40345..f736dde73 100644 --- a/docs/index_update.md +++ b/docs/index_update.md @@ -1,6 +1,6 @@ # Ability to reduce downtime during index mapping updates -* *v2.5.0* (and after) will come with support to delete or modify any field mapping in the index mapping without requiring a full rebuild of the index +* *v2.5.4* (and after) will come with support to delete or modify any field mapping in the index mapping without requiring a full rebuild of the index * We do this by storing which portions of the field has to be deleted within zap and then lazily executing the deletion during subsequent merging of the segments ## Usage diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index a84b25a08..66849ce36 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -962,6 +962,10 @@ func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappi } func (s *Scorch) OpenMeta() error { + if s.loadedBolt { + return nil + } + err := s.openBolt() if err != nil { return err From 4fab6badff9b04c9a0148db0111bad951240e450 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 30 Sep 2025 18:49:43 +0530 Subject: [PATCH 22/25] Addressing review comments --- index/scorch/scorch.go | 2 -- index/scorch/snapshot_index.go | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 66849ce36..d63deebfb 100644 --- 
a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -955,9 +955,7 @@ func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappi return err } // Pass the update field info to all snapshots and segment bases - s.root.m.Lock() s.root.UpdateFieldsInfo(fieldInfo) - s.root.m.Unlock() return nil } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index e70107a36..c09a7db40 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -1207,6 +1207,9 @@ func (is *IndexSnapshot) UpdateSynonymSearchCount(delta uint64) { // Update current snapshot updated field data as well as pass it on to all segments and segment bases func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) { + is.m.Lock() + defer is.m.Unlock() + is.MergeUpdateFieldsInfo(updatedFields) for _, segmentSnapshot := range is.segment { From 61c5adc408a09683acb78da6bf6dbb680adc4ea0 Mon Sep 17 00:00:00 2001 From: Likith B Date: Wed, 8 Oct 2025 11:00:53 +0530 Subject: [PATCH 23/25] Addressing review comments --- index/scorch/scorch.go | 17 +++++++---------- index_update.go | 7 +++++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index d63deebfb..83924978e 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -75,7 +75,6 @@ type Scorch struct { merges chan *segmentMerge introducerNotifier chan *epochWatcher persisterNotifier chan *epochWatcher - loadedBolt bool rootBolt *bolt.DB asyncTasks sync.WaitGroup @@ -124,7 +123,6 @@ func NewScorch(storeName string, forceMergeRequestCh: make(chan *mergerCtrl, 1), segPlugin: defaultSegmentPlugin, copyScheduled: map[string]int{}, - loadedBolt: false, } forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config) @@ -220,7 +218,7 @@ func (s *Scorch) fireAsyncError(err error) { } func (s *Scorch) Open() error { - if !s.loadedBolt { + if s.rootBolt == nil { err := 
s.openBolt() if err != nil { return err @@ -376,6 +374,7 @@ func (s *Scorch) Close() (err error) { } } s.root = nil + s.rootBolt = nil s.rootLock.Unlock() } @@ -960,15 +959,13 @@ func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappi } func (s *Scorch) OpenMeta() error { - if s.loadedBolt { - return nil + if s.rootBolt == nil { + err := s.openBolt() + if err != nil { + return err + } } - err := s.openBolt() - if err != nil { - return err - } - s.loadedBolt = true return nil } diff --git a/index_update.go b/index_update.go index 5eb02ba49..46d1fde1a 100644 --- a/index_update.go +++ b/index_update.go @@ -153,6 +153,13 @@ func compareMappings(ori, upd *mapping.IndexMappingImpl) error { return fmt.Errorf("default datetime parser cannot be changed if index dynamic is true") } + if ori.ScoringModel != upd.ScoringModel { + if ori.ScoringModel != "" && ori.ScoringModel != "tf-idf" && ori.ScoringModel != "bm25" || + upd.ScoringModel != "" && upd.ScoringModel != "tf-idf" && upd.ScoringModel != "bm25" { + return fmt.Errorf("scoring model can only be changed between \"\", tf-idf and bm25") + } + } + return nil } From c0f8c1635bcd76c9a0448fb03f56a20d7f18ee91 Mon Sep 17 00:00:00 2001 From: Likith B Date: Wed, 8 Oct 2025 21:35:46 +0530 Subject: [PATCH 24/25] Addressing review comments --- index_update.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index_update.go b/index_update.go index 46d1fde1a..9ae4db8ed 100644 --- a/index_update.go +++ b/index_update.go @@ -154,9 +154,9 @@ func compareMappings(ori, upd *mapping.IndexMappingImpl) error { } if ori.ScoringModel != upd.ScoringModel { - if ori.ScoringModel != "" && ori.ScoringModel != "tf-idf" && ori.ScoringModel != "bm25" || - upd.ScoringModel != "" && upd.ScoringModel != "tf-idf" && upd.ScoringModel != "bm25" { - return fmt.Errorf("scoring model can only be changed between \"\", tf-idf and bm25") + if ori.ScoringModel != "" && ori.ScoringModel != index.TFIDFScoring && 
ori.ScoringModel != index.BM25Scoring || + upd.ScoringModel != "" && upd.ScoringModel != index.TFIDFScoring && upd.ScoringModel != index.BM25Scoring { + return fmt.Errorf("scoring model can only be changed between \"\", %q and %q", index.TFIDFScoring, index.BM25Scoring) } } From 29ff859f0bada314a0c9b1b52d0be6dce4bc5cad Mon Sep 17 00:00:00 2001 From: Likith B Date: Thu, 9 Oct 2025 12:44:09 +0530 Subject: [PATCH 25/25] Adding commentary --- index_update.go | 1 + 1 file changed, 1 insertion(+) diff --git a/index_update.go b/index_update.go index 9ae4db8ed..fa9789bb1 100644 --- a/index_update.go +++ b/index_update.go @@ -153,6 +153,7 @@ func compareMappings(ori, upd *mapping.IndexMappingImpl) error { return fmt.Errorf("default datetime parser cannot be changed if index dynamic is true") } + // Scoring model changes between "", "tf-idf" and "bm25" require no index changes to be made if ori.ScoringModel != upd.ScoringModel { if ori.ScoringModel != "" && ori.ScoringModel != index.TFIDFScoring && ori.ScoringModel != index.BM25Scoring || upd.ScoringModel != "" && upd.ScoringModel != index.TFIDFScoring && upd.ScoringModel != index.BM25Scoring {