Skip to content

Commit 41a6f37

Browse files
authored
GT-176 Adjust Inverted Index changes (#433)
1 parent ec3fd78 commit 41a6f37

File tree

3 files changed

+63
-19
lines changed

3 files changed

+63
-19
lines changed

collection_indexes.go

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -208,26 +208,60 @@ type EnsureZKDIndexOptions struct {
208208
type InvertedIndexOptions struct {
209209
// Name optional user defined name used for hints in AQL queries
210210
Name string `json:"name"`
211-
// InBackground if true will not hold an exclusive collection lock for the entire index creation period (rocksdb only).
211+
// InBackground This attribute can be set to true to create the index in the background,
212+
// not write-locking the underlying collection for as long as if the index is built in the foreground.
213+
// The default value is false.
212214
InBackground bool `json:"inBackground,omitempty"`
213215
IsNewlyCreated bool `json:"isNewlyCreated,omitempty"`
214216

217+
// The number of threads to use for indexing the fields. Default: 2
215218
Parallelism int `json:"parallelism,omitempty"`
216-
// PrimarySort describes how individual fields are sorted
219+
// PrimarySort You can define a primary sort order to enable an AQL optimization.
220+
// If a query iterates over all documents of a collection, wants to sort them by attribute values, and the (left-most) fields to sort by,
221+
// as well as their sorting direction, match with the primarySort definition, then the SORT operation is optimized away.
217222
PrimarySort InvertedIndexPrimarySort `json:"primarySort,omitempty"`
218-
// StoredValues these values specifies how the index should track values.
223+
// StoredValues The optional storedValues attribute can contain an array of paths to additional attributes to store in the index.
224+
// These additional attributes cannot be used for index lookups or for sorting, but they can be used for projections.
225+
// This allows an index to fully cover more queries and avoid extra document lookups.
219226
StoredValues []StoredValue `json:"storedValues,omitempty"`
220-
// Analyzer to be used for indexing
221-
Analyzer ArangoSearchAnalyzerType `json:"analyzer,omitempty"`
227+
// Analyzer The name of an Analyzer to use by default. This Analyzer is applied to the values of the indexed fields for which you don’t define Analyzers explicitly.
228+
Analyzer string `json:"analyzer,omitempty"`
222229
// Features list of analyzer features, default []
223230
Features []ArangoSearchAnalyzerFeature `json:"features,omitempty"`
224231
// IncludeAllFields If set to true, all fields of this element will be indexed. Defaults to false.
225232
IncludeAllFields bool `json:"includeAllFields,omitempty"`
226233
// TrackListPositions If set to true, values in a list are treated as separate values. Defaults to false.
227234
TrackListPositions bool `json:"trackListPositions,omitempty"`
235+
// SearchField This option only applies if you use the inverted index in a search-alias View.
236+
// You can set the option to true to get the same behavior as with arangosearch Views regarding the indexing of array values as the default.
237+
// If enabled, both array and primitive values (strings, numbers, etc.) are accepted. Every element of an array is indexed according to the trackListPositions option.
238+
// If set to false, it depends on the attribute path. If it explicitly expands an array ([*]), then the elements are indexed separately.
239+
// Otherwise, the array is indexed as a whole, but only geopoint and aql Analyzers accept array inputs.
240+
// You cannot use an array expansion if searchField is enabled.
241+
SearchField bool `json:"searchField,omitempty"`
228242
// Fields contains the properties for individual fields of the element.
229243
// Fields is a list; each entry identifies the attribute it configures through its Name.
230244
Fields []InvertedIndexField `json:"fields,omitempty"`
245+
// ConsolidationIntervalMsec Wait at least this many milliseconds between applying ‘consolidationPolicy’ to consolidate View data store
246+
// and possibly release space on the filesystem (default: 1000, to disable use: 0).
247+
ConsolidationIntervalMsec *int64 `json:"consolidationIntervalMsec,omitempty"`
248+
// CommitIntervalMsec Wait at least this many milliseconds between committing View data store changes and making
249+
// documents visible to queries (default: 1000, to disable use: 0).
250+
CommitIntervalMsec *int64 `json:"commitIntervalMsec,omitempty"`
251+
// CleanupIntervalStep Wait at least this many commits between removing unused files in the ArangoSearch data directory
252+
// (default: 2, to disable use: 0).
253+
CleanupIntervalStep *int64 `json:"cleanupIntervalStep,omitempty"`
254+
// ConsolidationPolicy The consolidation policy to apply for selecting which segments should be merged (default: {}).
255+
ConsolidationPolicy *ArangoSearchConsolidationPolicy `json:"consolidationPolicy,omitempty"`
256+
// WriteBufferIdle Maximum number of writers (segments) cached in the pool (default: 64, use 0 to disable)
257+
WriteBufferIdle *int64 `json:"writebufferIdle,omitempty"`
258+
// WriteBufferActive Maximum number of concurrent active writers (segments) that perform a transaction.
259+
// Other writers (segments) wait till current active writers (segments) finish (default: 0, use 0 to disable)
260+
WriteBufferActive *int64 `json:"writebufferActive,omitempty"`
261+
// WriteBufferSizeMax Maximum memory byte size per writer (segment) before a writer (segment) flush is triggered.
262+
// 0 value turns off this limit for any writer (buffer) and data will be flushed periodically based on the value defined for the flush thread (ArangoDB server startup option).
263+
// 0 value should be used carefully due to high potential memory consumption (default: 33554432, use 0 to disable)
264+
WriteBufferSizeMax *int64 `json:"writebufferSizeMax,omitempty"`
231265
}
232266

233267
// InvertedIndexPrimarySort defines compression and list of fields to be sorted.
@@ -239,17 +273,30 @@ type InvertedIndexPrimarySort struct {
239273

240274
// InvertedIndexField contains configuration for indexing of the field
241275
type InvertedIndexField struct {
242-
// Name of the field
276+
// Name An attribute path. The . character denotes sub-attributes.
243277
Name string `json:"name"`
244-
// Analyzer optional
245-
Analyzer ArangoSearchAnalyzerType `json:"analyzer,omitempty"`
246-
// IncludeAllFields If set to true, all fields of this element will be indexed. Defaults to false.
278+
// Analyzer indicating the name of an analyzer instance
279+
// Default: the value defined by the top-level analyzer option, or if not set, the default identity Analyzer.
280+
Analyzer string `json:"analyzer,omitempty"`
281+
// IncludeAllFields This option only applies if you use the inverted index in a search-alias View.
282+
// If set to true, then all sub-attributes of this field are indexed, excluding any sub-attributes that are configured separately by other elements in the fields array (and their sub-attributes). The analyzer and features properties apply to the sub-attributes.
283+
// If set to false, then sub-attributes are ignored. The default value is defined by the top-level includeAllFields option, or false if not set.
247284
IncludeAllFields bool `json:"includeAllFields,omitempty"`
248-
// TrackListPositions If set to true, values in a listed are treated as separate values. Defaults to false.
285+
// SearchField This option only applies if you use the inverted index in a search-alias View.
286+
// You can set the option to true to get the same behavior as with arangosearch Views regarding the indexing of array values for this field. If enabled, both array and primitive values (strings, numbers, etc.) are accepted. Every element of an array is indexed according to the trackListPositions option.
287+
// If set to false, it depends on the attribute path. If it explicitly expands an array ([*]), then the elements are indexed separately. Otherwise, the array is indexed as a whole, but only geopoint and aql Analyzers accept array inputs. You cannot use an array expansion if searchField is enabled.
288+
// Default: the value defined by the top-level searchField option, or false if not set.
289+
SearchField bool `json:"searchField,omitempty"`
290+
// TrackListPositions This option only applies if you use the inverted index in a search-alias View.
291+
// If set to true, then track the value position in arrays for array values. For example, when querying a document like { attr: [ "valueX", "valueY", "valueZ" ] }, you need to specify the array element, e.g. doc.attr[1] == "valueY".
292+
// If set to false, all values in an array are treated as equal alternatives. You don’t specify an array element in queries, e.g. doc.attr == "valueY", and all elements are searched for a match.
293+
// Default: the value defined by the top-level trackListPositions option, or false if not set.
249294
TrackListPositions bool `json:"trackListPositions,omitempty"`
250-
// Features list of analyzer features, default [].
295+
// Features A list of Analyzer features to use for this field. They define what features are enabled for the analyzer.
251296
Features []ArangoSearchAnalyzerFeature `json:"features,omitempty"`
252-
// Nested
297+
// Nested - Index the specified sub-objects that are stored in an array.
298+
// Other than with the fields property, the values get indexed in a way that lets you query for co-occurring values.
299+
// For example, you can search the sub-objects and all the conditions need to be met by a single sub-object instead of across all of them.
253300
// Enterprise-only feature
254301
Nested []InvertedIndexField `json:"nested,omitempty"`
255302
}

test/index_ensure_test.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -528,8 +528,6 @@ func TestEnsureInvertedIndex(t *testing.T) {
528528
},
529529
Compression: driver.PrimarySortCompressionLz4,
530530
},
531-
Features: []driver.ArangoSearchAnalyzerFeature{},
532-
StoredValues: []driver.StoredValue{},
533531
Fields: []driver.InvertedIndexField{
534532
{Name: "field1", Features: []driver.ArangoSearchAnalyzerFeature{driver.ArangoSearchAnalyzerFeatureFrequency}, Nested: nil},
535533
{Name: "field2", Features: []driver.ArangoSearchAnalyzerFeature{driver.ArangoSearchAnalyzerFeaturePosition}, TrackListPositions: false, Nested: nil},
@@ -547,8 +545,6 @@ func TestEnsureInvertedIndex(t *testing.T) {
547545
},
548546
Compression: driver.PrimarySortCompressionLz4,
549547
},
550-
Features: []driver.ArangoSearchAnalyzerFeature{},
551-
StoredValues: []driver.StoredValue{},
552548
Fields: []driver.InvertedIndexField{
553549
{Name: "field1", Features: []driver.ArangoSearchAnalyzerFeature{driver.ArangoSearchAnalyzerFeatureFrequency}, Nested: nil},
554550
{Name: "field2", Features: []driver.ArangoSearchAnalyzerFeature{driver.ArangoSearchAnalyzerFeaturePosition}, TrackListPositions: false,
@@ -584,12 +580,13 @@ func TestEnsureInvertedIndex(t *testing.T) {
584580
require.True(t, created)
585581

586582
tc.Options.IsNewlyCreated = true
587-
tc.Options.Analyzer = driver.ArangoSearchAnalyzerTypeIdentity // default value for analyzer
583+
tc.Options.Analyzer = string(driver.ArangoSearchAnalyzerTypeIdentity) // default value for analyzer
588584

589585
requireIdxEquality := func(invertedIdx driver.Index) {
590586
require.Equal(t, driver.InvertedIndex, idx.Type())
591587
require.Equal(t, tc.Options.Name, idx.UserName())
592-
require.Equal(t, tc.Options, idx.InvertedIndexOptions())
588+
require.Equal(t, tc.Options.PrimarySort, idx.InvertedIndexOptions().PrimarySort)
589+
require.Equal(t, tc.Options.Fields, idx.InvertedIndexOptions().Fields)
593590
}
594591
requireIdxEquality(idx)
595592

view_arangosearch.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ type ArangoSearchViewProperties struct {
319319
// CommitInterval ArangoSearch waits at least this many milliseconds between committing view data store changes and making documents visible to queries
320320
CommitInterval *int64 `json:"commitIntervalMsec,omitempty"`
321321

322-
// WriteBufferIdel specifies the maximum number of writers (segments) cached in the pool.
322+
// WriteBufferIdle specifies the maximum number of writers (segments) cached in the pool.
323323
// 0 value turns off caching, default value is 64.
324324
WriteBufferIdel *int64 `json:"writebufferIdle,omitempty"`
325325

0 commit comments

Comments
 (0)