Skip to content

Commit 9357340

Browse files
committed
Add support for synthetic vectors in dense vector field mapper
1 parent 6c584e9 commit 9357340

File tree

8 files changed

+812
-62
lines changed

8 files changed

+812
-62
lines changed
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
setup:
2+
- requires:
3+
reason: 'synthetic vectors are required'
4+
test_runner_features: [ capabilities ]
5+
capabilities:
6+
- method: GET
7+
path: /_search
8+
capabilities: [ synthetic_vectors_setting ]
9+
- skip:
10+
features: "headers"
11+
12+
- do:
13+
indices.create:
14+
index: test
15+
body:
16+
settings:
17+
index.mapping.synthetic_vectors: true
18+
mappings:
19+
properties:
20+
name:
21+
type: keyword
22+
vector:
23+
type: dense_vector
24+
dims: 5
25+
similarity: l2_norm
26+
27+
nested:
28+
type: nested
29+
properties:
30+
paragraph_id:
31+
type: keyword
32+
vector:
33+
type: dense_vector
34+
dims: 5
35+
similarity: l2_norm
36+
37+
- do:
38+
index:
39+
index: test
40+
id: "1"
41+
body:
42+
name: cow.jpg
43+
vector: [36, 267, -311, 12, -202]
44+
45+
- do:
46+
index:
47+
index: test
48+
id: "2"
49+
body:
50+
name: moose.jpg
51+
nested:
52+
- paragraph_id: 0
53+
vector: [-0.5, 100.0, -13, 14.8, -156.0]
54+
- paragraph_id: 2
55+
vector: [0, 100.0, 0, 14.8, -156.0]
56+
- paragraph_id: 3
57+
vector: [0, 1.0, 0, 1.8, -15.0]
58+
59+
- do:
60+
index:
61+
index: test
62+
id: "3"
63+
body:
64+
name: rabbit.jpg
65+
vector: [-0.5, 100.0, -13, 14.8, -156.0]
66+
67+
- do:
68+
index:
69+
index: test
70+
id: "4"
71+
body:
72+
name: zoolander.jpg
73+
nested:
74+
- paragraph_id: 0
75+
vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]
76+
- paragraph_id: 1
77+
- paragraph_id: 2
78+
vector: [ -9.8, 109, 32, 14.8, 23 ]
79+
80+
81+
- do:
82+
indices.refresh: {}
83+
84+
---
85+
"exclude synthetic vectors":
86+
- do:
87+
search:
88+
index: test
89+
body:
90+
sort: ["name"]
91+
92+
- match: { hits.hits.0._id: "1"}
93+
- match: { hits.hits.0._source.name: "cow.jpg"}
94+
- not_exists: hits.hits.0._source.vector
95+
96+
- match: { hits.hits.1._id: "2"}
97+
- match: { hits.hits.1._source.name: "moose.jpg"}
98+
- length: { hits.hits.1._source.nested: 3 }
99+
- not_exists: hits.hits.1._source.nested.0.vector
100+
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
101+
- not_exists: hits.hits.1._source.nested.1.vector
102+
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
103+
- not_exists: hits.hits.1._source.nested.2.vector
104+
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }
105+
106+
- match: { hits.hits.2._id: "3" }
107+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
108+
- not_exists: hits.hits.2._source.vector
109+
110+
- match: { hits.hits.3._id: "4" }
111+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
112+
- length: { hits.hits.3._source.nested: 3 }
113+
- not_exists: hits.hits.3._source.nested.0.vector
114+
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
115+
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
116+
- not_exists: hits.hits.3._source.nested.2.vector
117+
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
118+
119+
---
120+
"include synthetic vectors":
121+
- do:
122+
search:
123+
index: test
124+
body:
125+
_source:
126+
exclude_vectors: false
127+
sort: ["name"]
128+
129+
- match: { hits.hits.0._id: "1"}
130+
- match: { hits.hits.0._source.name: "cow.jpg"}
131+
- exists: hits.hits.0._source.vector
132+
133+
- match: { hits.hits.1._id: "2"}
134+
- match: { hits.hits.1._source.name: "moose.jpg"}
135+
- length: { hits.hits.1._source.nested: 3 }
136+
- exists: hits.hits.1._source.nested.0.vector
137+
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
138+
- exists: hits.hits.1._source.nested.1.vector
139+
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
140+
- exists: hits.hits.1._source.nested.2.vector
141+
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }
142+
143+
- match: { hits.hits.2._id: "3" }
144+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
145+
- exists: hits.hits.2._source.vector
146+
147+
- match: { hits.hits.3._id: "4" }
148+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
149+
- length: { hits.hits.3._source.nested: 3 }
150+
- exists: hits.hits.3._source.nested.0.vector
151+
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }

server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.common.xcontent.XContentHelper;
1818
import org.elasticsearch.core.Nullable;
1919
import org.elasticsearch.features.NodeFeature;
20+
import org.elasticsearch.index.IndexSettings;
2021
import org.elasticsearch.index.IndexVersion;
2122
import org.elasticsearch.index.IndexVersions;
2223
import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -804,7 +805,8 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context
804805

805806
DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder(
806807
fieldName,
807-
context.indexSettings().getIndexVersionCreated()
808+
context.indexSettings().getIndexVersionCreated(),
809+
IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings())
808810
);
809811
builder.dimensions(mappers.size());
810812
DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext);

0 commit comments

Comments
 (0)