Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,380 @@
setup:
  # Only run when the cluster advertises the synthetic_vectors_setting search
  # capability and the test runner supports the features used below.
  - requires:
      reason: 'synthetic vectors are required'
      test_runner_features:
        - capabilities
      capabilities:
        - method: GET
          path: /_search
          capabilities:
            - synthetic_vectors_setting
  - skip:
      features: "headers"

  # Index with synthetic vectors enabled. It maps one top-level sparse_vector
  # field ("emb") and one sparse_vector field inside a nested object
  # ("nested.emb").
  - do:
      indices.create:
        index: test
        body:
          settings:
            index.mapping.synthetic_vectors: true
          mappings:
            properties:
              name: { type: keyword }
              emb: { type: sparse_vector }
              nested:
                type: nested
                properties:
                  paragraph_id: { type: keyword }
                  emb: { type: sparse_vector }

  # Document 1: top-level vector only.
  - do:
      index:
        index: test
        id: "1"
        body:
          name: cow.jpg
          emb: { token_1: 2.0, token_2: 3.0 }

  # Document 2: three nested objects, each carrying a vector.
  - do:
      index:
        index: test
        id: "2"
        body:
          name: moose.jpg
          nested:
            - paragraph_id: 0
              emb: { token_1: 2.0, token_2: 3.0 }
            - paragraph_id: 2
              emb: { token_3: 2.0, token_2: 3.0 }
            - paragraph_id: 3
              emb: { token_3: 2.0, token_7: 3.0, token_1: 4.0 }

  # Document 3: top-level vector only.
  - do:
      index:
        index: test
        id: "3"
        body:
          name: rabbit.jpg
          emb: { token_3: 2.0, token_9: 3.0, token_2: 4.0 }

  # Document 4: three nested objects; the middle one (paragraph_id 1) has no
  # vector at all.
  - do:
      index:
        index: test
        id: "4"
        body:
          name: zoolander.jpg
          nested:
            - paragraph_id: 0
              emb: { token_3: 2.0, token_7: 3.0, token_1: 4.0 }
            - paragraph_id: 1
            - paragraph_id: 2
              emb: { token_8: 2.0 }

  # Make all four documents visible to the searches in the tests.
  - do:
      indices.refresh: {}

---
"exclude synthetic vectors":
  # Search without any _source options, sorted by name so that hits 0..3 are
  # documents "1".."4". Every assertion below expects the vectors to be absent
  # from _source while the remaining fields are returned.
  - do:
      search:
        index: test
        body:
          sort:
            - name

  # Hit 0 -> document "1" (top-level vector).
  - match: { hits.hits.0._id: "1" }
  - match: { hits.hits.0._source.name: "cow.jpg" }
  - not_exists: hits.hits.0._source.emb

  # Hit 1 -> document "2" (three nested objects, all indexed with a vector).
  - match: { hits.hits.1._id: "2" }
  - match: { hits.hits.1._source.name: "moose.jpg" }
  - length: { hits.hits.1._source.nested: 3 }
  - not_exists: hits.hits.1._source.nested.0.emb
  - match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
  - not_exists: hits.hits.1._source.nested.1.emb
  - match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
  - not_exists: hits.hits.1._source.nested.2.emb
  - match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

  # Hit 2 -> document "3" (top-level vector).
  - match: { hits.hits.2._id: "3" }
  - match: { hits.hits.2._source.name: "rabbit.jpg" }
  - not_exists: hits.hits.2._source.emb

  # Hit 3 -> document "4" (three nested objects, the middle one indexed
  # without a vector).
  - match: { hits.hits.3._id: "4" }
  - match: { hits.hits.3._source.name: "zoolander.jpg" }
  - length: { hits.hits.3._source.nested: 3 }
  - not_exists: hits.hits.3._source.nested.0.emb
  - match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
  - match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
  - not_exists: hits.hits.3._source.nested.2.emb
  - match: { hits.hits.3._source.nested.2.paragraph_id: 2 }

---
"include synthetic vectors":
  # Part 1: ask for vectors explicitly (exclude_vectors: false). The full
  # _source, including every indexed vector, is expected back.
  - do:
      search:
        index: test
        body:
          _source:
            exclude_vectors: false
          sort: ["name"]

  - match: { hits.hits.0._id: "1" }
  - match: { hits.hits.0._source.name: "cow.jpg" }
  - exists: hits.hits.0._source.emb

  - match: { hits.hits.1._id: "2" }
  - match: { hits.hits.1._source.name: "moose.jpg" }
  - length: { hits.hits.1._source.nested: 3 }
  - exists: hits.hits.1._source.nested.0.emb
  - match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
  - exists: hits.hits.1._source.nested.1.emb
  - match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
  - exists: hits.hits.1._source.nested.2.emb
  - match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

  - match: { hits.hits.2._id: "3" }
  - match: { hits.hits.2._source.name: "rabbit.jpg" }
  - exists: hits.hits.2._source.emb

  - match: { hits.hits.3._id: "4" }
  - match: { hits.hits.3._source.name: "zoolander.jpg" }
  - length: { hits.hits.3._source.nested: 3 }
  - exists: hits.hits.3._source.nested.0.emb
  - length: { hits.hits.3._source.nested.0.emb: 3 }
  - match: { hits.hits.3._source.nested.0.paragraph_id: 0 }

  # Part 2: include vectors but restrict _source to nested.emb only.
  # Documents without nested vectors are expected to come back with an empty
  # _source; the others keep only the nested objects that carry a vector.
  - do:
      search:
        index: test
        body:
          _source:
            exclude_vectors: false
            includes: nested.emb
          sort: ["name"]

  - match: { hits.hits.0._id: "1" }
  - length: { hits.hits.0._source: 0 }

  - match: { hits.hits.1._id: "2" }
  # Checks hit 1 itself (this assertion previously pointed at hit 3 by mistake).
  - length: { hits.hits.1._source: 1 }
  - length: { hits.hits.1._source.nested: 3 }
  - exists: hits.hits.1._source.nested.0.emb
  - not_exists: hits.hits.1._source.nested.0.paragraph_id
  - exists: hits.hits.1._source.nested.1.emb
  - not_exists: hits.hits.1._source.nested.1.paragraph_id
  - exists: hits.hits.1._source.nested.2.emb
  - not_exists: hits.hits.1._source.nested.2.paragraph_id

  - match: { hits.hits.2._id: "3" }
  - length: { hits.hits.2._source: 0 }

  # Document "4" was indexed with three nested objects, one of them without a
  # vector, so only two nested entries are expected after filtering.
  - match: { hits.hits.3._id: "4" }
  - length: { hits.hits.3._source: 1 }
  - length: { hits.hits.3._source.nested: 2 }
  - exists: hits.hits.3._source.nested.0.emb
  - length: { hits.hits.3._source.nested.0.emb: 3 }
  - not_exists: hits.hits.3._source.nested.0.paragraph_id
  - exists: hits.hits.3._source.nested.1.emb
  - length: { hits.hits.3._source.nested.1.emb: 1 }
  - not_exists: hits.hits.3._source.nested.1.paragraph_id

  # Part 3: exclude vectors from _source but request the top-level vector
  # through the "fields" option. The vector is expected under hits.fields and
  # not under _source.
  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
        Content-Type: application/json
      search:
        index: test
        body:
          _source:
            exclude_vectors: true
          sort: ["name"]
          fields: ["emb"]

  - match: { hits.hits.0._id: "1" }
  - match: { hits.hits.0._source.name: "cow.jpg" }
  - not_exists: hits.hits.0._source.emb
  - length: { hits.hits.0.fields.emb: 1 }
  - length: { hits.hits.0.fields.emb.0: 2 }
  - match: { hits.hits.0.fields.emb.0.token_1: 2.0 }
  - match: { hits.hits.0.fields.emb.0.token_2: 3.0 }

  - match: { hits.hits.1._id: "2" }
  - match: { hits.hits.1._source.name: "moose.jpg" }
  - length: { hits.hits.1._source.nested: 3 }
  - not_exists: hits.hits.1._source.nested.0.emb

  - match: { hits.hits.2._id: "3" }
  - match: { hits.hits.2._source.name: "rabbit.jpg" }
  - length: { hits.hits.2.fields.emb: 1 }
  - length: { hits.hits.2.fields.emb.0: 3 }
  - match: { hits.hits.2.fields.emb.0.token_2: 4.0 }
  - match: { hits.hits.2.fields.emb.0.token_3: 2.0 }
  - match: { hits.hits.2.fields.emb.0.token_9: 3.0 }

  - match: { hits.hits.3._id: "4" }
  - match: { hits.hits.3._source.name: "zoolander.jpg" }
  - length: { hits.hits.3._source.nested: 3 }
  - not_exists: hits.hits.3._source.nested.0.emb


---
"Bulk partial update with synthetic vectors":
  # Partially update document "4" through the bulk API: rename it and add a
  # top-level vector. The nested objects are not part of the update and the
  # assertions below expect them, including their vectors, to be unchanged.
  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
        Content-Type: application/json
      bulk:
        index: test
        _source: true
        body:
          - '{"update": {"_id": "4"}}'
          - >
            {
              "doc": {
                "name": "zoolander2.jpg",
                "emb": {
                  "token_12": 2.0,
                  "token_13": 1.0
                }
              }
            }

  # The _source echoed back by the update carries the new top-level vector...
  - length: { items.0.update.get._source.emb: 2 }
  - match: { items.0.update.get._source.emb.token_12: 2.0 }
  - match: { items.0.update.get._source.emb.token_13: 1.0 }
  # ...and the nested objects as indexed in setup (the middle one has no vector).
  - exists: items.0.update.get._source.nested
  - length: { items.0.update.get._source.nested: 3 }
  - exists: items.0.update.get._source.nested.0.emb
  - match: { items.0.update.get._source.nested.0.paragraph_id: 0 }
  - length: { items.0.update.get._source.nested.0.emb: 3 }
  - not_exists: items.0.update.get._source.nested.1.emb
  - match: { items.0.update.get._source.nested.1.paragraph_id: 1 }
  - exists: items.0.update.get._source.nested.2.emb
  - length: { items.0.update.get._source.nested.2.emb: 1 }
  - match: { items.0.update.get._source.nested.2.paragraph_id: 2 }
  # Stash the nested array so later reads can be compared against it.
  - set: { items.0.update.get._source.nested: original_nested }

  # Read the document back by id with vectors included.
  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
        Content-Type: application/json
      get:
        _source_exclude_vectors: false
        index: test
        id: "4"

  - match: { _source.name: zoolander2.jpg }
  - length: { _source.emb: 2 }
  - match: { _source.emb.token_12: 2.0 }
  - match: { _source.emb.token_13: 1.0 }
  - match: { _source.nested: $original_nested }

  - do:
      indices.refresh: {}

  # Same check through search after a refresh.
  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
        Content-Type: application/json
      search:
        index: test
        body:
          _source:
            exclude_vectors: false
          query:
            term:
              # Quoted so the id is sent as a string, matching `id: "4"` used elsewhere in this file.
              _id: "4"

  - match: { hits.total.value: 1 }
  - match: { hits.total.relation: eq }
  - match: { hits.hits.0._source.name: zoolander2.jpg }
  - match: { hits.hits.0._source.nested: $original_nested }

---
"Partial update with synthetic vectors":
  # Partially update document "4" through the update API: rename it and add a
  # top-level vector. The nested objects are not part of the update and the
  # assertions below expect them, including their vectors, to be unchanged.
  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the vectors as doubles
        Content-Type: application/json
      update:
        index: test
        id: "4"
        body:
          _source: true
          doc:
            name: "zoolander3.jpg"
            emb:
              token_3: 2.0
              token_9: 2.5

  # The _source echoed back by the update carries the new top-level vector...
  - length: { get._source.emb: 2 }
  - match: { get._source.emb.token_3: 2.0 }
  - match: { get._source.emb.token_9: 2.5 }
  # ...and the nested objects as indexed in setup (the middle one has no vector).
  - exists: get._source.nested
  - length: { get._source.nested: 3 }
  - exists: get._source.nested.0.emb
  - match: { get._source.nested.0.paragraph_id: 0 }
  - length: { get._source.nested.0.emb: 3 }
  - not_exists: get._source.nested.1.emb
  - match: { get._source.nested.1.paragraph_id: 1 }
  - exists: get._source.nested.2.emb
  - length: { get._source.nested.2.emb: 1 }
  - match: { get._source.nested.2.paragraph_id: 2 }
  # Stash the nested array so later reads can be compared against it.
  - set: { get._source.nested: original_nested }

  # Read the document back by id with vectors included.
  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the vectors as doubles
        Content-Type: application/json
      get:
        _source_exclude_vectors: false
        index: test
        id: "4"

  - length: { _source.emb: 2 }
  - match: { _source.emb.token_3: 2.0 }
  - match: { _source.emb.token_9: 2.5 }
  - match: { _source.name: zoolander3.jpg }
  - match: { _source.nested: $original_nested }

  - do:
      indices.refresh: {}

  # Same check through search after a refresh.
  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the vectors as doubles
        Content-Type: application/json
      search:
        index: test
        body:
          _source:
            exclude_vectors: false
          query:
            term:
              # Quoted so the id is sent as a string, matching `id: "4"` used elsewhere in this file.
              _id: "4"

  - match: { hits.total.value: 1 }
  - match: { hits.total.relation: eq }
  - match: { hits.hits.0._source.name: zoolander3.jpg }
  - match: { hits.hits.0._source.nested: $original_nested }
Loading