|
| 1 | +setup: |
| 2 | + - requires: |
| 3 | + cluster_features: "mapper.base64_dense_vectors" |
| 4 | + reason: 'base64 encoding for vectors feature required' |
| 5 | + |
| 6 | + - do: |
| 7 | + indices.create: |
| 8 | + index: knn_base64_vector_index |
| 9 | + body: |
| 10 | + settings: |
| 11 | + number_of_shards: 1 |
| 12 | + mappings: |
| 13 | + dynamic: false |
| 14 | + properties: |
| 15 | + my_vector_byte: |
| 16 | + type: dense_vector |
| 17 | + dims: 3 |
| 18 | + index : true |
| 19 | + similarity : l2_norm |
| 20 | + element_type: byte |
| 21 | + my_vector_float: |
| 22 | + type: dense_vector |
| 23 | + dims: 3 |
| 24 | + index: true |
| 25 | + element_type: float |
| 26 | + similarity : l2_norm |
| 27 | + |
| 28 | + # Float vector [0.8837743, 0.6310808, 0.7800066], as big-endian float32 values, is base64-encoded as 'P2I/CD8hjoM/R66D' |
| 29 | + # Byte vector [-128, 127, 10] is base64-encoded as 'gH8K' |
| 30 | + - do: |
| 31 | + index: |
| 32 | + index: knn_base64_vector_index |
| 33 | + id: "1" |
| 34 | + body: |
| 35 | + my_vector_float: "P2I/CD8hjoM/R66D" |
| 36 | + my_vector_byte: "gH8K" |
| 37 | + |
| 38 | + |
| 39 | + # Float vector [0.27721548, 0.9202792, 0.46455473], as big-endian float32 values, is base64-encoded as 'Po3vMD9rl2s+7doe' |
| 40 | + # Byte vector [0, 1, 0] is base64-encoded as 'AAEA' |
| 41 | + - do: |
| 42 | + index: |
| 43 | + index: knn_base64_vector_index |
| 44 | + id: "2" |
| 45 | + body: |
| 46 | + my_vector_float: "Po3vMD9rl2s+7doe" |
| 47 | + my_vector_byte: "AAEA" |
| 48 | + |
| 49 | + - do: |
| 50 | + index: |
| 51 | + index: knn_base64_vector_index |
| 52 | + id: "3" |
| 53 | + body: |
| 54 | + my_vector_float: [0.2509804, -0.039215684, -0.11764706] |
| 55 | + my_vector_byte: [64, -10, -30] |
| 56 | + |
| 57 | + - do: |
| 58 | + indices.refresh: {} |
| 59 | + |
| 60 | +--- |
| 61 | +"Fail to index hex-encoded vector on float field": |
| 62 | + |
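| 63 | + # Byte vector [-128, 127, 10] is hex-encoded (not base64) as '807f0a'; indexing that string into the float field is expected to fail with bad_request |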
| 64 | + - do: |
| 65 | + catch: bad_request |
| 66 | + index: |
| 67 | + index: knn_base64_vector_index |
| 68 | + id: "5" |
| 69 | + body: |
| 70 | + my_vector_float: "807f0a" |
| 71 | + |
| 72 | +--- |
| 73 | +"Knn retrieve base64 encoded vectors" : |
| 74 | + - do: |
| 75 | + get: |
| 76 | + index: knn_base64_vector_index |
| 77 | + id: "1" |
| 78 | + _source_exclude_vectors: false |
| 79 | + |
| 80 | + - match: { _source.my_vector_float: [0.8837743, 0.6310808, 0.7800066] } |
| 81 | + - match: { _source.my_vector_byte: [-128, 127, 10] } |
| 82 | +--- |
| 83 | +"Base64 bytes infers the dimensions correctly": |
| 84 | + - do: |
| 85 | + indices.create: |
| 86 | + index: knn_base64_vector_index_infer_dims |
| 87 | + body: |
| 88 | + settings: |
| 89 | + number_of_shards: 1 |
| 90 | + mappings: |
| 91 | + dynamic: false |
| 92 | + properties: |
| 93 | + my_vector_byte: |
| 94 | + type: dense_vector |
| 95 | + index : true |
| 96 | + similarity : l2_norm |
| 97 | + element_type: byte |
| 98 | + |
| 99 | + # Byte vector [-128, 127, 10, 0] (4 bytes) is base64-encoded as 'gH8KAA==' |
| 100 | + - do: |
| 101 | + index: |
| 102 | + index: knn_base64_vector_index_infer_dims |
| 103 | + id: "1" |
| 104 | + body: |
| 105 | + my_vector_byte: "gH8KAA==" |
| 106 | + |
| 107 | + - do: |
| 108 | + cluster.health: |
| 109 | + wait_for_events: languid |
| 110 | + |
| 111 | + - do: |
| 112 | + indices.get_mapping: |
| 113 | + index: knn_base64_vector_index_infer_dims |
| 114 | + |
| 115 | + # The mapping above declares no dims, so dims: 4 must have been inferred from the 4-byte base64 payload |
| 116 | + - match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.type: dense_vector } |
| 117 | + - match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.index: true } |
| 118 | + - match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.dims: 4 } |
| 119 | +--- |
| 120 | +"Base64 floats infers the dimensions correctly": |
| 121 | + - do: |
| 122 | + indices.create: |
| 123 | + index: knn_base64_vector_index_infer_dims |
| 124 | + body: |
| 125 | + settings: |
| 126 | + number_of_shards: 1 |
| 127 | + mappings: |
| 128 | + dynamic: false |
| 129 | + properties: |
| 130 | + my_vector_float: |
| 131 | + type: dense_vector |
| 132 | + index : true |
| 133 | + similarity : l2_norm |
| 134 | + element_type: float |
| 135 | + |
| 136 | + # Float vector [0.8837743, 0.6310808, 0.7800066, 0.0], as four big-endian float32 values (16 bytes), is base64-encoded as 'P2I/CD8hjoM/R66DAAAAAA==' |
| 137 | + - do: |
| 138 | + index: |
| 139 | + index: knn_base64_vector_index_infer_dims |
| 140 | + id: "1" |
| 141 | + body: |
| 142 | + my_vector_float: "P2I/CD8hjoM/R66DAAAAAA==" |
| 143 | + |
| 144 | + - do: |
| 145 | + cluster.health: |
| 146 | + wait_for_events: languid |
| 147 | + - do: |
| 148 | + indices.get_mapping: |
| 149 | + index: knn_base64_vector_index_infer_dims |
| 150 | + |
| 151 | + # The mapping above declares no dims, so dims: 4 must have been inferred from the 16-byte base64 payload (4 floats) |
| 152 | + - match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_float.type: dense_vector } |
| 153 | + - match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_float.index: true } |
| 154 | + - match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_float.dims: 4 } |
| 155 | +--- |
| 156 | +"Retrieve Base64 encoded vectors when exclude vectors from source is false": |
| 157 | + - do: |
| 158 | + indices.create: |
| 159 | + index: knn_base64_vector_index_with_source_vectors |
| 160 | + body: |
| 161 | + settings: |
| 162 | + number_of_shards: 1 |
| 163 | + index: |
| 164 | + mapping: |
| 165 | + exclude_source_vectors: false |
| 166 | + mappings: |
| 167 | + dynamic: false |
| 168 | + properties: |
| 169 | + my_vector_byte: |
| 170 | + type: dense_vector |
| 171 | + dims: 3 |
| 172 | + index : true |
| 173 | + similarity : l2_norm |
| 174 | + element_type: byte |
| 175 | + my_vector_float: |
| 176 | + type: dense_vector |
| 177 | + dims: 3 |
| 178 | + index: true |
| 179 | + element_type: float |
| 180 | + similarity : l2_norm |
| 181 | + |
| 182 | + - do: |
| 183 | + index: |
| 184 | + index: knn_base64_vector_index_with_source_vectors |
| 185 | + id: "1" |
| 186 | + body: |
| 187 | + my_vector_float: "P2I/CD8hjoM/R66D" |
| 188 | + my_vector_byte: "gH8K" |
| 189 | + |
| 190 | + - do: |
| 191 | + index: |
| 192 | + index: knn_base64_vector_index_with_source_vectors |
| 193 | + id: "3" |
| 194 | + body: |
| 195 | + my_vector_float: [0.2509804, -0.039215684, -0.11764706] |
| 196 | + my_vector_byte: [64, -10, -30] |
| 197 | + |
| 198 | + - do: |
| 199 | + indices.refresh: {} |
| 200 | + |
| 201 | + - do: |
| 202 | + search: |
| 203 | + index: knn_base64_vector_index_with_source_vectors |
| 204 | + body: |
| 205 | + query: |
| 206 | + ids: |
| 207 | + values: ["1"] |
| 208 | + _source: false |
| 209 | + fields: |
| 210 | + - my_vector_float |
| 211 | + - my_vector_byte |
| 212 | + |
| 213 | + - match: { hits.hits.0.fields.my_vector_float: ["P2I/CD8hjoM/R66D"] } |
| 214 | + - match: { hits.hits.0.fields.my_vector_byte: ["gH8K"] } |
| 215 | + |
| 216 | + - do: |
| 217 | + search: |
| 218 | + index: knn_base64_vector_index_with_source_vectors |
| 219 | + body: |
| 220 | + query: |
| 221 | + ids: |
| 222 | + values: ["3"] |
| 223 | + _source: false |
| 224 | + fields: |
| 225 | + - my_vector_float |
| 226 | + - my_vector_byte |
| 227 | + |
| 228 | + - match: { hits.hits.0.fields.my_vector_float: [0.2509804, -0.039215684, -0.11764706] } |
| 229 | + - match: { hits.hits.0.fields.my_vector_byte: [64, -10, -30] } |
0 commit comments