|
11 | 11 | import org.elasticsearch.action.ResolvedIndices;
|
12 | 12 | import org.elasticsearch.common.bytes.BytesArray;
|
13 | 13 | import org.elasticsearch.common.settings.Settings;
|
| 14 | +import org.elasticsearch.core.Tuple; |
14 | 15 | import org.elasticsearch.index.query.QueryRewriteContext;
|
15 | 16 | import org.elasticsearch.search.SearchModule;
|
16 | 17 | import org.elasticsearch.search.builder.PointInTimeBuilder;
|
@@ -235,6 +236,270 @@ public void testMultiFieldsParamsRewrite() {
|
235 | 236 | );
|
236 | 237 | }
|
237 | 238 |
|
| 239 | + public void testMultiIndexMultiFieldsParamsRewrite() { |
| 240 | + String indexName = "test-index"; |
| 241 | + String anotherIndexName = "test-another-index"; |
| 242 | + final ResolvedIndices resolvedIndices = createMockResolvedIndices( |
| 243 | + Map.of( |
| 244 | + indexName, |
| 245 | + List.of("semantic_field_1", "semantic_field_2"), |
| 246 | + anotherIndexName, |
| 247 | + List.of("semantic_field_2", "semantic_field_3") |
| 248 | + ), |
| 249 | + null, |
| 250 | + Map.of() // use random and different inference IDs for semantic_text fields |
| 251 | + ); |
| 252 | + |
| 253 | + final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( |
| 254 | + parserConfig(), |
| 255 | + null, |
| 256 | + null, |
| 257 | + TransportVersion.current(), |
| 258 | + RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY, |
| 259 | + resolvedIndices, |
| 260 | + new PointInTimeBuilder(new BytesArray("pitid")), |
| 261 | + null, |
| 262 | + null |
| 263 | + ); |
| 264 | + |
| 265 | + // No wildcards, no per-field boosting |
| 266 | + RRFRetrieverBuilder retriever = new RRFRetrieverBuilder( |
| 267 | + null, |
| 268 | + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), |
| 269 | + "foo", |
| 270 | + DEFAULT_RANK_WINDOW_SIZE, |
| 271 | + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
| 272 | + new float[0] |
| 273 | + ); |
| 274 | + assertMultiIndexMultiFieldsParamsRewrite( |
| 275 | + retriever, |
| 276 | + queryRewriteContext, |
| 277 | + Map.of( |
| 278 | + Map.of("field_1", 1.0f, "field_2", 1.0f), |
| 279 | + List.of(indexName), |
| 280 | + Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), |
| 281 | + List.of(anotherIndexName) |
| 282 | + ), |
| 283 | + Map.of( |
| 284 | + new Tuple<>("semantic_field_1", List.of(indexName)), |
| 285 | + 1.0f, |
| 286 | + new Tuple<>("semantic_field_2", List.of(indexName)), // field with different inference IDs, we filter on index name |
| 287 | + 1.0f, |
| 288 | + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), |
| 289 | + 1.0f |
| 290 | + ), |
| 291 | + "foo", |
| 292 | + null |
| 293 | + ); |
| 294 | + |
| 295 | + // Glob matching on inference and non-inference fields |
| 296 | + retriever = new RRFRetrieverBuilder( |
| 297 | + null, |
| 298 | + List.of("field_*", "field_1", "*_field_1", "semantic_*"), |
| 299 | + "baz2", |
| 300 | + DEFAULT_RANK_WINDOW_SIZE, |
| 301 | + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
| 302 | + new float[0] |
| 303 | + ); |
| 304 | + assertMultiIndexMultiFieldsParamsRewrite( |
| 305 | + retriever, |
| 306 | + queryRewriteContext, |
| 307 | + Map.of(Map.of("field_*", 1.0f, "field_1", 1.0f, "*_field_1", 1.0f, "semantic_*", 1.0f), List.of()), |
| 308 | + Map.of( |
| 309 | + new Tuple<>("semantic_field_1", List.of(indexName)), |
| 310 | + 1.0f, |
| 311 | + new Tuple<>("semantic_field_2", List.of(indexName)), |
| 312 | + 1.0f, |
| 313 | + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), |
| 314 | + 1.0f, |
| 315 | + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), |
| 316 | + 1.0f |
| 317 | + ), |
| 318 | + "baz2", |
| 319 | + null |
| 320 | + ); |
| 321 | + |
| 322 | + // Non-default rank window size |
| 323 | + retriever = new RRFRetrieverBuilder( |
| 324 | + null, |
| 325 | + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), |
| 326 | + "foo2", |
| 327 | + DEFAULT_RANK_WINDOW_SIZE * 2, |
| 328 | + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
| 329 | + new float[0] |
| 330 | + ); |
| 331 | + assertMultiIndexMultiFieldsParamsRewrite( |
| 332 | + retriever, |
| 333 | + queryRewriteContext, |
| 334 | + Map.of( |
| 335 | + Map.of("field_1", 1.0f, "field_2", 1.0f), |
| 336 | + List.of(indexName), |
| 337 | + Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), |
| 338 | + List.of(anotherIndexName) |
| 339 | + ), |
| 340 | + Map.of( |
| 341 | + new Tuple<>("semantic_field_1", List.of(indexName)), |
| 342 | + 1.0f, |
| 343 | + new Tuple<>("semantic_field_2", List.of(indexName)), |
| 344 | + 1.0f, |
| 345 | + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), |
| 346 | + 1.0f |
| 347 | + ), |
| 348 | + "foo2", |
| 349 | + null |
| 350 | + ); |
| 351 | + |
| 352 | + // All-fields wildcard |
| 353 | + retriever = new RRFRetrieverBuilder( |
| 354 | + null, |
| 355 | + List.of("*"), |
| 356 | + "qux", |
| 357 | + DEFAULT_RANK_WINDOW_SIZE, |
| 358 | + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
| 359 | + new float[0] |
| 360 | + ); |
| 361 | + assertMultiIndexMultiFieldsParamsRewrite( |
| 362 | + retriever, |
| 363 | + queryRewriteContext, |
| 364 | + Map.of(Map.of("*", 1.0f), List.of()), // no index filter for the lexical retriever |
| 365 | + Map.of( |
| 366 | + new Tuple<>("semantic_field_1", List.of(indexName)), |
| 367 | + 1.0f, |
| 368 | + new Tuple<>("semantic_field_2", List.of(indexName)), |
| 369 | + 1.0f, |
| 370 | + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), |
| 371 | + 1.0f, |
| 372 | + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), |
| 373 | + 1.0f |
| 374 | + ), |
| 375 | + "qux", |
| 376 | + null |
| 377 | + ); |
| 378 | + } |
| 379 | + |
| 380 | + public void testMultiIndexMultiFieldsParamsRewriteWithSameInferenceIds() { |
| 381 | + String indexName = "test-index"; |
| 382 | + String anotherIndexName = "test-another-index"; |
| 383 | + final ResolvedIndices resolvedIndices = createMockResolvedIndices( |
| 384 | + Map.of( |
| 385 | + indexName, |
| 386 | + List.of("semantic_field_1", "semantic_field_2"), |
| 387 | + anotherIndexName, |
| 388 | + List.of("semantic_field_2", "semantic_field_3") |
| 389 | + ), |
| 390 | + null, |
| 391 | + Map.of("semantic_field_2", "common_inference_id") // use the same inference ID for semantic_field_2 |
| 392 | + ); |
| 393 | + |
| 394 | + final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( |
| 395 | + parserConfig(), |
| 396 | + null, |
| 397 | + null, |
| 398 | + TransportVersion.current(), |
| 399 | + RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY, |
| 400 | + resolvedIndices, |
| 401 | + new PointInTimeBuilder(new BytesArray("pitid")), |
| 402 | + null, |
| 403 | + null |
| 404 | + ); |
| 405 | + |
| 406 | + // No wildcards, no per-field boosting |
| 407 | + RRFRetrieverBuilder retriever = new RRFRetrieverBuilder( |
| 408 | + null, |
| 409 | + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), |
| 410 | + "foo", |
| 411 | + DEFAULT_RANK_WINDOW_SIZE, |
| 412 | + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
| 413 | + new float[0] |
| 414 | + ); |
| 415 | + assertMultiIndexMultiFieldsParamsRewrite( |
| 416 | + retriever, |
| 417 | + queryRewriteContext, |
| 418 | + Map.of( |
| 419 | + Map.of("field_1", 1.0f, "field_2", 1.0f), |
| 420 | + List.of(indexName), |
| 421 | + Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), |
| 422 | + List.of(anotherIndexName) |
| 423 | + ), |
| 424 | + Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of()), 1.0f), |
| 425 | + "foo", |
| 426 | + null |
| 427 | + ); |
| 428 | + |
| 429 | + // Non-default rank window size |
| 430 | + retriever = new RRFRetrieverBuilder( |
| 431 | + null, |
| 432 | + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), |
| 433 | + "foo2", |
| 434 | + DEFAULT_RANK_WINDOW_SIZE * 2, |
| 435 | + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
| 436 | + new float[0] |
| 437 | + ); |
| 438 | + assertMultiIndexMultiFieldsParamsRewrite( |
| 439 | + retriever, |
| 440 | + queryRewriteContext, |
| 441 | + Map.of( |
| 442 | + Map.of("field_1", 1.0f, "field_2", 1.0f), |
| 443 | + List.of(indexName), |
| 444 | + Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), |
| 445 | + List.of(anotherIndexName) |
| 446 | + ), |
| 447 | + Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of()), 1.0f), |
| 448 | + "foo2", |
| 449 | + null |
| 450 | + ); |
| 451 | + |
| 452 | + // Glob matching on inference and non-inference fields |
| 453 | + retriever = new RRFRetrieverBuilder( |
| 454 | + null, |
| 455 | + List.of("field_*", "field_1", "*_field_1", "semantic_*"), |
| 456 | + "baz2", |
| 457 | + DEFAULT_RANK_WINDOW_SIZE, |
| 458 | + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
| 459 | + new float[0] |
| 460 | + ); |
| 461 | + assertMultiIndexMultiFieldsParamsRewrite( |
| 462 | + retriever, |
| 463 | + queryRewriteContext, |
| 464 | + Map.of(Map.of("field_*", 1.0f, "field_1", 1.0f, "*_field_1", 1.0f, "semantic_*", 1.0f), List.of()), |
| 465 | + Map.of( |
| 466 | + new Tuple<>("semantic_field_1", List.of(indexName)), |
| 467 | + 1.0f, |
| 468 | + new Tuple<>("semantic_field_2", List.of()), |
| 469 | + 1.0f, |
| 470 | + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), |
| 471 | + 1.0f |
| 472 | + ), |
| 473 | + "baz2", |
| 474 | + null |
| 475 | + ); |
| 476 | + |
| 477 | + // All-fields wildcard |
| 478 | + retriever = new RRFRetrieverBuilder( |
| 479 | + null, |
| 480 | + List.of("*"), |
| 481 | + "qux", |
| 482 | + DEFAULT_RANK_WINDOW_SIZE, |
| 483 | + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
| 484 | + new float[0] |
| 485 | + ); |
| 486 | + assertMultiIndexMultiFieldsParamsRewrite( |
| 487 | + retriever, |
| 488 | + queryRewriteContext, |
| 489 | + Map.of(Map.of("*", 1.0f), List.of()), // on index filter on the lexical query |
| 490 | + Map.of( |
| 491 | + new Tuple<>("semantic_field_1", List.of(indexName)), |
| 492 | + 1.0f, |
| 493 | + new Tuple<>("semantic_field_2", List.of()), // no index filter since both indices have this field |
| 494 | + 1.0f, |
| 495 | + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), |
| 496 | + 1.0f |
| 497 | + ), |
| 498 | + "qux", |
| 499 | + null |
| 500 | + ); |
| 501 | + } |
| 502 | + |
238 | 503 | public void testSearchRemoteIndex() {
|
239 | 504 | final ResolvedIndices resolvedIndices = createMockResolvedIndices(
|
240 | 505 | Map.of("local-index", List.of()),
|
|
0 commit comments