|
47 | 47 | import java.util.concurrent.atomic.AtomicInteger; |
48 | 48 |
|
49 | 49 | import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; |
50 | | -import static org.elasticsearch.search.rank.RankBuilder.DEFAULT_RANK_WINDOW_SIZE; |
51 | 50 | import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; |
52 | 51 | import static org.hamcrest.CoreMatchers.is; |
53 | 52 | import static org.hamcrest.Matchers.containsString; |
@@ -840,183 +839,4 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws |
840 | 839 | assertThat(numAsyncCalls.get(), equalTo(4)); |
841 | 840 | } |
842 | 841 |
|
843 | | - public void testRRFWithWeightedFields() { |
844 | | - // Test that weighted fields affect ranking as expected |
845 | | - client().prepareIndex(INDEX).setId("1").setSource("title", "elasticsearch guide", "content", "comprehensive tutorial").get(); |
846 | | - client().prepareIndex(INDEX).setId("2").setSource("title", "advanced elasticsearch", "content", "expert guide").get(); |
847 | | - client().prepareIndex(INDEX).setId("3").setSource("title", "tutorial", "content", "elasticsearch basics").get(); |
848 | | - refresh(); |
849 | | - |
850 | | - // First search without weights - baseline |
851 | | - var retrieverNoWeights = new RRFRetrieverBuilder( |
852 | | - null, |
853 | | - List.of("title", "content"), |
854 | | - "elasticsearch", |
855 | | - DEFAULT_RANK_WINDOW_SIZE, |
856 | | - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
857 | | - new float[0] |
858 | | - ); |
859 | | - |
860 | | - var responseNoWeights = client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(retrieverNoWeights)).get(); |
861 | | - |
862 | | - // Second search with title field heavily weighted |
863 | | - var retrieverWithWeights = new RRFRetrieverBuilder( |
864 | | - null, |
865 | | - List.of("title^5.0", "content^1.0"), |
866 | | - "elasticsearch", |
867 | | - DEFAULT_RANK_WINDOW_SIZE, |
868 | | - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
869 | | - new float[0] |
870 | | - ); |
871 | | - |
872 | | - var responseWithWeights = client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(retrieverWithWeights)).get(); |
873 | | - |
874 | | - // Both searches should return the same documents |
875 | | - assertEquals(responseNoWeights.getHits().getTotalHits().value(), responseWithWeights.getHits().getTotalHits().value()); |
876 | | - assertEquals(3L, responseWithWeights.getHits().getTotalHits().value()); |
877 | | - |
878 | | - // Verify that weighting title field more heavily affects the ranking |
879 | | - // Document 2 has "elasticsearch" in title, so should rank higher with title weighted |
880 | | - var hitsWithWeights = responseWithWeights.getHits().getHits(); |
881 | | - assertTrue("Should have results", hitsWithWeights.length > 0); |
882 | | - |
883 | | - // The exact ranking may vary, but we should get consistent results |
884 | | - for (var hit : hitsWithWeights) { |
885 | | - assertTrue("All results should have positive scores", hit.getScore() > 0); |
886 | | - } |
887 | | - } |
888 | | - |
889 | | - public void testRRFWeightValidation() { |
890 | | - // Test that negative weights are properly rejected |
891 | | - var retrieverWithNegativeWeight = new RRFRetrieverBuilder( |
892 | | - null, |
893 | | - List.of("title^-1.0", "content"), |
894 | | - "test", |
895 | | - DEFAULT_RANK_WINDOW_SIZE, |
896 | | - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
897 | | - new float[0] |
898 | | - ); |
899 | | - |
900 | | - var exception = expectThrows( |
901 | | - IllegalArgumentException.class, |
902 | | - () -> client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(retrieverWithNegativeWeight)).get() |
903 | | - ); |
904 | | - |
905 | | - assertThat(exception.getMessage(), containsString("per-field weights must be non-negative")); |
906 | | - } |
907 | | - |
908 | | - public void testRRFZeroWeights() { |
909 | | - // Test that zero weights are accepted but effectively disable the field |
910 | | - client().prepareIndex(INDEX).setId("1").setSource("title", "test document", "content", "content text").get(); |
911 | | - refresh(); |
912 | | - |
913 | | - var retrieverWithZeroWeight = new RRFRetrieverBuilder( |
914 | | - null, |
915 | | - List.of("title^0.0", "content^1.0"), |
916 | | - "test", |
917 | | - DEFAULT_RANK_WINDOW_SIZE, |
918 | | - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
919 | | - new float[0] |
920 | | - ); |
921 | | - |
922 | | - var response = client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(retrieverWithZeroWeight)).get(); |
923 | | - |
924 | | - assertEquals(1L, response.getHits().getTotalHits().value()); |
925 | | - assertTrue("Should find the document via content field", response.getHits().getHits()[0].getScore() > 0); |
926 | | - } |
927 | | - |
928 | | - public void testRRFLargeWeightValues() { |
929 | | - // Test that very large weight values are handled gracefully |
930 | | - client().prepareIndex(INDEX).setId("1").setSource("title", "elasticsearch", "content", "search engine").get(); |
931 | | - client().prepareIndex(INDEX).setId("2").setSource("title", "search", "content", "elasticsearch engine").get(); |
932 | | - refresh(); |
933 | | - |
934 | | - var retrieverWithLargeWeights = new RRFRetrieverBuilder( |
935 | | - null, |
936 | | - List.of("title^1000000.0", "content^1.0"), |
937 | | - "elasticsearch", |
938 | | - DEFAULT_RANK_WINDOW_SIZE, |
939 | | - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
940 | | - new float[0] |
941 | | - ); |
942 | | - |
943 | | - var response = client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(retrieverWithLargeWeights)).get(); |
944 | | - |
945 | | - assertTrue("Should find documents", response.getHits().getTotalHits().value() > 0); |
946 | | - // Verify that large weights don't cause overflow issues |
947 | | - for (var hit : response.getHits().getHits()) { |
948 | | - assertTrue("Scores should be finite", Float.isFinite(hit.getScore())); |
949 | | - assertTrue("Scores should be positive", hit.getScore() > 0); |
950 | | - } |
951 | | - } |
952 | | - |
953 | | - public void testRRFMixedWeightedAndUnweightedFields() { |
954 | | - // Test scenario with both weighted and unweighted fields |
955 | | - client().prepareIndex(INDEX).setId("1").setSource("title", "elasticsearch", "content", "search", "description", "engine").get(); |
956 | | - refresh(); |
957 | | - |
958 | | - var retrieverMixed = new RRFRetrieverBuilder( |
959 | | - null, |
960 | | - List.of("title^3.0", "content", "description^0.5"), // Mixed weights |
961 | | - "elasticsearch", |
962 | | - DEFAULT_RANK_WINDOW_SIZE, |
963 | | - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
964 | | - new float[0] |
965 | | - ); |
966 | | - |
967 | | - var response = client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(retrieverMixed)).get(); |
968 | | - |
969 | | - assertEquals(1L, response.getHits().getTotalHits().value()); |
970 | | - assertTrue("Should find the document", response.getHits().getHits()[0].getScore() > 0); |
971 | | - } |
972 | | - |
973 | | - public void testRRFWeightedFieldsRankingImpact() { |
974 | | - // Test that different weight configurations produce different ranking results |
975 | | - client().prepareIndex(INDEX).setId("1").setSource("title", "elasticsearch search", "content", "powerful engine").get(); |
976 | | - client().prepareIndex(INDEX).setId("2").setSource("title", "search engine", "content", "elasticsearch technology").get(); |
977 | | - refresh(); |
978 | | - |
979 | | - // Title-weighted query |
980 | | - var titleWeightedRetriever = new RRFRetrieverBuilder( |
981 | | - null, |
982 | | - List.of("title^10.0", "content^1.0"), |
983 | | - "elasticsearch", |
984 | | - DEFAULT_RANK_WINDOW_SIZE, |
985 | | - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
986 | | - new float[0] |
987 | | - ); |
988 | | - |
989 | | - var titleWeightedResponse = client().prepareSearch(INDEX) |
990 | | - .setSource(new SearchSourceBuilder().retriever(titleWeightedRetriever)) |
991 | | - .get(); |
992 | | - |
993 | | - // Content-weighted query |
994 | | - var contentWeightedRetriever = new RRFRetrieverBuilder( |
995 | | - null, |
996 | | - List.of("title^1.0", "content^10.0"), |
997 | | - "elasticsearch", |
998 | | - DEFAULT_RANK_WINDOW_SIZE, |
999 | | - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, |
1000 | | - new float[0] |
1001 | | - ); |
1002 | | - |
1003 | | - var contentWeightedResponse = client().prepareSearch(INDEX) |
1004 | | - .setSource(new SearchSourceBuilder().retriever(contentWeightedRetriever)) |
1005 | | - .get(); |
1006 | | - |
1007 | | - // Both should return results |
1008 | | - assertEquals(2L, titleWeightedResponse.getHits().getTotalHits().value()); |
1009 | | - assertEquals(2L, contentWeightedResponse.getHits().getTotalHits().value()); |
1010 | | - |
1011 | | - // Verify that both return finite, positive scores |
1012 | | - for (var hit : titleWeightedResponse.getHits().getHits()) { |
1013 | | - assertTrue("Title-weighted scores should be finite", Float.isFinite(hit.getScore())); |
1014 | | - assertTrue("Title-weighted scores should be positive", hit.getScore() > 0); |
1015 | | - } |
1016 | | - |
1017 | | - for (var hit : contentWeightedResponse.getHits().getHits()) { |
1018 | | - assertTrue("Content-weighted scores should be finite", Float.isFinite(hit.getScore())); |
1019 | | - assertTrue("Content-weighted scores should be positive", hit.getScore() > 0); |
1020 | | - } |
1021 | | - } |
1022 | 842 | } |
0 commit comments