|
47 | 47 | import java.util.Collection; |
48 | 48 | import java.util.List; |
49 | 49 | import java.util.concurrent.atomic.AtomicInteger; |
50 | | -import java.util.stream.Collectors; |
51 | 50 |
|
52 | 51 | import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; |
53 | 52 | import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; |
54 | 53 | import static org.hamcrest.CoreMatchers.is; |
55 | 54 | import static org.hamcrest.Matchers.closeTo; |
56 | | -import static org.hamcrest.Matchers.containsInAnyOrder; |
57 | 55 | import static org.hamcrest.Matchers.containsString; |
58 | 56 | import static org.hamcrest.Matchers.equalTo; |
59 | 57 | import static org.hamcrest.Matchers.instanceOf; |
@@ -858,120 +856,120 @@ public void testLinearRetrieverWithMinScoreValidation() { |
858 | 856 | } |
859 | 857 |
|
860 | 858 | // public void testLinearRetrieverWithMinScoreScenarios() { |
861 | | - // final int rankWindowSize = 10; |
862 | | - |
863 | | - // // Setup test data |
864 | | - // indexDoc(INDEX, "doc_1", TEXT_FIELD, "term1", "views.last30d", 10, "views.all", 100); |
865 | | - // indexDoc(INDEX, "doc_2", TEXT_FIELD, "term1 term2", "views.last30d", 20, "views.all", 200); |
866 | | - // indexDoc(INDEX, "doc_3", TEXT_FIELD, "term1 term2 term3", "views.last30d", 30, "views.all", 300); |
867 | | - // indexDoc(INDEX, "doc_4", TEXT_FIELD, "term4", "views.last30d", 40, "views.all", 400); |
868 | | - // refresh(INDEX); |
869 | | - |
870 | | - // // Create retrievers with different scoring |
871 | | - // StandardRetrieverBuilder retrieverA = new StandardRetrieverBuilder(QueryBuilders.termQuery(TEXT_FIELD, "term1").boost(10.0f)); |
872 | | - // StandardRetrieverBuilder retrieverB = new StandardRetrieverBuilder(QueryBuilders.termQuery(TEXT_FIELD, "term2").boost(1.0f)); |
873 | | - |
874 | | - // float[] weights = new float[] { 1.0f, 1.0f }; |
875 | | - // ScoreNormalizer[] identityNormalizers = LinearRetrieverBuilder.getDefaultNormalizers(2); |
876 | | - |
877 | | - // // Scenario 1: No min_score - all docs returned |
878 | | - // LinearRetrieverBuilder builderNoMinScore = new LinearRetrieverBuilder( |
879 | | - // List.of( |
880 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverA, null), |
881 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverB, null) |
882 | | - // ), |
883 | | - // rankWindowSize, |
884 | | - // weights, |
885 | | - // identityNormalizers |
886 | | - // ); |
887 | | - |
888 | | - // SearchSourceBuilder sourceNoMinScore = new SearchSourceBuilder().retriever(builderNoMinScore).size(rankWindowSize); |
889 | | - |
890 | | - // ElasticsearchAssertions.assertResponse(client().prepareSearch(INDEX).setSource(sourceNoMinScore), resp -> { |
891 | | - // assertThat(resp.getHits().getTotalHits().value(), equalTo(3L)); // doc_1, doc_2, doc_3 match |
892 | | - // assertThat(resp.getHits().getHits()[0].getId(), equalTo("doc_3")); // term1(10) + term2(1) = 11 |
893 | | - // assertThat(resp.getHits().getHits()[1].getId(), equalTo("doc_2")); // term1(10) + term2(1) = 11 |
894 | | - // assertThat(resp.getHits().getHits()[2].getId(), equalTo("doc_1")); // term1(10) = 10 |
895 | | - // }); |
896 | | - |
897 | | - // // Scenario 2: minScore = 0.0f - all matching docs returned (inclusive) |
898 | | - // LinearRetrieverBuilder builderZeroMinScore = new LinearRetrieverBuilder( |
899 | | - // List.of( |
900 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverA, null), |
901 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverB, null) |
902 | | - // ), |
903 | | - // rankWindowSize, |
904 | | - // weights, |
905 | | - // identityNormalizers |
906 | | - // ).minScore(0.0f); |
907 | | - |
908 | | - // SearchSourceBuilder sourceZeroMinScore = new SearchSourceBuilder().retriever(builderZeroMinScore).size(rankWindowSize); |
909 | | - |
910 | | - // ElasticsearchAssertions.assertResponse( |
911 | | - // client().prepareSearch(INDEX).setSource(sourceZeroMinScore), |
912 | | - // resp -> assertThat(resp.getHits().getTotalHits().value(), equalTo(3L)) |
913 | | - // ); |
914 | | - |
915 | | - // // Scenario 3: Basic filtering - minScore = 10.5f |
916 | | - // LinearRetrieverBuilder builderFilterBasic = new LinearRetrieverBuilder( |
917 | | - // List.of( |
918 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverA, null), |
919 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverB, null) |
920 | | - // ), |
921 | | - // rankWindowSize, |
922 | | - // weights, |
923 | | - // identityNormalizers |
924 | | - // ).minScore(10.5f); |
925 | | - |
926 | | - // SearchSourceBuilder sourceFilterBasic = new SearchSourceBuilder().retriever(builderFilterBasic).size(rankWindowSize); |
927 | | - |
928 | | - // ElasticsearchAssertions.assertResponse(client().prepareSearch(INDEX).setSource(sourceFilterBasic), resp -> { |
929 | | - // assertThat(resp.getHits().getTotalHits().value(), equalTo(2L)); // doc_2 and doc_3 have score 11.0 |
930 | | - // List<String> ids = Arrays.stream(resp.getHits().getHits()).map(h -> h.getId()).collect(Collectors.toList()); |
931 | | - // assertThat(ids, containsInAnyOrder("doc_2", "doc_3")); |
932 | | - // }); |
933 | | - |
934 | | - // // Scenario 4: Filter all documents - minScore = 20.0f |
935 | | - // LinearRetrieverBuilder builderFilterAll = new LinearRetrieverBuilder( |
936 | | - // List.of( |
937 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverA, null), |
938 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverB, null) |
939 | | - // ), |
940 | | - // rankWindowSize, |
941 | | - // weights, |
942 | | - // identityNormalizers |
943 | | - // ).minScore(20.0f); |
944 | | - |
945 | | - // SearchSourceBuilder sourceFilterAll = new SearchSourceBuilder().retriever(builderFilterAll).size(rankWindowSize); |
946 | | - |
947 | | - // ElasticsearchAssertions.assertResponse( |
948 | | - // client().prepareSearch(INDEX).setSource(sourceFilterAll), |
949 | | - // resp -> assertThat(resp.getHits().getTotalHits().value(), equalTo(0L)) |
950 | | - // ); |
951 | | - |
952 | | - // // Scenario 5: Test with MinMax normalization |
953 | | - // StandardRetrieverBuilder retrieverC = new StandardRetrieverBuilder(QueryBuilders.termQuery(TEXT_FIELD, "term1").boost(4.0f)); |
954 | | - // StandardRetrieverBuilder retrieverD = new StandardRetrieverBuilder(QueryBuilders.termQuery(TEXT_FIELD, "term2").boost(1.0f)); |
955 | | - |
956 | | - // ScoreNormalizer[] minMaxNormalizers = new ScoreNormalizer[] { MinMaxScoreNormalizer.INSTANCE, MinMaxScoreNormalizer.INSTANCE }; |
957 | | - |
958 | | - // LinearRetrieverBuilder builderWithNorm = new LinearRetrieverBuilder( |
959 | | - // List.of( |
960 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverC, null), |
961 | | - // new CompoundRetrieverBuilder.RetrieverSource(retrieverD, null) |
962 | | - // ), |
963 | | - // rankWindowSize, |
964 | | - // weights, |
965 | | - // minMaxNormalizers |
966 | | - // ).minScore(1.1f); |
967 | | - |
968 | | - // SearchSourceBuilder sourceWithNorm = new SearchSourceBuilder().retriever(builderWithNorm).size(rankWindowSize); |
969 | | - |
970 | | - // ElasticsearchAssertions.assertResponse(client().prepareSearch(INDEX).setSource(sourceWithNorm), resp -> { |
971 | | - // // With MinMax normalization, we expect doc_2 and doc_3 to have scores > 1.1 |
972 | | - // assertThat(resp.getHits().getTotalHits().value(), equalTo(2L)); |
973 | | - // List<String> ids = Arrays.stream(resp.getHits().getHits()).map(h -> h.getId()).collect(Collectors.toList()); |
974 | | - // assertThat(ids, containsInAnyOrder("doc_2", "doc_3")); |
975 | | - // }); |
| 859 | + // final int rankWindowSize = 10; |
| 860 | + |
| 861 | + // // Setup test data |
| 862 | + // indexDoc(INDEX, "doc_1", TEXT_FIELD, "term1", "views.last30d", 10, "views.all", 100); |
| 863 | + // indexDoc(INDEX, "doc_2", TEXT_FIELD, "term1 term2", "views.last30d", 20, "views.all", 200); |
| 864 | + // indexDoc(INDEX, "doc_3", TEXT_FIELD, "term1 term2 term3", "views.last30d", 30, "views.all", 300); |
| 865 | + // indexDoc(INDEX, "doc_4", TEXT_FIELD, "term4", "views.last30d", 40, "views.all", 400); |
| 866 | + // refresh(INDEX); |
| 867 | + |
| 868 | + // // Create retrievers with different scoring |
| 869 | + // StandardRetrieverBuilder retrieverA = new StandardRetrieverBuilder(QueryBuilders.termQuery(TEXT_FIELD, "term1").boost(10.0f)); |
| 870 | + // StandardRetrieverBuilder retrieverB = new StandardRetrieverBuilder(QueryBuilders.termQuery(TEXT_FIELD, "term2").boost(1.0f)); |
| 871 | + |
| 872 | + // float[] weights = new float[] { 1.0f, 1.0f }; |
| 873 | + // ScoreNormalizer[] identityNormalizers = LinearRetrieverBuilder.getDefaultNormalizers(2); |
| 874 | + |
| 875 | + // // Scenario 1: No min_score - all docs returned |
| 876 | + // LinearRetrieverBuilder builderNoMinScore = new LinearRetrieverBuilder( |
| 877 | + // List.of( |
| 878 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverA, null), |
| 879 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverB, null) |
| 880 | + // ), |
| 881 | + // rankWindowSize, |
| 882 | + // weights, |
| 883 | + // identityNormalizers |
| 884 | + // ); |
| 885 | + |
| 886 | + // SearchSourceBuilder sourceNoMinScore = new SearchSourceBuilder().retriever(builderNoMinScore).size(rankWindowSize); |
| 887 | + |
| 888 | + // ElasticsearchAssertions.assertResponse(client().prepareSearch(INDEX).setSource(sourceNoMinScore), resp -> { |
| 889 | + // assertThat(resp.getHits().getTotalHits().value(), equalTo(3L)); // doc_1, doc_2, doc_3 match |
| 890 | + // assertThat(resp.getHits().getHits()[0].getId(), equalTo("doc_3")); // term1(10) + term2(1) = 11 |
| 891 | + // assertThat(resp.getHits().getHits()[1].getId(), equalTo("doc_2")); // term1(10) + term2(1) = 11 |
| 892 | + // assertThat(resp.getHits().getHits()[2].getId(), equalTo("doc_1")); // term1(10) = 10 |
| 893 | + // }); |
| 894 | + |
| 895 | + // // Scenario 2: minScore = 0.0f - all matching docs returned (inclusive) |
| 896 | + // LinearRetrieverBuilder builderZeroMinScore = new LinearRetrieverBuilder( |
| 897 | + // List.of( |
| 898 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverA, null), |
| 899 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverB, null) |
| 900 | + // ), |
| 901 | + // rankWindowSize, |
| 902 | + // weights, |
| 903 | + // identityNormalizers |
| 904 | + // ).minScore(0.0f); |
| 905 | + |
| 906 | + // SearchSourceBuilder sourceZeroMinScore = new SearchSourceBuilder().retriever(builderZeroMinScore).size(rankWindowSize); |
| 907 | + |
| 908 | + // ElasticsearchAssertions.assertResponse( |
| 909 | + // client().prepareSearch(INDEX).setSource(sourceZeroMinScore), |
| 910 | + // resp -> assertThat(resp.getHits().getTotalHits().value(), equalTo(3L)) |
| 911 | + // ); |
| 912 | + |
| 913 | + // // Scenario 3: Basic filtering - minScore = 10.5f |
| 914 | + // LinearRetrieverBuilder builderFilterBasic = new LinearRetrieverBuilder( |
| 915 | + // List.of( |
| 916 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverA, null), |
| 917 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverB, null) |
| 918 | + // ), |
| 919 | + // rankWindowSize, |
| 920 | + // weights, |
| 921 | + // identityNormalizers |
| 922 | + // ).minScore(10.5f); |
| 923 | + |
| 924 | + // SearchSourceBuilder sourceFilterBasic = new SearchSourceBuilder().retriever(builderFilterBasic).size(rankWindowSize); |
| 925 | + |
| 926 | + // ElasticsearchAssertions.assertResponse(client().prepareSearch(INDEX).setSource(sourceFilterBasic), resp -> { |
| 927 | + // assertThat(resp.getHits().getTotalHits().value(), equalTo(2L)); // doc_2 and doc_3 have score 11.0 |
| 928 | + // List<String> ids = Arrays.stream(resp.getHits().getHits()).map(h -> h.getId()).collect(Collectors.toList()); |
| 929 | + // assertThat(ids, containsInAnyOrder("doc_2", "doc_3")); |
| 930 | + // }); |
| 931 | + |
| 932 | + // // Scenario 4: Filter all documents - minScore = 20.0f |
| 933 | + // LinearRetrieverBuilder builderFilterAll = new LinearRetrieverBuilder( |
| 934 | + // List.of( |
| 935 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverA, null), |
| 936 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverB, null) |
| 937 | + // ), |
| 938 | + // rankWindowSize, |
| 939 | + // weights, |
| 940 | + // identityNormalizers |
| 941 | + // ).minScore(20.0f); |
| 942 | + |
| 943 | + // SearchSourceBuilder sourceFilterAll = new SearchSourceBuilder().retriever(builderFilterAll).size(rankWindowSize); |
| 944 | + |
| 945 | + // ElasticsearchAssertions.assertResponse( |
| 946 | + // client().prepareSearch(INDEX).setSource(sourceFilterAll), |
| 947 | + // resp -> assertThat(resp.getHits().getTotalHits().value(), equalTo(0L)) |
| 948 | + // ); |
| 949 | + |
| 950 | + // // Scenario 5: Test with MinMax normalization |
| 951 | + // StandardRetrieverBuilder retrieverC = new StandardRetrieverBuilder(QueryBuilders.termQuery(TEXT_FIELD, "term1").boost(4.0f)); |
| 952 | + // StandardRetrieverBuilder retrieverD = new StandardRetrieverBuilder(QueryBuilders.termQuery(TEXT_FIELD, "term2").boost(1.0f)); |
| 953 | + |
| 954 | + // ScoreNormalizer[] minMaxNormalizers = new ScoreNormalizer[] { MinMaxScoreNormalizer.INSTANCE, MinMaxScoreNormalizer.INSTANCE }; |
| 955 | + |
| 956 | + // LinearRetrieverBuilder builderWithNorm = new LinearRetrieverBuilder( |
| 957 | + // List.of( |
| 958 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverC, null), |
| 959 | + // new CompoundRetrieverBuilder.RetrieverSource(retrieverD, null) |
| 960 | + // ), |
| 961 | + // rankWindowSize, |
| 962 | + // weights, |
| 963 | + // minMaxNormalizers |
| 964 | + // ).minScore(1.1f); |
| 965 | + |
| 966 | + // SearchSourceBuilder sourceWithNorm = new SearchSourceBuilder().retriever(builderWithNorm).size(rankWindowSize); |
| 967 | + |
| 968 | + // ElasticsearchAssertions.assertResponse(client().prepareSearch(INDEX).setSource(sourceWithNorm), resp -> { |
| 969 | + // // With MinMax normalization, we expect doc_2 and doc_3 to have scores > 1.1 |
| 970 | + // assertThat(resp.getHits().getTotalHits().value(), equalTo(2L)); |
| 971 | + // List<String> ids = Arrays.stream(resp.getHits().getHits()).map(h -> h.getId()).collect(Collectors.toList()); |
| 972 | + // assertThat(ids, containsInAnyOrder("doc_2", "doc_3")); |
| 973 | + // }); |
976 | 974 | // } |
977 | 975 | } |
0 commit comments