@@ -907,6 +907,187 @@ TEST_F(SearchTest, NotImplementedSearchTypes) {
907
907
<< " Infix search should return a not implemented error" ;
908
908
}
909
909
910
// Kind of wildcard search exercised by the BM_SearchByType_* benchmarks.
// The explicit values matter: they are passed through the benchmark's integer
// argument list and converted back with static_cast.
enum class SearchType {
  PREFIX = 0,
  SUFFIX = 1,
  INFIX = 2,
};
912
+
913
// Builds `word_count` lowercase ASCII words separated by single spaces.
//
// Word i (0-based) is 3 + (i % 5) characters long, i.e. 3-7 characters. It starts at
// alphabet position (doc_offset + i) % 26 and continues with consecutive letters,
// wrapping from 'z' back to 'a'. Returns an empty string when `word_count` is 0.
static std::string GenerateWordSequence(size_t word_count, size_t doc_offset = 0) {
  constexpr size_t kAlphabetSize = 26;

  std::string content;
  for (size_t i = 0; i < word_count; ++i) {
    const size_t first_letter = (doc_offset + i) % kAlphabetSize;
    const size_t word_len = 3 + (i % 5);

    if (i > 0)
      content += ' ';

    // Wrap in alphabet-index space rather than on the char value: 'z' + 6 is 128, which
    // does not fit a signed char and would slip past a `c > 'z'` check as a negative byte.
    for (size_t j = 0; j < word_len; ++j)
      content += static_cast<char>('a' + (first_letter + j) % kAlphabetSize);
  }
  return content;
}
934
+
935
+ // Helper function to generate pattern with variety
936
+ static std::string GeneratePattern (SearchType search_type, size_t pattern_len, bool use_uniform) {
937
+ if (use_uniform) {
938
+ // Original uniform pattern for comparison
939
+ switch (search_type) {
940
+ case SearchType::PREFIX:
941
+ return std::string (pattern_len, ' p' );
942
+ case SearchType::SUFFIX:
943
+ return std::string (pattern_len, ' s' );
944
+ case SearchType::INFIX:
945
+ return std::string (pattern_len, ' i' );
946
+ }
947
+ } else {
948
+ // Diverse ASCII pattern
949
+ std::string pattern;
950
+ char base_char = (search_type == SearchType::PREFIX) ? ' p'
951
+ : (search_type == SearchType::SUFFIX) ? ' s'
952
+ : ' i' ;
953
+
954
+ for (size_t i = 0 ; i < pattern_len; ++i) {
955
+ char c = base_char + (i % 10 ); // Use variety of chars
956
+ if (c > ' z' )
957
+ c = ' a' + (c - ' z' - 1 );
958
+ pattern += c;
959
+ }
960
+ return pattern;
961
+ }
962
+ return " " ;
963
+ }
964
+
965
+ static void BM_SearchByTypeImpl (benchmark::State& state, bool use_diverse_pattern) {
966
+ size_t num_docs = state.range (0 );
967
+ size_t pattern_len = state.range (1 );
968
+ SearchType search_type = static_cast <SearchType>(state.range (2 ));
969
+
970
+ auto schema = MakeSimpleSchema ({{" title" , SchemaField::TEXT}});
971
+ FieldIndices indices{schema, kEmptyOptions , PMR_NS::get_default_resource (), nullptr };
972
+
973
+ // Generate pattern
974
+ std::string pattern = GeneratePattern (search_type, pattern_len, !use_diverse_pattern);
975
+ std::string search_type_name = (search_type == SearchType::PREFIX) ? " prefix"
976
+ : (search_type == SearchType::SUFFIX) ? " suffix"
977
+ : " infix" ;
978
+
979
+ // Generate test data with more realistic content
980
+ for (size_t i = 0 ; i < num_docs; i++) {
981
+ std::string content;
982
+ if (i < num_docs / 2 ) {
983
+ // Half documents have the pattern in appropriate position
984
+ std::string base_content = GenerateWordSequence (5 + (i % 5 ), i);
985
+
986
+ switch (search_type) {
987
+ case SearchType::PREFIX:
988
+ content = pattern + base_content;
989
+ break ;
990
+ case SearchType::SUFFIX:
991
+ content = base_content + pattern;
992
+ break ;
993
+ case SearchType::INFIX:
994
+ // Fix: embed pattern inside a word, not as separate word
995
+ size_t split_pos = base_content.length () / 2 ;
996
+ content = base_content.substr (0 , split_pos) + pattern + base_content.substr (split_pos);
997
+ break ;
998
+ }
999
+ } else {
1000
+ // Half don't have the pattern - generate different content
1001
+ content = GenerateWordSequence (8 + (i % 3 ), i + 1000 );
1002
+ }
1003
+ MockedDocument doc{Map{{" title" , content}}};
1004
+ indices.Add (i, doc);
1005
+ }
1006
+
1007
+ SearchAlgorithm algo{};
1008
+ QueryParams params;
1009
+ std::string query;
1010
+
1011
+ // Generate query based on search type
1012
+ switch (search_type) {
1013
+ case SearchType::PREFIX:
1014
+ query = pattern + " *" ;
1015
+ break ;
1016
+ case SearchType::SUFFIX:
1017
+ query = " *" + pattern;
1018
+ break ;
1019
+ case SearchType::INFIX:
1020
+ query = " *" + pattern + " *" ;
1021
+ break ;
1022
+ }
1023
+
1024
+ if (!algo.Init (query, ¶ms)) {
1025
+ state.SkipWithError (" Failed to initialize " + search_type_name + " search" );
1026
+ return ;
1027
+ }
1028
+
1029
+ while (state.KeepRunning ()) {
1030
+ auto result = algo.Search (&indices);
1031
+ benchmark::DoNotOptimize (result);
1032
+
1033
+ // If result has error, skip the benchmark
1034
+ if (!result.error .empty ()) {
1035
+ state.SkipWithError (search_type_name + " search returned error: " + result.error );
1036
+ return ;
1037
+ }
1038
+ }
1039
+
1040
+ // Set counters for analysis
1041
+ state.counters [" docs_total" ] = num_docs;
1042
+ state.counters [" pattern_length" ] = pattern_len;
1043
+ state.counters [" diverse_pattern" ] = use_diverse_pattern ? 1 : 0 ;
1044
+ state.SetLabel (search_type_name + (use_diverse_pattern ? " _diverse" : " _uniform" ));
1045
+ }
1046
+
1047
+ // Instantiate template functions
1048
+ static void BM_SearchByType_Uniform (benchmark::State& state) {
1049
+ BM_SearchByTypeImpl (state, false );
1050
+ }
1051
+
1052
+ static void BM_SearchByType_Diverse (benchmark::State& state) {
1053
+ BM_SearchByTypeImpl (state, true );
1054
+ }
1055
+
1056
+ // Benchmark to compare all search types - removed 100K docs per romange's suggestion
1057
+ BENCHMARK (BM_SearchByType_Uniform)
1058
+ // Uniform patterns (original test)
1059
+ ->Args ({1000 , 3 , static_cast <int >(SearchType::PREFIX)})
1060
+ ->Args({1000 , 5 , static_cast <int >(SearchType::PREFIX)})
1061
+ ->Args({10000 , 3 , static_cast <int >(SearchType::PREFIX)})
1062
+ ->Args({10000 , 5 , static_cast <int >(SearchType::PREFIX)})
1063
+ ->Args({1000 , 3 , static_cast <int >(SearchType::SUFFIX)})
1064
+ ->Args({1000 , 5 , static_cast <int >(SearchType::SUFFIX)})
1065
+ ->Args({10000 , 3 , static_cast <int >(SearchType::SUFFIX)})
1066
+ ->Args({10000 , 5 , static_cast <int >(SearchType::SUFFIX)})
1067
+ ->Args({1000 , 3 , static_cast <int >(SearchType::INFIX)})
1068
+ ->Args({1000 , 5 , static_cast <int >(SearchType::INFIX)})
1069
+ ->Args({10000 , 3 , static_cast <int >(SearchType::INFIX)})
1070
+ ->Args({10000 , 5 , static_cast <int >(SearchType::INFIX)})
1071
+ ->ArgNames({" docs" , " pattern_len" , " search_type" })
1072
+ ->Unit(benchmark::kMicrosecond );
1073
+
1074
+ BENCHMARK (BM_SearchByType_Diverse)
1075
+ // Diverse patterns (new test with ASCII variety)
1076
+ ->Args ({1000 , 3 , static_cast <int >(SearchType::PREFIX)})
1077
+ ->Args({1000 , 5 , static_cast <int >(SearchType::PREFIX)})
1078
+ ->Args({10000 , 3 , static_cast <int >(SearchType::PREFIX)})
1079
+ ->Args({10000 , 5 , static_cast <int >(SearchType::PREFIX)})
1080
+ ->Args({1000 , 3 , static_cast <int >(SearchType::SUFFIX)})
1081
+ ->Args({1000 , 5 , static_cast <int >(SearchType::SUFFIX)})
1082
+ ->Args({10000 , 3 , static_cast <int >(SearchType::SUFFIX)})
1083
+ ->Args({10000 , 5 , static_cast <int >(SearchType::SUFFIX)})
1084
+ ->Args({1000 , 3 , static_cast <int >(SearchType::INFIX)})
1085
+ ->Args({1000 , 5 , static_cast <int >(SearchType::INFIX)})
1086
+ ->Args({10000 , 3 , static_cast <int >(SearchType::INFIX)})
1087
+ ->Args({10000 , 5 , static_cast <int >(SearchType::INFIX)})
1088
+ ->ArgNames({" docs" , " pattern_len" , " search_type" })
1089
+ ->Unit(benchmark::kMicrosecond );
1090
+
910
1091
} // namespace search
911
1092
912
1093
} // namespace dfly
0 commit comments