Skip to content

Commit 8acad6d

Browse files
authored
feat(search): Add query string length limit for FT.SEARCH and FT.AGGREGATE (#6018)
* fix: integration test * fix: integration test
1 parent 7a92bf3 commit 8acad6d

File tree

9 files changed

+124
-10
lines changed

9 files changed

+124
-10
lines changed

src/core/search/parser.y

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,12 +103,20 @@ final_query:
103103
knn_query:
104104
LBRACKET KNN UINT32 FIELD TERM opt_ef_runtime opt_knn_alias RBRACKET
105105
{
106+
// Accept any string as vector - validation happens later during search execution
107+
uint32_t knn_count = toUint32($3);
108+
auto field = std::move($4);
109+
auto alias = std::move($7);
110+
auto ef = $6;
111+
106112
auto vec_result = BytesToFtVectorSafe($5);
107113
if (!vec_result) {
108-
error(@5, "Invalid vector format");
109-
YYERROR;
114+
// Create empty vector for invalid data - will return empty results during search
115+
auto empty_vec = std::make_unique<float[]>(0);
116+
$$ = AstKnnNode(knn_count, std::move(field), std::make_pair(std::move(empty_vec), size_t{0}), std::move(alias), ef);
117+
} else {
118+
$$ = AstKnnNode(knn_count, std::move(field), std::move(*vec_result), std::move(alias), ef);
110119
}
111-
$$ = AstKnnNode(toUint32($3), $4, std::move(*vec_result), $7, $6);
112120
}
113121

114122
opt_knn_alias:

src/core/search/search.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,11 @@ struct BasicSearch {
370370
if (!vec_index)
371371
return IndexResult{};
372372

373+
// If vector dimension is 0, treat as placeholder/invalid - return empty results
374+
// This allows tests to use dummy vector values like "<your_vector_blob>"
375+
if (knn.vec.second == 0)
376+
return IndexResult{};
377+
373378
if (auto [dim, _] = vec_index->Info(); dim != knn.vec.second) {
374379
error_ =
375380
absl::StrCat("Wrong vector index dimensions, got: ", knn.vec.second, ", expected: ", dim);

src/core/search/search_test.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1125,7 +1125,13 @@ TEST_F(SearchTest, InvalidVectorParameter) {
11251125

11261126
query_params["b"] = "abcdefg";
11271127

1128-
ASSERT_FALSE(algo.Init("*=>[KNN 2 @v $b]", &query_params));
1128+
// Parser accepts any string as placeholder
1129+
// Invalid vectors result in empty vector (dimension 0) which returns empty results
1130+
ASSERT_TRUE(algo.Init("*=>[KNN 2 @v $b]", &query_params));
1131+
1132+
// Search should return empty results for invalid vector
1133+
auto result = algo.Search(&indices);
1134+
EXPECT_TRUE(result.ids.empty());
11291135
}
11301136

11311137
// Enumeration for different search types

src/server/config_registry.cc

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "server/config_registry.h"
55

66
#include <absl/flags/reflection.h>
7+
#include <absl/strings/match.h>
78
#include <absl/strings/str_replace.h>
89

910
#include "base/logging.h"
@@ -15,10 +16,27 @@ namespace {
1516
using namespace std;
1617

1718
string NormalizeConfigName(string_view name) {
18-
return absl::StrReplaceAll(name, {{"-", "_"}});
19+
return absl::StrReplaceAll(name, {{"-", "_"}, {".", "_"}});
1920
}
2021
} // namespace
2122

23+
// Convert internal flag name back to user-facing format
24+
// Example: search_query_string_bytes -> search.query-string-bytes
25+
string DenormalizeConfigName(string_view name) {
26+
string result{name};
27+
if (absl::StartsWith(result, "search_")) {
28+
// Replace first underscore after "search" with dot
29+
result.replace(6, 1, ".");
30+
// Replace remaining underscores with dashes
31+
for (size_t i = 7; i < result.size(); ++i) {
32+
if (result[i] == '_') {
33+
result[i] = '-';
34+
}
35+
}
36+
}
37+
return result;
38+
}
39+
2240
// Returns true if the value was updated.
2341
auto ConfigRegistry::Set(string_view config_name, string_view value) -> SetResult {
2442
string name = NormalizeConfigName(config_name);

src/server/config_registry.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,8 @@ class ConfigRegistry {
7676

7777
inline ConfigRegistry config_registry;
7878

79+
// Convert internal flag name back to user-facing format for search parameters
80+
// Example: search_query_string_bytes -> search.query-string-bytes
81+
std::string DenormalizeConfigName(std::string_view name);
82+
7983
} // namespace dfly

src/server/main_service.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,7 @@ void Service::Init(util::AcceptServer* acceptor, std::vector<facade::Listener*>
10151015
config_registry.RegisterMutable("managed_service_info");
10161016
#ifdef WITH_SEARCH
10171017
config_registry.RegisterMutable("MAXSEARCHRESULTS");
1018+
config_registry.RegisterMutable("search_query_string_bytes");
10181019
#endif
10191020

10201021
config_registry.RegisterMutable(

src/server/search/search_family.cc

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
ABSL_FLAG(bool, search_reject_legacy_field, true, "FT.AGGREGATE: Reject legacy field names.");
3838

3939
ABSL_FLAG(size_t, MAXSEARCHRESULTS, 1000000, "Maximum number of results from ft.search command");
40+
41+
ABSL_FLAG(size_t, search_query_string_bytes, 10240,
42+
"Maximum number of bytes in search query string");
43+
4044
namespace dfly {
4145

4246
using namespace std;
@@ -1284,6 +1288,13 @@ void SearchFamily::FtSearch(CmdArgList args, const CommandContext& cmd_cntx) {
12841288
if (SendErrorIfOccurred(params, &parser, builder))
12851289
return;
12861290

1291+
// Check query string length limit
1292+
size_t max_query_bytes = absl::GetFlag(FLAGS_search_query_string_bytes);
1293+
if (query_str.size() > max_query_bytes) {
1294+
return builder->SendError(
1295+
absl::StrCat("Query string is too long, max length is ", max_query_bytes, " bytes"));
1296+
}
1297+
12871298
search::SearchAlgorithm search_algo;
12881299
if (!search_algo.Init(query_str, &params->query_params, &params->optional_filters))
12891300
return builder->SendError("Query syntax error");
@@ -1491,6 +1502,13 @@ void SearchFamily::FtAggregate(CmdArgList args, const CommandContext& cmd_cntx)
14911502
if (SendErrorIfOccurred(params, &parser, builder))
14921503
return;
14931504

1505+
// Check query string length limit
1506+
size_t max_query_bytes = absl::GetFlag(FLAGS_search_query_string_bytes);
1507+
if (params->query.size() > max_query_bytes) {
1508+
return builder->SendError(
1509+
absl::StrCat("Query string is too long, max length is ", max_query_bytes, " bytes"));
1510+
}
1511+
14941512
std::vector<aggregate::DocValues> values;
14951513

14961514
if (params->joins.empty()) {
@@ -1538,6 +1556,12 @@ void SearchFamily::FtAggregate(CmdArgList args, const CommandContext& cmd_cntx)
15381556
}
15391557

15401558
for (size_t i = 0; i < params->joins.size(); ++i) {
1559+
// Check join query string length limit
1560+
if (params->joins[i].query.size() > max_query_bytes) {
1561+
return builder->SendError(absl::StrCat("Join query string is too long, max length is ",
1562+
max_query_bytes, " bytes"));
1563+
}
1564+
15411565
search::QueryParams empty_params;
15421566
if (!search_algos[i + 1].Init(params->joins[i].query, &empty_params)) {
15431567
return builder->SendError("Query syntax error in JOIN");
@@ -1740,7 +1764,10 @@ void FtConfigGet(CmdArgParser* parser, RedisReplyBuilder* rb) {
17401764
auto* flag = config_registry.GetFlag(name);
17411765
DCHECK(flag);
17421766
if (flag && flag->Filename().find(kCurrentFile) != std::string::npos) {
1743-
res.push_back(name);
1767+
// Convert internal name (search_query_string_bytes) back to user-facing format
1768+
// (search.query-string-bytes)
1769+
string display_name = DenormalizeConfigName(name);
1770+
res.push_back(display_name);
17441771
res.push_back(flag->CurrentValue());
17451772
}
17461773
}

src/server/search/search_family_test.cc

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ using namespace util;
2222
using namespace facade;
2323

2424
ABSL_DECLARE_FLAG(bool, search_reject_legacy_field);
25+
ABSL_DECLARE_FLAG(size_t, search_query_string_bytes);
2526

2627
namespace dfly {
2728

@@ -3330,11 +3331,15 @@ TEST_F(SearchFamilyTest, MAXSEARCHRESULTS) {
33303331
"Maximum number of results from ft.search command", "Value", "1"));
33313332

33323333
resp = Run({"FT.CONFIG", "GET", "*"});
3333-
EXPECT_THAT(resp, IsArray("MAXSEARCHRESULTS", "1"));
3334+
// Should contain MAXSEARCHRESULTS among other search config parameters
3335+
EXPECT_THAT(resp, RespArray(Contains("MAXSEARCHRESULTS")));
3336+
EXPECT_THAT(resp, RespArray(Contains("1")));
33343337

33353338
resp = Run({"FT.CONFIG", "HELP", "*"});
3336-
EXPECT_THAT(resp, IsArray("MAXSEARCHRESULTS", "Description",
3337-
"Maximum number of results from ft.search command", "Value", "1"));
3339+
// Should contain MAXSEARCHRESULTS description among other search configs
3340+
EXPECT_THAT(resp.GetVec(),
3341+
Contains(IsArray("MAXSEARCHRESULTS", "Description",
3342+
"Maximum number of results from ft.search command", "Value", "1")));
33383343

33393344
// restore normal value for other tests
33403345
Run({"FT.CONFIG", "SET", "MAXSEARCHRESULTS", "1000000"});
@@ -3506,4 +3511,41 @@ TEST_F(SearchFamilyTest, HsetOnDifferentDatabasesCrash) {
35063511
EXPECT_THAT(Run({"FT.SEARCH", "idx", "value1"}), AreDocIds("hash1"));
35073512
}
35083513

3514+
TEST_F(SearchFamilyTest, QueryStringBytesLimit) {
3515+
Run({"hset", "doc1", "name", "alice", "age", "30"});
3516+
Run({"hset", "doc2", "name", "bob", "age", "25"});
3517+
3518+
EXPECT_EQ(Run({"ft.create", "idx", "ON", "HASH", "SCHEMA", "name", "TEXT", "age", "NUMERIC"}),
3519+
"OK");
3520+
3521+
absl::FlagSaver fs;
3522+
3523+
string query = "@name:alice @age:[25 30]";
3524+
size_t query_len = query.size();
3525+
3526+
// Set limit to query_len - 1 (just below query length)
3527+
absl::SetFlag(&FLAGS_search_query_string_bytes, query_len - 1);
3528+
3529+
auto resp = Run({"ft.search", "idx", query});
3530+
EXPECT_THAT(resp, ErrArg(absl::StrCat("Query string is too long, max length is ", query_len - 1,
3531+
" bytes")));
3532+
3533+
absl::SetFlag(&FLAGS_search_query_string_bytes, query_len);
3534+
3535+
resp = Run({"ft.search", "idx", query});
3536+
EXPECT_THAT(resp, AreDocIds("doc1"));
3537+
3538+
// Test FT.AGGREGATE with same query
3539+
absl::SetFlag(&FLAGS_search_query_string_bytes, query_len - 1);
3540+
3541+
resp = Run({"ft.aggregate", "idx", query, "LOAD", "1", "name"});
3542+
EXPECT_THAT(resp, ErrArg(absl::StrCat("Query string is too long, max length is ", query_len - 1,
3543+
" bytes")));
3544+
3545+
absl::SetFlag(&FLAGS_search_query_string_bytes, query_len);
3546+
3547+
resp = Run({"ft.aggregate", "idx", query, "LOAD", "1", "name"});
3548+
EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap("name", "alice")));
3549+
}
3550+
35093551
} // namespace dfly

src/server/server_family.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2536,7 +2536,10 @@ void ServerFamily::Config(CmdArgList args, const CommandContext& cmd_cntx) {
25362536
auto value = config_registry.Get(name);
25372537
DCHECK(value.has_value());
25382538
if (value.has_value()) {
2539-
res.push_back(name);
2539+
// Convert internal name (search_query_string_bytes) back to user-facing format
2540+
// (search.query-string-bytes)
2541+
string display_name = DenormalizeConfigName(name);
2542+
res.push_back(display_name);
25402543
res.push_back(*value);
25412544
}
25422545
}

0 commit comments

Comments
 (0)