Skip to content

Commit 5517db5

Browse files
authored
Pass index settings as is (ydb-platform#24606)
1 parent 9b32775 commit 5517db5

File tree

5 files changed

+318
-76
lines changed

5 files changed

+318
-76
lines changed

ydb/core/base/kmeans_clusters.cpp

Lines changed: 50 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,15 @@
99
namespace NKikimr::NKMeans {
1010

1111
namespace {
// Limits for vector index / k-means tree settings, declared once at file scope
// inside the anonymous namespace. In this change they replace the function-local
// constexpr copies that were removed from the two ValidateSettings overloads.
// The Min*/Max* pairs are also passed as range bounds to ParseUInt32 from FillSetting.
// NOTE(review): MaxClustersPowLevels presumably caps clusters^levels and
// MaxVectorDimensionMultiplyClusters caps vector_dimension * clusters; the code
// that uses them is not visible in this diff - confirm against ValidateSettings.
12+
constexpr ui64 MinVectorDimension = 1;
13+
constexpr ui64 MaxVectorDimension = 16384;
14+
constexpr ui64 MinLevels = 1;
15+
constexpr ui64 MaxLevels = 16;
16+
constexpr ui64 MinClusters = 2;
17+
constexpr ui64 MaxClusters = 2048;
18+
constexpr ui64 MaxClustersPowLevels = ui64(1) << 30;
19+
constexpr ui64 MaxVectorDimensionMultiplyClusters = ui64(4) << 20; // 4 bytes per dimension for float vector type ~= 16 MB
20+
1221
bool ValidateSettingInRange(const TString& name, std::optional<ui64> value, ui64 minValue, ui64 maxValue, TString& error) {
1322
if (!value.has_value()) {
1423
error = TStringBuilder() << name << " should be set";
@@ -23,31 +32,35 @@ namespace {
2332
return false;
2433
};
2534

// ParseDistance: maps a textual distance name to the VectorIndexSettings metric enum.
// Post-change version (lines marked '+'): the input is lowercased with to_lower()
// before comparison, so matching of "cosine" / "manhattan" / "euclidean" is
// case-insensitive. For any other value `error` is set to
// "Invalid distance: <original, non-lowercased input>" and METRIC_UNSPECIFIED is returned.
// `error` is left untouched on success.
// Lines marked '-' are the pre-change, case-sensitive version.
26-
Ydb::Table::VectorIndexSettings_Metric ParseDistance(const TString& distance, TString& error) {
35+
Ydb::Table::VectorIndexSettings_Metric ParseDistance(const TString& distance_, TString& error) {
36+
37+
const TString distance = to_lower(distance_);
2738
if (distance == "cosine")
2839
return Ydb::Table::VectorIndexSettings::DISTANCE_COSINE;
2940
else if (distance == "manhattan")
3041
return Ydb::Table::VectorIndexSettings::DISTANCE_MANHATTAN;
3142
else if (distance == "euclidean")
3243
return Ydb::Table::VectorIndexSettings::DISTANCE_EUCLIDEAN;
3344
else {
34-
error = TStringBuilder() << "Invalid distance: " << distance;
45+
// Report the caller's original spelling rather than the lowercased copy.
error = TStringBuilder() << "Invalid distance: " << distance_;
3546
return Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED;
3647
}
3748
};
3849

// ParseSimilarity: maps a textual similarity name to the VectorIndexSettings metric enum.
// Post-change version (lines marked '+'): the input is lowercased with to_lower()
// before comparison, so "cosine" and "inner_product" match case-insensitively.
// For any other value `error` is set to
// "Invalid similarity: <original, non-lowercased input>" and METRIC_UNSPECIFIED is returned.
// `error` is left untouched on success.
// Lines marked '-' are the pre-change, case-sensitive version.
39-
Ydb::Table::VectorIndexSettings_Metric ParseSimilarity(const TString& similarity, TString& error) {
50+
Ydb::Table::VectorIndexSettings_Metric ParseSimilarity(const TString& similarity_, TString& error) {
51+
const TString similarity = to_lower(similarity_);
4052
if (similarity == "cosine")
4153
return Ydb::Table::VectorIndexSettings::SIMILARITY_COSINE;
4254
else if (similarity == "inner_product")
4355
return Ydb::Table::VectorIndexSettings::SIMILARITY_INNER_PRODUCT;
4456
else {
45-
error = TStringBuilder() << "Invalid similarity: " << similarity;
57+
// Report the caller's original spelling rather than the lowercased copy.
error = TStringBuilder() << "Invalid similarity: " << similarity_;
4658
return Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED;
4759
}
4860
};
4961

50-
Ydb::Table::VectorIndexSettings_VectorType ParseVectorType(const TString& vectorType, TString& error) {
62+
Ydb::Table::VectorIndexSettings_VectorType ParseVectorType(const TString& vectorType_, TString& error) {
63+
const TString vectorType = to_lower(vectorType_);
5164
if (vectorType == "float")
5265
return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT;
5366
else if (vectorType == "uint8")
@@ -57,16 +70,18 @@ namespace {
5770
else if (vectorType == "bit")
5871
return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_BIT;
5972
else {
60-
error = TStringBuilder() << "Invalid vector_type: " << vectorType;
73+
error = TStringBuilder() << "Invalid vector_type: " << vectorType_;
6174
return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UNSPECIFIED;
6275
}
6376
};
6477

// ParseUInt32: parses `value` as ui32 for the setting called `name`.
// Post-change version (lines marked '+') additionally takes [minValue, maxValue]:
//  - if TryFromString fails, `error` is set to "Invalid <name>: <value>" and the
//    function returns immediately with result == 0;
//  - otherwise the parsed value is passed to ValidateSettingInRange together with
//    `error`, and the parsed value is returned regardless of that call's result.
// Callers must therefore inspect `error`, not the returned number, to detect failure.
// NOTE(review): the bool returned by ValidateSettingInRange is ignored here;
// presumably it writes `error` itself when the value is out of range - its
// range-check branch is not visible in this diff, confirm.
65-
ui32 ParseUInt32(const TString& name, const TString& value, TString& error) {
78+
ui32 ParseUInt32(const TString& name, const TString& value, ui64 minValue, ui64 maxValue, TString& error) {
6679
ui32 result = 0;
6780
if (!TryFromString(value, result)) {
6881
error = TStringBuilder() << "Invalid " << name << ": " << value;
82+
return result;
6983
}
84+
ValidateSettingInRange(name, result, minValue, maxValue, error);
7085
return result;
7186
}
7287
}
@@ -436,12 +451,7 @@ std::unique_ptr<IClusters> CreateClusters(const Ydb::Table::VectorIndexSettings&
436451
}
437452

438453
bool ValidateSettings(const Ydb::Table::KMeansTreeSettings& settings, TString& error) {
439-
constexpr ui64 MinLevels = 1;
440-
constexpr ui64 MaxLevels = 16;
441-
constexpr ui64 MinClusters = 2;
442-
constexpr ui64 MaxClusters = 2048;
443-
constexpr ui64 MaxClustersPowLevels = ui64(1) << 30;
444-
constexpr ui64 MaxVectorDimensionMultiplyClusters = ui64(4) << 20; // 4 bytes per dimension for float vector type ~= 16 MB
454+
error = "";
445455

446456
if (!settings.has_settings()) {
447457
error = TStringBuilder() << "vector index settings should be set";
@@ -487,8 +497,7 @@ bool ValidateSettings(const Ydb::Table::KMeansTreeSettings& settings, TString& e
487497
}
488498

489499
bool ValidateSettings(const Ydb::Table::VectorIndexSettings& settings, TString& error) {
490-
constexpr ui64 MinVectorDimension = 1;
491-
constexpr ui64 MaxVectorDimension = 16384;
500+
error = "";
492501

493502
if (!settings.has_metric() || settings.metric() == Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED) {
494503
error = TStringBuilder() << "either distance or similarity should be set";
@@ -519,43 +528,37 @@ bool ValidateSettings(const Ydb::Table::VectorIndexSettings& settings, TString&
519528
return true;
520529
}
521530

// FillSetting (lines marked '+'): applies ONE name/value index setting to `settings`.
//  - `error` is cleared on entry.
//  - The setting name is matched case-insensitively via a to_lower() copy; messages
//    and ParseUInt32 receive the caller's original `name`.
//  - "distance" / "similarity": if settings.mutable_settings()->has_metric() is already
//    true, `error` is set and false is returned; otherwise the parsed metric is stored.
//  - "vector_type", "vector_dimension", "clusters", "levels": the parsed value is stored
//    in `settings`; numeric ones are parsed by ParseUInt32 with the file-scope Min*/Max* bounds.
//  - Any other name sets "Unknown index setting: <name>" and returns false.
//  - Returns !error, i.e. true only when no helper reported a problem. The parsed
//    value is written into `settings` even when the helper set `error`.
// Lines marked '-' are the removed FillSettings, which took the whole list of pairs,
// returned the settings by value and called ValidateSettings itself; in the new
// version the callers shown later in this diff call ValidateSettings separately.
// NOTE(review): the has_metric() guard presumably also triggers when the same key
// (e.g. "distance") is supplied twice, in which case the "not both" message would be
// misleading - confirm against protobuf field-presence semantics.
522-
Ydb::Table::KMeansTreeSettings FillSettings(const TVector<std::pair<TString, TString>>& settings, TString& error) {
523-
Ydb::Table::KMeansTreeSettings result;
531+
bool FillSetting(Ydb::Table::KMeansTreeSettings& settings, const TString& name, const TString& value, TString& error) {
532+
error = "";
524533

525-
for (const auto& [name, value] : settings) {
526-
if (name == "distance") {
527-
if (result.mutable_settings()->has_metric()) {
528-
error = "only one of distance or similarity should be set, not both";
529-
return result;
530-
}
531-
result.mutable_settings()->set_metric(ParseDistance(value, error));
532-
} else if (name == "similarity") {
533-
if (result.mutable_settings()->has_metric()) {
534-
error = "only one of distance or similarity should be set, not both";
535-
return result;
536-
}
537-
result.mutable_settings()->set_metric(ParseSimilarity(value, error));
538-
} else if (name =="vector_type") {
539-
result.mutable_settings()->set_vector_type(ParseVectorType(value, error));
540-
} else if (name =="vector_dimension") {
541-
result.mutable_settings()->set_vector_dimension(ParseUInt32(name, value, error));
542-
} else if (name =="clusters") {
543-
result.set_clusters(ParseUInt32(name, value, error));
544-
} else if (name =="levels") {
545-
result.set_levels(ParseUInt32(name, value, error));
546-
} else {
547-
error = TStringBuilder() << "Unknown index setting: " << name;
548-
return result;
534+
const TString nameLower = to_lower(name);
535+
if (nameLower == "distance") {
536+
if (settings.mutable_settings()->has_metric()) {
537+
error = "only one of distance or similarity should be set, not both";
538+
return false;
549539
}
550-
551-
if (error) {
552-
return result;
540+
settings.mutable_settings()->set_metric(ParseDistance(value, error));
541+
} else if (nameLower == "similarity") {
542+
if (settings.mutable_settings()->has_metric()) {
543+
error = "only one of distance or similarity should be set, not both";
544+
return false;
553545
}
546+
settings.mutable_settings()->set_metric(ParseSimilarity(value, error));
547+
} else if (nameLower =="vector_type") {
548+
settings.mutable_settings()->set_vector_type(ParseVectorType(value, error));
549+
} else if (nameLower =="vector_dimension") {
550+
settings.mutable_settings()->set_vector_dimension(ParseUInt32(name, value, MinVectorDimension, MaxVectorDimension, error));
551+
552+
} else if (nameLower =="clusters") {
553+
settings.set_clusters(ParseUInt32(name, value, MinClusters, MaxClusters, error));
554+
} else if (nameLower =="levels") {
555+
settings.set_levels(ParseUInt32(name, value, MinLevels, MaxLevels, error));
556+
} else {
557+
error = TStringBuilder() << "Unknown index setting: " << name;
558+
return false;
554559
}
555560

556-
ValidateSettings(result, error);
557-
558-
return result;
561+
return !error;
559562
}
560563

561564
}

ydb/core/base/kmeans_clusters.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,6 @@ std::unique_ptr<IClusters> CreateClusters(const Ydb::Table::VectorIndexSettings&
4747

4848
bool ValidateSettings(const Ydb::Table::VectorIndexSettings& settings, TString& error);
4949
bool ValidateSettings(const Ydb::Table::KMeansTreeSettings& settings, TString& error);
50-
Ydb::Table::KMeansTreeSettings FillSettings(const TVector<std::pair<TString, TString>>& values, TString& error);
50+
bool FillSetting(Ydb::Table::KMeansTreeSettings& settings, const TString& name, const TString& value, TString& error);
5151

5252
}

ydb/core/kqp/provider/yql_kikimr_exec.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2025,17 +2025,21 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
20252025
}
20262026
} else if (name == "indexSettings") {
20272027
YQL_ENSURE(add_index->type_case() == Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex);
2028-
auto indexSettings = columnTuple.Item(1).Cast<TCoAtomList>();
2029-
TVector<std::pair<TString, TString>> settings(::Reserve(indexSettings.Size()));
2030-
for (const auto& vectorSetting : indexSettings.Cast<TCoNameValueTupleList>()) {
2031-
YQL_ENSURE(vectorSetting.Value().Maybe<TCoAtom>());
2032-
settings.emplace_back(vectorSetting.Name().Value(), vectorSetting.Value().Cast<TCoAtom>().StringValue());
2033-
}
2028+
2029+
Ydb::Table::KMeansTreeSettings& settings = *add_index->mutable_global_vector_kmeans_tree_index()->mutable_vector_settings();
20342030
TString error;
2035-
*add_index->mutable_global_vector_kmeans_tree_index()->mutable_vector_settings() = NKikimr::NKMeans::FillSettings(settings, error);
2036-
if (error) {
2037-
ctx.AddError(TIssue(ctx.GetPosition(nameNode.Pos()), error));
2038-
return SyncError();
2031+
2032+
auto indexSettings = columnTuple.Item(1).Cast<TCoAtomList>();
2033+
for (const auto& indexSetting : indexSettings.Cast<TCoNameValueTupleList>()) {
2034+
YQL_ENSURE(indexSetting.Value().Maybe<TCoAtom>());
2035+
const auto& name = indexSetting.Name();
2036+
const auto& value = indexSetting.Value().Cast<TCoAtom>();
2037+
2038+
if (!NKikimr::NKMeans::FillSetting(settings, name.StringValue(), value.StringValue(), error))
2039+
{
2040+
ctx.AddError(TIssue(ctx.GetPosition(value.Pos()), error));
2041+
return SyncError();
2042+
}
20392043
}
20402044
}
20412045
else {
@@ -2046,6 +2050,14 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
20462050
YQL_ENSURE(add_index->name());
20472051
YQL_ENSURE(add_index->type_case() != Ydb::Table::TableIndex::TYPE_NOT_SET);
20482052
YQL_ENSURE(add_index->index_columns_size());
2053+
2054+
if (add_index->type_case() == Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex) {
2055+
TString error;
2056+
if (!NKikimr::NKMeans::ValidateSettings(add_index->global_vector_kmeans_tree_index().vector_settings(), error)) {
2057+
ctx.AddError(TIssue(ctx.GetPosition(action.Pos()), error));
2058+
return SyncError();
2059+
}
2060+
}
20492061
} else if (name == "alterIndex") {
20502062
if (maybeAlter.Cast().Actions().Size() > 1) {
20512063
ctx.AddError(

ydb/core/kqp/provider/yql_kikimr_type_ann.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,15 +1020,22 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over
10201020

10211021
TIndexDescription::TSpecializedIndexDescription specializedIndexDescription;
10221022
if (indexType == TIndexDescription::EType::GlobalSyncVectorKMeansTree) {
1023-
TVector<std::pair<TString, TString>> settings(::Reserve(index.IndexSettings().Size()));
1023+
Ydb::Table::KMeansTreeSettings& settings = *specializedIndexDescription.emplace<NKikimrKqp::TVectorIndexKmeansTreeDescription>().MutableSettings();
1024+
TString error;
1025+
10241026
for (const auto& indexSetting : index.IndexSettings()) {
1025-
settings.emplace_back(indexSetting.Name().Value(), indexSetting.Value().Cast<TCoAtom>().StringValue());
1027+
const auto& name = indexSetting.Name();
1028+
const auto& value = indexSetting.Value().Cast<TCoAtom>();
1029+
1030+
if (!NKikimr::NKMeans::FillSetting(settings, name.StringValue(), value.StringValue(), error))
1031+
{
1032+
ctx.AddError(TIssue(ctx.GetPosition(value.Pos()), error));
1033+
return IGraphTransformer::TStatus::Error;
1034+
}
10261035
}
1027-
TString error;
1028-
*specializedIndexDescription.emplace<NKikimrKqp::TVectorIndexKmeansTreeDescription>()
1029-
.MutableSettings() = NKikimr::NKMeans::FillSettings(settings, error);
1030-
if (error) {
1031-
ctx.AddError(TIssue(ctx.GetPosition(index.Pos()), error));
1036+
1037+
if (!NKikimr::NKMeans::ValidateSettings(settings, error)) {
1038+
ctx.AddError(TIssue(ctx.GetPosition(index.IndexSettings().Pos()), error));
10321039
return IGraphTransformer::TStatus::Error;
10331040
}
10341041
}

0 commit comments

Comments
 (0)