99namespace NKikimr ::NKMeans {
1010
1111namespace {
12+ constexpr ui64 MinVectorDimension = 1 ;
13+ constexpr ui64 MaxVectorDimension = 16384 ;
14+ constexpr ui64 MinLevels = 1 ;
15+ constexpr ui64 MaxLevels = 16 ;
16+ constexpr ui64 MinClusters = 2 ;
17+ constexpr ui64 MaxClusters = 2048 ;
18+ constexpr ui64 MaxClustersPowLevels = ui64(1 ) << 30 ;
19+ constexpr ui64 MaxVectorDimensionMultiplyClusters = ui64(4 ) << 20 ; // 4 bytes per dimension for float vector type ~= 16 MB
20+
1221 bool ValidateSettingInRange (const TString& name, std::optional<ui64> value, ui64 minValue, ui64 maxValue, TString& error) {
1322 if (!value.has_value ()) {
1423 error = TStringBuilder () << name << " should be set" ;
@@ -23,31 +32,35 @@ namespace {
2332 return false ;
2433 };
2534
26- Ydb::Table::VectorIndexSettings_Metric ParseDistance (const TString& distance, TString& error) {
35+ Ydb::Table::VectorIndexSettings_Metric ParseDistance (const TString& distance_, TString& error) {
36+
37+ const TString distance = to_lower (distance_);
2738 if (distance == " cosine" )
2839 return Ydb::Table::VectorIndexSettings::DISTANCE_COSINE;
2940 else if (distance == " manhattan" )
3041 return Ydb::Table::VectorIndexSettings::DISTANCE_MANHATTAN;
3142 else if (distance == " euclidean" )
3243 return Ydb::Table::VectorIndexSettings::DISTANCE_EUCLIDEAN;
3344 else {
34- error = TStringBuilder () << " Invalid distance: " << distance ;
45+ error = TStringBuilder () << " Invalid distance: " << distance_ ;
3546 return Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED;
3647 }
3748 };
3849
39- Ydb::Table::VectorIndexSettings_Metric ParseSimilarity (const TString& similarity, TString& error) {
50+ Ydb::Table::VectorIndexSettings_Metric ParseSimilarity (const TString& similarity_, TString& error) {
51+ const TString similarity = to_lower (similarity_);
4052 if (similarity == " cosine" )
4153 return Ydb::Table::VectorIndexSettings::SIMILARITY_COSINE;
4254 else if (similarity == " inner_product" )
4355 return Ydb::Table::VectorIndexSettings::SIMILARITY_INNER_PRODUCT;
4456 else {
45- error = TStringBuilder () << " Invalid similarity: " << similarity ;
57+ error = TStringBuilder () << " Invalid similarity: " << similarity_ ;
4658 return Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED;
4759 }
4860 };
4961
50- Ydb::Table::VectorIndexSettings_VectorType ParseVectorType (const TString& vectorType, TString& error) {
62+ Ydb::Table::VectorIndexSettings_VectorType ParseVectorType (const TString& vectorType_, TString& error) {
63+ const TString vectorType = to_lower (vectorType_);
5164 if (vectorType == " float" )
5265 return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT;
5366 else if (vectorType == " uint8" )
@@ -57,16 +70,18 @@ namespace {
5770 else if (vectorType == " bit" )
5871 return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_BIT;
5972 else {
60- error = TStringBuilder () << " Invalid vector_type: " << vectorType ;
73+ error = TStringBuilder () << " Invalid vector_type: " << vectorType_ ;
6174 return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UNSPECIFIED;
6275 }
6376 };
6477
65- ui32 ParseUInt32 (const TString& name, const TString& value, TString& error) {
78+ ui32 ParseUInt32 (const TString& name, const TString& value, ui64 minValue, ui64 maxValue, TString& error) {
6679 ui32 result = 0 ;
6780 if (!TryFromString (value, result)) {
6881 error = TStringBuilder () << " Invalid " << name << " : " << value;
82+ return result;
6983 }
84+ ValidateSettingInRange (name, result, minValue, maxValue, error);
7085 return result;
7186 }
7287}
@@ -436,12 +451,7 @@ std::unique_ptr<IClusters> CreateClusters(const Ydb::Table::VectorIndexSettings&
436451}
437452
438453bool ValidateSettings (const Ydb::Table::KMeansTreeSettings& settings, TString& error) {
439- constexpr ui64 MinLevels = 1 ;
440- constexpr ui64 MaxLevels = 16 ;
441- constexpr ui64 MinClusters = 2 ;
442- constexpr ui64 MaxClusters = 2048 ;
443- constexpr ui64 MaxClustersPowLevels = ui64 (1 ) << 30 ;
444- constexpr ui64 MaxVectorDimensionMultiplyClusters = ui64 (4 ) << 20 ; // 4 bytes per dimension for float vector type ~= 16 MB
454+ error = " " ;
445455
446456 if (!settings.has_settings ()) {
447457 error = TStringBuilder () << " vector index settings should be set" ;
@@ -487,8 +497,7 @@ bool ValidateSettings(const Ydb::Table::KMeansTreeSettings& settings, TString& e
487497}
488498
489499bool ValidateSettings (const Ydb::Table::VectorIndexSettings& settings, TString& error) {
490- constexpr ui64 MinVectorDimension = 1 ;
491- constexpr ui64 MaxVectorDimension = 16384 ;
500+ error = " " ;
492501
493502 if (!settings.has_metric () || settings.metric () == Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED) {
494503 error = TStringBuilder () << " either distance or similarity should be set" ;
@@ -519,43 +528,37 @@ bool ValidateSettings(const Ydb::Table::VectorIndexSettings& settings, TString&
519528 return true ;
520529}
521530
522- Ydb::Table::KMeansTreeSettings FillSettings ( const TVector<std::pair< TString, TString>>& settings , TString& error) {
523- Ydb::Table::KMeansTreeSettings result ;
531+ bool FillSetting ( Ydb::Table::KMeansTreeSettings& settings, const TString& name, const TString& value , TString& error) {
532+ error = " " ;
524533
525- for (const auto & [name, value] : settings) {
526- if (name == " distance" ) {
527- if (result.mutable_settings ()->has_metric ()) {
528- error = " only one of distance or similarity should be set, not both" ;
529- return result;
530- }
531- result.mutable_settings ()->set_metric (ParseDistance (value, error));
532- } else if (name == " similarity" ) {
533- if (result.mutable_settings ()->has_metric ()) {
534- error = " only one of distance or similarity should be set, not both" ;
535- return result;
536- }
537- result.mutable_settings ()->set_metric (ParseSimilarity (value, error));
538- } else if (name ==" vector_type" ) {
539- result.mutable_settings ()->set_vector_type (ParseVectorType (value, error));
540- } else if (name ==" vector_dimension" ) {
541- result.mutable_settings ()->set_vector_dimension (ParseUInt32 (name, value, error));
542- } else if (name ==" clusters" ) {
543- result.set_clusters (ParseUInt32 (name, value, error));
544- } else if (name ==" levels" ) {
545- result.set_levels (ParseUInt32 (name, value, error));
546- } else {
547- error = TStringBuilder () << " Unknown index setting: " << name;
548- return result;
534+ const TString nameLower = to_lower (name);
535+ if (nameLower == " distance" ) {
536+ if (settings.mutable_settings ()->has_metric ()) {
537+ error = " only one of distance or similarity should be set, not both" ;
538+ return false ;
549539 }
550-
551- if (error) {
552- return result;
540+ settings.mutable_settings ()->set_metric (ParseDistance (value, error));
541+ } else if (nameLower == " similarity" ) {
542+ if (settings.mutable_settings ()->has_metric ()) {
543+ error = " only one of distance or similarity should be set, not both" ;
544+ return false ;
553545 }
546+ settings.mutable_settings ()->set_metric (ParseSimilarity (value, error));
547+ } else if (nameLower ==" vector_type" ) {
548+ settings.mutable_settings ()->set_vector_type (ParseVectorType (value, error));
549+ } else if (nameLower ==" vector_dimension" ) {
550+ settings.mutable_settings ()->set_vector_dimension (ParseUInt32 (name, value, MinVectorDimension, MaxVectorDimension, error));
551+
552+ } else if (nameLower ==" clusters" ) {
553+ settings.set_clusters (ParseUInt32 (name, value, MinClusters, MaxClusters, error));
554+ } else if (nameLower ==" levels" ) {
555+ settings.set_levels (ParseUInt32 (name, value, MinLevels, MaxLevels, error));
556+ } else {
557+ error = TStringBuilder () << " Unknown index setting: " << name;
558+ return false ;
554559 }
555560
556- ValidateSettings (result, error);
557-
558- return result;
561+ return !error;
559562}
560563
561564}
0 commit comments