Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/import_generation.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
16
17
2 changes: 1 addition & 1 deletion .github/last_commit.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
b6b887dc9b0107368d53dff40e8ddcbc04001b57
ca39c1dc5e3592adab111f61e4aaec2021bfa95b
2 changes: 2 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,7 @@ add_subdirectory(pagination)
add_subdirectory(secondary_index)
add_subdirectory(secondary_index_builtin)
add_subdirectory(topic_reader)
add_subdirectory(topic_writer/transaction)
add_subdirectory(ttl)
add_subdirectory(vector_index)
add_subdirectory(vector_index_builtin)
36 changes: 36 additions & 0 deletions examples/topic_writer/transaction/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Build rules for the `topic_writer_transaction` example executable.
# The target is declared empty here; its sources are attached below.
add_executable(topic_writer_transaction)

# Libraries the example links against: yutil plus the SDK Topic and Query targets.
target_link_libraries(topic_writer_transaction
PUBLIC
yutil
YDB-CPP-SDK::Topic
YDB-CPP-SDK::Query
)

# Source path is relative to this CMakeLists.txt directory.
target_sources(topic_writer_transaction
PRIVATE
main.cpp
)

# NOTE(review): vcs_info() is not defined in this file; presumably a project
# helper that attaches version-control metadata to the target - confirm.
vcs_info(topic_writer_transaction)

# cpuid_check is linked only when the target processor is x86_64 / AMD64.
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64")
target_link_libraries(topic_writer_transaction PUBLIC
cpuid_check
)
endif()

# Platform-specific linker flags.
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
# Flag order is significant: -Wl,--no-as-needed is placed before -lpthread.
target_link_options(topic_writer_transaction PRIVATE
-ldl
-lrt
-Wl,--no-as-needed
-lpthread
)
elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
# Sets the macOS platform version to 11.0 and links the CoreFoundation framework
# ("-framework" and "CoreFoundation" are passed as two consecutive arguments).
target_link_options(topic_writer_transaction PRIVATE
-Wl,-platform_version,macos,11.0,11.0
-framework
CoreFoundation
)
endif()
13 changes: 7 additions & 6 deletions examples/vector_index/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
add_executable(vector_index)

target_link_libraries(vector_index PUBLIC
yutil
getopt
YDB-CPP-SDK::Table
target_link_libraries(vector_index
PUBLIC
yutil
getopt
YDB-CPP-SDK::Table
)

target_sources(vector_index PRIVATE
${YDB_SDK_SOURCE_DIR}/examples/vector_index/main.cpp
${YDB_SDK_SOURCE_DIR}/examples/vector_index/vector_index.cpp
main.cpp
vector_index.cpp
)

vcs_info(vector_index)
Expand Down
39 changes: 39 additions & 0 deletions examples/vector_index_builtin/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Build rules for the `vector_index_builtin` example executable.
# The target is declared empty here; its sources are attached below.
add_executable(vector_index_builtin)

# Libraries the example links against: yutil, getopt and the SDK
# Query, Table and Helpers targets.
target_link_libraries(vector_index_builtin
PUBLIC
yutil
getopt
YDB-CPP-SDK::Query
YDB-CPP-SDK::Table
YDB-CPP-SDK::Helpers
)

# Source paths are relative to this CMakeLists.txt directory.
target_sources(vector_index_builtin
PRIVATE
main.cpp
vector_index.cpp
)

# NOTE(review): vcs_info() is not defined in this file; presumably a project
# helper that attaches version-control metadata to the target - confirm.
vcs_info(vector_index_builtin)

# cpuid_check is linked only when the target processor is x86_64 / AMD64.
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64")
target_link_libraries(vector_index_builtin PUBLIC
cpuid_check
)
endif()

# Platform-specific linker flags.
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
# Flag order is significant: -Wl,--no-as-needed is placed before -lpthread.
target_link_options(vector_index_builtin PRIVATE
-ldl
-lrt
-Wl,--no-as-needed
-lpthread
)
elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
# Sets the macOS platform version to 11.0 and links the CoreFoundation framework
# ("-framework" and "CoreFoundation" are passed as two consecutive arguments).
target_link_options(vector_index_builtin PRIVATE
-Wl,-platform_version,macos,11.0,11.0
-framework
CoreFoundation
)
endif()
71 changes: 71 additions & 0 deletions examples/vector_index_builtin/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "vector_index.h"

#include <ydb-cpp-sdk/client/helpers/helpers.h>


void PrintResults(const std::vector<TResultItem>& items)
{
if (items.empty()) {
std::cout << "No items found" << std::endl;
return;
}

for (const auto& item : items) {
std::cout << "[score=" << item.Score << "] " << item.Id << ": " << item.Document << std::endl;
}
}

void VectorExample(
const std::string& endpoint,
const std::string& database,
const std::string& tableName,
const std::string& indexName)
{
auto driverConfig = NYdb::CreateFromEnvironment(endpoint + "/?database=" + database);
NYdb::TDriver driver(driverConfig);
NYdb::NQuery::TQueryClient client(driver);

try {
DropVectorTable(client, tableName);
CreateVectorTable(client, tableName);
std::vector<TItem> items = {
{.Id = "1", .Document = "document 1", .Embedding = {0.98, 0.1, 0.01}},
{.Id = "2", .Document = "document 2", .Embedding = {1.0, 0.05, 0.05}},
{.Id = "3", .Document = "document 3", .Embedding = {0.9, 0.1, 0.1}},
{.Id = "4", .Document = "document 4", .Embedding = {0.03, 0.0, 0.99}},
{.Id = "5", .Document = "document 5", .Embedding = {0.0, 0.0, 0.99}},
{.Id = "6", .Document = "document 6", .Embedding = {0.0, 0.02, 1.0}},
{.Id = "7", .Document = "document 7", .Embedding = {0.0, 1.05, 0.05}},
{.Id = "8", .Document = "document 8", .Embedding = {0.02, 0.98, 0.1}},
{.Id = "9", .Document = "document 9", .Embedding = {0.0, 1.0, 0.05}},
};
InsertItems(client, tableName, items);
PrintResults(SearchItems(client, tableName, {1.0, 0.0, 0.0}, "CosineSimilarity", 3));
AddIndex(driver, client, database, tableName, indexName, "similarity=cosine", 3, 1, 3);
PrintResults(SearchItems(client, tableName, {1.0, 0.0, 0.0}, "CosineSimilarity", 3, indexName));
} catch (const std::exception& e) {
std::cerr << "Execution failed: " << e.what() << std::endl;
}

driver.Stop(true);
}

int main(int argc, char** argv) {
std::string endpoint;
std::string database;
std::string tableName;
std::string indexName;

NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();

opts.AddLongOption('e', "endpoint", "YDB endpoint").Required().RequiredArgument("HOST:PORT").StoreResult(&endpoint);
opts.AddLongOption('d', "database", "YDB database").Required().RequiredArgument("DATABASE").StoreResult(&database);
opts.AddLongOption("table", "table name").Required().RequiredArgument("TABLE").StoreResult(&tableName);
opts.AddLongOption("index", "index name").Required().RequiredArgument("INDEX").StoreResult(&indexName);

opts.SetFreeArgsMin(0);
NLastGetopt::TOptsParseResult result(&opts, argc, argv);

VectorExample(endpoint, database, tableName, indexName);
return 0;
}
177 changes: 177 additions & 0 deletions examples/vector_index_builtin/vector_index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#include "vector_index.h"

#include <format>


// Drops the table `tableName` if it exists and reports it on stdout.
//
// client    - query client used to run the DDL statement (outside a transaction)
// tableName - name of the table to drop
//
// Throws via NYdb::NStatusHelpers::ThrowOnError when the retried query does
// not finish successfully.
void DropVectorTable(NYdb::NQuery::TQueryClient& client, const std::string& tableName)
{
    // The table name is backtick-quoted, as in the CREATE TABLE / UPSERT /
    // ALTER TABLE statements of this file, so a name that needs quoting can be
    // dropped as well as created. The text is built once, not on every retry.
    const std::string query = std::format("DROP TABLE IF EXISTS `{}`", tableName);

    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync([&query](NYdb::NQuery::TSession session) {
        return session.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync();
    }));

    std::cout << "Vector table dropped: " << tableName << std::endl;
}

// Creates the table `tableName` (if it does not exist yet) with the columns
// id Utf8 (primary key), document Utf8 and embedding String, then reports it
// on stdout.
//
// Throws via NYdb::NStatusHelpers::ThrowOnError when the retried query does
// not finish successfully.
void CreateVectorTable(NYdb::NQuery::TQueryClient& client, const std::string& tableName)
{
    const std::string createQuery = std::format(R"(
CREATE TABLE IF NOT EXISTS `{}` (
id Utf8,
document Utf8,
embedding String,
PRIMARY KEY (id)
))", tableName);

    // DDL is executed outside of a transaction.
    auto createTable = [&createQuery](NYdb::NQuery::TSession session) {
        return session.ExecuteQuery(createQuery, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync();
    };
    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync(createTable));

    std::cout << "Vector table created: " << tableName << std::endl;
}

// Upserts `items` into `tableName` with a single parameterised query.
//
// The items are passed as the `$items` parameter (a list of structs with
// id, document and a list-of-float embedding). The query converts each
// embedding with Knn::ToBinaryStringFloat before storing it. The statement
// runs in a SerializableRW transaction that is committed with the query.
//
// Throws via NYdb::NStatusHelpers::ThrowOnError when the retried query does
// not finish successfully. Prints the number of inserted items on success.
void InsertItems(
    NYdb::NQuery::TQueryClient& client,
    const std::string& tableName,
    const std::vector<TItem>& items)
{
    const std::string upsertQuery = std::format(R"(
DECLARE $items AS List<Struct<
id: Utf8,
document: Utf8,
embedding: List<Float>
>>;

UPSERT INTO `{0}`
(
id,
document,
embedding
)
SELECT
id,
document,
Untag(Knn::ToBinaryStringFloat(embedding), "FloatVector"),
FROM AS_TABLE($items);
)", tableName);

    // Build the $items parameter: List<Struct<id, document, embedding>>.
    NYdb::TParamsBuilder builder;
    auto& itemsParam = builder.AddParam("$items");
    itemsParam.BeginList();
    for (const auto& entry : items) {
        itemsParam.AddListItem();
        itemsParam.BeginStruct();
        itemsParam.AddMember("id").Utf8(entry.Id);
        itemsParam.AddMember("document").Utf8(entry.Document);
        itemsParam.AddMember("embedding").BeginList();
        for (const auto& component : entry.Embedding) {
            itemsParam.AddListItem().Float(component);
        }
        itemsParam.EndList();
        itemsParam.EndStruct();
    }
    itemsParam.EndList();
    itemsParam.Build();

    auto upsert = [params = builder.Build(), &upsertQuery](NYdb::NQuery::TSession session) {
        return session.ExecuteQuery(
            upsertQuery,
            NYdb::NQuery::TTxControl::BeginTx(NYdb::NQuery::TTxSettings::SerializableRW()).CommitTx(),
            params
        ).ExtractValueSync();
    };
    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync(upsert));

    std::cout << items.size() << " items inserted" << std::endl;
}

// Adds a `vector_kmeans_tree` index on the `embedding` column of `tableName`.
//
// The index is first built under the temporary name `<indexName>__temp` via an
// ALTER TABLE query, and is then renamed to `indexName` through the table
// client with ReplaceDestination_ = true.
//
// driver    - driver used to construct the table client for the rename step
// client    - query client used for the ALTER TABLE ... ADD INDEX statement
// database  - database path; joined with `tableName` as "<database>/<tableName>"
// tableName - table to index
// indexName - final index name
// strategy  - inserted verbatim as the first entry of the WITH (...) clause
// dim       - value for vector_dimension
// levels    - value for levels
// clusters  - value for clusters
//
// Throws via NYdb::NStatusHelpers::ThrowOnError when either step does not
// finish successfully. Prints a confirmation line on success.
void AddIndex(
    NYdb::TDriver& driver,
    NYdb::NQuery::TQueryClient& client,
    const std::string& database,
    const std::string& tableName,
    const std::string& indexName,
    const std::string& strategy,
    std::uint64_t dim,
    std::uint64_t levels,
    std::uint64_t clusters)
{
    const std::string addIndexQuery = std::format(R"(
ALTER TABLE `{0}`
ADD INDEX {1}__temp
GLOBAL USING vector_kmeans_tree
ON (embedding)
WITH (
{2},
vector_type="Float",
vector_dimension={3},
levels={4},
clusters={5}
);
)", tableName, indexName, strategy, dim, levels, clusters);

    // Step 1: build the index under its temporary name.
    auto buildIndex = [&addIndexQuery](NYdb::NQuery::TSession session) {
        return session.ExecuteQuery(addIndexQuery, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync();
    };
    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync(buildIndex));

    // Step 2: rename the temporary index to its final name.
    const std::string tablePath = database + "/" + tableName;
    const std::string temporaryName = indexName + "__temp";

    NYdb::NTable::TTableClient tableClient(driver);
    auto renameIndex = [&tablePath, &temporaryName, &indexName](NYdb::NTable::TSession session) {
        return session.AlterTable(tablePath, NYdb::NTable::TAlterTableSettings()
            .AppendRenameIndexes(NYdb::NTable::TRenameIndex{
                .SourceName_ = temporaryName,
                .DestinationName_ = indexName,
                .ReplaceDestination_ = true
            })
        ).ExtractValueSync();
    };
    NYdb::NStatusHelpers::ThrowOnError(tableClient.RetryOperationSync(renameIndex));

    std::cout << "Table index `" << indexName << "` for table `" << tableName << "` added" << std::endl;
}

// Returns up to `limit` rows of `tableName` ordered by their Knn score against
// `embedding`.
//
// client    - query client used to run the SELECT
// tableName - table to search; backtick-quoted in the query
// embedding - target vector, passed as the `$embedding` List<Float> parameter
// strategy  - name of the Knn:: function used to compute the score (inserted
//             verbatim). Names ending in "Similarity" are sorted DESC, all
//             others ASC.
// limit     - maximum number of rows to return
// indexName - when set, the query reads through `VIEW <indexName>`;
//             otherwise the table is read directly
//
// Throws via NYdb::NStatusHelpers::ThrowOnError when the retried query does
// not finish successfully.
std::vector<TResultItem> SearchItems(
    NYdb::NQuery::TQueryClient& client,
    const std::string& tableName,
    const std::vector<float>& embedding,
    const std::string& strategy,
    std::uint64_t limit,
    const std::optional<std::string>& indexName)
{
    std::string viewIndex = indexName ? "VIEW " + *indexName : "";
    std::string sortOrder = strategy.ends_with("Similarity") ? "DESC" : "ASC";

    // The table name is backtick-quoted, matching the CREATE TABLE / UPSERT /
    // ALTER TABLE statements in this file, so the same names work everywhere.
    std::string query = std::format(R"(
DECLARE $embedding as List<Float>;

$TargetEmbedding = Knn::ToBinaryStringFloat($embedding);

SELECT
id,
document,
Knn::{2}(embedding, $TargetEmbedding) as score
FROM `{0}` {1}
ORDER BY score
{3}
LIMIT {4};
)", tableName, viewIndex, strategy, sortOrder, limit);

    NYdb::TParamsBuilder paramsBuilder;
    auto& valueBuilder = paramsBuilder.AddParam("$embedding");
    valueBuilder.BeginList();
    for (auto value : embedding) {
        valueBuilder.AddListItem().Float(value);
    }
    valueBuilder.EndList().Build();

    std::vector<TResultItem> result;

    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync([params = paramsBuilder.Build(), &query, &result](NYdb::NQuery::TSession session) {
        auto execResult = session.ExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx(NYdb::NQuery::TTxSettings::SerializableRW()).CommitTx(), params).ExtractValueSync();
        // Rows are collected only from a successful attempt, so a failed
        // attempt leaves `result` untouched.
        if (execResult.IsSuccess()) {
            auto parser = execResult.GetResultSetParser(0);
            while (parser.TryNextRow()) {
                // NOTE(review): the optional column values are dereferenced
                // without a check; this assumes id, document and score are
                // never NULL in the result set - confirm.
                result.push_back({
                    .Id = *parser.ColumnParser(0).GetOptionalUtf8(),
                    .Document = *parser.ColumnParser(1).GetOptionalUtf8(),
                    .Score = *parser.ColumnParser(2).GetOptionalFloat()
                });
            }
        }
        return execResult;
    }));

    return result;
}
Loading
Loading