@@ -29,6 +29,7 @@ constexpr const char* k_tables_name = "tables";
2929constexpr const char * k_columns_name = " columns" ;
3030constexpr const char * k_indexes_name = " indexes" ;
3131constexpr const char * k_meta_name = " meta" ;
32+ constexpr const char * k_databases_name = " databases" ;
3233
3334std::string join_path (const std::string& root, const std::string& name)
3435{
@@ -131,6 +132,7 @@ int64_t ensure_catalog(const std::string& root_path, icm::string_map<> creds)
131132 const auto columns_path = join_path (root_path, k_columns_name);
132133 const auto indexes_path = join_path (root_path, k_indexes_name);
133134 const auto meta_path = join_path (root_path, k_meta_name);
135+ const auto databases_path = join_path (root_path, k_databases_name);
134136
135137 try {
136138 // Build schemas for all catalog tables
@@ -165,9 +167,20 @@ int64_t ensure_catalog(const std::string& root_path, icm::string_map<> creds)
165167 .add (" updated_at" , deeplake_core::type::generic (nd::type::scalar (nd::dtype::int64)))
166168 .set_primary_key (" catalog_version" );
167169
168- // Launch all 4 open_or_create operations in parallel
170+ deeplake_api::catalog_table_schema databases_schema;
171+ databases_schema.add (" db_name" , deeplake_core::type::text (codecs::compression::null))
172+ .add (" owner" , deeplake_core::type::text (codecs::compression::null))
173+ .add (" encoding" , deeplake_core::type::text (codecs::compression::null))
174+ .add (" lc_collate" , deeplake_core::type::text (codecs::compression::null))
175+ .add (" lc_ctype" , deeplake_core::type::text (codecs::compression::null))
176+ .add (" template_db" , deeplake_core::type::text (codecs::compression::null))
177+ .add (" state" , deeplake_core::type::text (codecs::compression::null))
178+ .add (" updated_at" , deeplake_core::type::generic (nd::type::scalar (nd::dtype::int64)))
179+ .set_primary_key (" db_name" );
180+
181+ // Launch all 5 open_or_create operations in parallel
169182 icm::vector<async::promise<std::shared_ptr<deeplake_api::catalog_table>>> promises;
170- promises.reserve (4 );
183+ promises.reserve (5 );
171184 promises.push_back (
172185 deeplake_api::open_or_create_catalog_table (tables_path, std::move (tables_schema), icm::string_map<>(creds)));
173186 promises.push_back (
@@ -176,12 +189,14 @@ int64_t ensure_catalog(const std::string& root_path, icm::string_map<> creds)
176189 deeplake_api::open_or_create_catalog_table (indexes_path, std::move (indexes_schema), icm::string_map<>(creds)));
177190 promises.push_back (
178191 deeplake_api::open_or_create_catalog_table (meta_path, std::move (meta_schema), icm::string_map<>(creds)));
192+ promises.push_back (
193+ deeplake_api::open_or_create_catalog_table (databases_path, std::move (databases_schema), icm::string_map<>(creds)));
179194
180195 // Wait for all to complete
181196 auto results = async::combine (std::move (promises)).get_future ().get ();
182- if (results.size () != 4 ) {
197+ if (results.size () != 5 ) {
183198 elog (ERROR,
184- " Failed to initialize catalog at %s: expected 4 catalog tables, got %zu" ,
199+ " Failed to initialize catalog at %s: expected 5 catalog tables, got %zu" ,
185200 root_path.c_str (),
186201 static_cast <size_t >(results.size ()));
187202 }
@@ -499,6 +514,83 @@ void upsert_columns(const std::string& root_path, icm::string_map<> creds, const
499514 table->upsert_many (std::move (rows)).get_future ().get ();
500515}
501516
517+ std::vector<database_meta> load_databases (const std::string& root_path, icm::string_map<> creds)
518+ {
519+ std::vector<database_meta> out;
520+ try {
521+ auto table = open_catalog_table (root_path, k_databases_name, std::move (creds));
522+ if (!table) {
523+ return out;
524+ }
525+ auto snapshot = table->read ().get_future ().get ();
526+ if (snapshot.row_count () == 0 ) {
527+ return out;
528+ }
529+
530+ std::unordered_map<std::string, database_meta> latest;
531+ for (const auto & row : snapshot.rows ()) {
532+ auto db_name_it = row.find (" db_name" );
533+ auto owner_it = row.find (" owner" );
534+ auto encoding_it = row.find (" encoding" );
535+ auto lc_collate_it = row.find (" lc_collate" );
536+ auto lc_ctype_it = row.find (" lc_ctype" );
537+ auto template_it = row.find (" template_db" );
538+ auto state_it = row.find (" state" );
539+ auto updated_it = row.find (" updated_at" );
540+ if (db_name_it == row.end () || state_it == row.end ()) {
541+ continue ;
542+ }
543+
544+ database_meta meta;
545+ meta.db_name = deeplake_api::array_to_string (db_name_it->second );
546+ if (owner_it != row.end ()) meta.owner = deeplake_api::array_to_string (owner_it->second );
547+ if (encoding_it != row.end ()) meta.encoding = deeplake_api::array_to_string (encoding_it->second );
548+ if (lc_collate_it != row.end ()) meta.lc_collate = deeplake_api::array_to_string (lc_collate_it->second );
549+ if (lc_ctype_it != row.end ()) meta.lc_ctype = deeplake_api::array_to_string (lc_ctype_it->second );
550+ if (template_it != row.end ()) meta.template_db = deeplake_api::array_to_string (template_it->second );
551+ meta.state = deeplake_api::array_to_string (state_it->second );
552+ if (updated_it != row.end ()) {
553+ auto updated_vec = load_int64_vector (updated_it->second );
554+ meta.updated_at = updated_vec.empty () ? 0 : updated_vec.front ();
555+ }
556+
557+ auto it = latest.find (meta.db_name );
558+ if (it == latest.end () || it->second .updated_at <= meta.updated_at ) {
559+ latest[meta.db_name ] = std::move (meta);
560+ }
561+ }
562+
563+ out.reserve (latest.size ());
564+ for (auto & [_, meta] : latest) {
565+ if (meta.state == " ready" ) {
566+ out.push_back (std::move (meta));
567+ }
568+ }
569+ return out;
570+ } catch (const std::exception& e) {
571+ elog (WARNING, " Failed to load catalog databases: %s" , e.what ());
572+ return out;
573+ } catch (...) {
574+ elog (WARNING, " Failed to load catalog databases: unknown error" );
575+ return out;
576+ }
577+ }
578+
579+ void upsert_database (const std::string& root_path, icm::string_map<> creds, const database_meta& meta)
580+ {
581+ auto table = open_catalog_table (root_path, k_databases_name, std::move (creds));
582+ icm::string_map<nd::array> row;
583+ row[" db_name" ] = nd::adapt (meta.db_name );
584+ row[" owner" ] = nd::adapt (meta.owner );
585+ row[" encoding" ] = nd::adapt (meta.encoding );
586+ row[" lc_collate" ] = nd::adapt (meta.lc_collate );
587+ row[" lc_ctype" ] = nd::adapt (meta.lc_ctype );
588+ row[" template_db" ] = nd::adapt (meta.template_db );
589+ row[" state" ] = nd::adapt (meta.state );
590+ row[" updated_at" ] = nd::adapt (meta.updated_at == 0 ? now_ms () : meta.updated_at );
591+ table->upsert (std::move (row)).get_future ().get ();
592+ }
593+
502594int64_t get_catalog_version (const std::string& root_path, icm::string_map<> creds)
503595{
504596 try {
0 commit comments