Skip to content

Commit 8e294b9

Browse files
Merge pull request ClickHouse#78244 from ClickHouse/Add_system_iceberg_history_table
Add system iceberg history table
2 parents 0b28963 + 5f852ec commit 8e294b9

File tree

11 files changed

+287
-10
lines changed

11 files changed

+287
-10
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
---
2+
description: 'System iceberg snapshot history'
3+
keywords: ['system iceberg_history']
4+
slug: /operations/system-tables/iceberg_history
5+
title: 'system.iceberg_history'
6+
---
7+
8+
# system.iceberg_history
9+
10+
Contains the snapshot history of Iceberg tables.
11+
12+
Columns:
13+
14+
- `database_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in.
15+
16+
- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name.
17+
18+
- `made_current_at` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime64(6)](../../sql-reference/data-types/datetime64.md))) — Time when the snapshot became the current snapshot.
19+
20+
- `snapshot_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Snapshot id.
21+
22+
- `parent_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Snapshot id of the parent snapshot.
23+
24+
- `is_current_ancestor` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether this snapshot is an ancestor of the current snapshot.

src/Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ class DataLakeConfiguration : public BaseStorageConfiguration, public std::enabl
108108
&& current_metadata->supportsSchemaEvolution();
109109
}
110110

111+
/// Returns the raw pointer obtained from `current_metadata.get()`.
/// The pointer is non-owning: ownership stays with `current_metadata`.
IDataLakeMetadata * getExternalMetadata() const override
{
    return current_metadata.get();
}
112+
111113
ColumnsDescription updateAndGetCurrentSchema(
112114
ObjectStoragePtr object_storage,
113115
ContextPtr context) override

src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,85 @@ ManifestListPtr IcebergMetadata::getManifestList(const String & filename) const
707707
return manifest_list_ptr;
708708
}
709709

710+
/// Builds the snapshot history of the table from its latest (or explicitly configured) metadata file.
///
/// The metadata JSON is re-read on every call. One record is returned per entry of the `snapshots`
/// array, in file order:
///  - `snapshot_id` / `parent_id` come from `snapshot-id` / `parent-snapshot-id`
///    (`parent_id` is 0 when the parent is absent or null);
///  - `made_current_at` comes from the first `snapshot-log` entry that mentions the snapshot,
///    converted from milliseconds to microseconds (DateTime64 with scale 6); it is 0 when
///    the snapshot has no log entry;
///  - `is_current_ancestor` is true for `current-snapshot-id` and for every snapshot reachable
///    from it by following parent links.
///
/// `snapshots`, `snapshot-log` and `current-snapshot-id` are treated as optional: a missing or null
/// field yields an empty history / zero timestamps / no ancestors instead of an exception.
IcebergMetadata::IcebergHistory IcebergMetadata::getHistory() const
{
    /// `timestamp-ms` is in milliseconds; the history records are stored with microsecond precision.
    constexpr Int64 microseconds_per_millisecond = 1000;

    auto configuration_ptr = configuration.lock();

    const auto [metadata_version, metadata_file_path] = getLatestOrExplicitMetadataFileAndVersion(object_storage, *configuration_ptr, getContext(), log.get());

    chassert(metadata_version == last_metadata_version);

    auto metadata_object = readJSON(metadata_file_path, object_storage, getContext(), log);

    chassert(format_version == metadata_object->getValue<int>(FORMAT_VERSION_FIELD));

    IcebergHistory iceberg_history;

    /// getArray() returns a null pointer when the key is missing or is not an array.
    const auto snapshots = metadata_object->getArray("snapshots");
    if (!snapshots)
        return iceberg_history;

    /// First pass: one record per snapshot plus the snapshot -> parent mapping (0 means "no parent").
    std::map<Int64, Int64> parent_of;
    iceberg_history.reserve(snapshots->size());
    for (size_t i = 0; i < snapshots->size(); ++i)
    {
        const auto snapshot = snapshots->getObject(static_cast<UInt32>(i));

        IcebergHistoryRecord record{};
        record.snapshot_id = snapshot->getValue<Int64>("snapshot-id");

        if (snapshot->has("parent-snapshot-id") && !snapshot->isNull("parent-snapshot-id"))
            record.parent_id = snapshot->getValue<Int64>("parent-snapshot-id");
        else
            record.parent_id = 0;

        parent_of[record.snapshot_id] = record.parent_id;
        iceberg_history.push_back(record);
    }

    /// Index the snapshot log once instead of rescanning it for every snapshot.
    /// emplace() keeps the first entry for a snapshot id, matching a "first match wins" scan.
    std::map<Int64, Int64> first_made_current_ms;
    if (const auto snapshot_logs = metadata_object->getArray("snapshot-log"))
    {
        for (size_t i = 0; i < snapshot_logs->size(); ++i)
        {
            const auto snapshot_log = snapshot_logs->getObject(static_cast<UInt32>(i));
            first_made_current_ms.emplace(
                snapshot_log->getValue<Int64>("snapshot-id"),
                snapshot_log->getValue<Int64>("timestamp-ms"));
        }
    }

    /// Walk the parent chain starting from the current snapshot.
    /// A cycle-free chain visits at most every known snapshot once, plus possibly one unknown
    /// (e.g. expired) parent, so bounding the number of steps protects against cyclic metadata.
    std::vector<Int64> ancestors;
    if (metadata_object->has("current-snapshot-id") && !metadata_object->isNull("current-snapshot-id"))
    {
        Int64 ancestor_id = metadata_object->getValue<Int64>("current-snapshot-id");
        for (size_t step = 0; step <= parent_of.size(); ++step)
        {
            ancestors.push_back(ancestor_id);

            const auto parent = parent_of.find(ancestor_id);
            if (parent == parent_of.end() || parent->second == 0)
                break;

            ancestor_id = parent->second;
        }
    }
    std::sort(ancestors.begin(), ancestors.end());

    /// Second pass: attach the timestamp and the ancestor flag to every record.
    for (auto & record : iceberg_history)
    {
        const auto made_current = first_made_current_ms.find(record.snapshot_id);
        if (made_current != first_made_current_ms.end())
        {
            const DateTime64 made_current_at_us = made_current->second * microseconds_per_millisecond;
            record.made_current_at = made_current_at_us;
        }
        else
        {
            record.made_current_at = 0;
        }

        record.is_current_ancestor = std::binary_search(ancestors.begin(), ancestors.end(), record.snapshot_id);
    }

    return iceberg_history;
}
788+
710789
ManifestFilePtr IcebergMetadata::getManifestFile(const String & filename, Int64 inherited_sequence_number) const
711790
{
712791
auto configuration_ptr = configuration.lock();

src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class IcebergMetadata : public IDataLakeMetadata, private WithContext
2929
public:
3030
using ConfigurationObserverPtr = StorageObjectStorage::ConfigurationObserverPtr;
3131
using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
32-
32+
using IcebergHistory = std::vector<Iceberg::IcebergHistoryRecord>;
3333

3434
static constexpr auto name = "Iceberg";
3535

@@ -82,6 +82,8 @@ class IcebergMetadata : public IDataLakeMetadata, private WithContext
8282

8383
bool update(const ContextPtr & local_context) override;
8484

85+
IcebergHistory getHistory() const;
86+
8587
std::optional<size_t> totalRows() const override;
8688
std::optional<size_t> totalBytes() const override;
8789

src/Storages/ObjectStorage/DataLakes/Iceberg/Snapshot.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#if USE_AVRO
55

66
#include <Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h>
7+
#include <DataTypes/DataTypeDateTime64.h>
78

89
namespace Iceberg
910
{
@@ -18,6 +19,15 @@ struct IcebergSnapshot
1819
std::optional<size_t> total_rows;
1920
std::optional<size_t> total_bytes;
2021
};
22+
23+
struct IcebergHistoryRecord
24+
{
25+
Int64 snapshot_id;
26+
DB::DateTime64 made_current_at;
27+
Int64 parent_id;
28+
bool is_current_ancestor;
29+
};
30+
2131
}
2232

2333
#endif

src/Storages/ObjectStorage/StorageObjectStorage.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ bool StorageObjectStorage::hasExternalDynamicMetadata() const
197197
return configuration->hasExternalDynamicMetadata();
198198
}
199199

200+
/// Forwards to the storage configuration and returns whatever external (data lake) metadata
/// pointer it exposes. The pointer is non-owning.
IDataLakeMetadata * StorageObjectStorage::getExternalMetadata() const
{
    auto * external_metadata = configuration->getExternalMetadata();
    return external_metadata;
}
204+
200205
void StorageObjectStorage::updateExternalDynamicMetadata(ContextPtr context_ptr)
201206
{
202207
StorageInMemoryMetadata metadata;

src/Storages/ObjectStorage/StorageObjectStorage.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <Common/threadPoolCallbackRunner.h>
1010
#include <Interpreters/ActionsDAG.h>
1111
#include <Storages/ColumnsDescription.h>
12+
#include <Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h>
1213

1314
#include <memory>
1415
namespace DB
@@ -140,6 +141,8 @@ class StorageObjectStorage : public IStorage
140141

141142
void updateExternalDynamicMetadata(ContextPtr) override;
142143

144+
IDataLakeMetadata * getExternalMetadata() const;
145+
143146
std::optional<UInt64> totalRows(ContextPtr query_context) const override;
144147
std::optional<UInt64> totalBytes(ContextPtr query_context) const override;
145148
protected:
@@ -229,6 +232,8 @@ class StorageObjectStorage::Configuration
229232

230233
virtual bool hasExternalDynamicMetadata() { return false; }
231234

235+
/// Default implementation: this configuration exposes no external metadata, so nullptr is returned.
/// Overrides may return a non-owning pointer to their metadata object.
virtual IDataLakeMetadata * getExternalMetadata() const
{
    return nullptr;
}
236+
232237
virtual std::shared_ptr<NamesAndTypesList> getInitialSchemaByPath(const String &) const { return {}; }
233238

234239
virtual std::shared_ptr<const ActionsDAG> getSchemaTransformer(const String &) const { return {}; }
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#include <Storages/System/StorageSystemIcebergHistory.h>
2+
#include <mutex>
3+
#include <DataTypes/DataTypesNumber.h>
4+
#include <DataTypes/DataTypeString.h>
5+
#include <DataTypes/DataTypeMap.h>
6+
#include <DataTypes/DataTypeDateTime.h>
7+
#include <DataTypes/DataTypeDate.h>
8+
#include <DataTypes/DataTypeUUID.h>
9+
#include <DataTypes/DataTypeNullable.h>
10+
#include <DataTypes/DataTypeDateTime64.h>
11+
#include <Interpreters/InterpreterSelectQuery.h>
12+
#include <Processors/LimitTransform.h>
13+
#include <Processors/Port.h>
14+
#include <Processors/QueryPlan/QueryPlan.h>
15+
#include <Processors/QueryPlan/ReadFromSystemNumbersStep.h>
16+
#include <Storages/SelectQueryInfo.h>
17+
#include <Storages/ObjectStorage/StorageObjectStorage.h>
18+
#include <Access/ContextAccess.h>
19+
#include <Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h>
20+
#include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h>
21+
#include <Interpreters/DatabaseCatalog.h>
22+
#include <Core/Settings.h>
23+
24+
static constexpr auto TIME_SCALE = 6;
25+
26+
namespace DB
27+
{
28+
29+
namespace Setting
30+
{
31+
extern const SettingsSeconds lock_acquire_timeout;
32+
}
33+
34+
/// Describes the columns of `system.iceberg_history`: name, data type and comment of each column,
/// in the order in which `fillData` inserts the values.
ColumnsDescription StorageSystemIcebergHistory::getColumnsDescription()
{
    auto database_name_type = std::make_shared<DataTypeString>();
    auto table_name_type = std::make_shared<DataTypeString>();
    auto made_current_at_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime64>(TIME_SCALE));
    auto snapshot_id_type = std::make_shared<DataTypeUInt64>();
    auto parent_id_type = std::make_shared<DataTypeUInt64>();
    auto is_current_ancestor_type = std::make_shared<DataTypeUInt8>();

    return ColumnsDescription
    {
        {"database_name", database_name_type, "Database name"},
        {"table_name", table_name_type, "Table name."},
        {"made_current_at", made_current_at_type, "date & time when this snapshot was made current snapshot"},
        {"snapshot_id", snapshot_id_type, "snapshot id which is used to identify a snapshot."},
        {"parent_id", parent_id_type, "parent id of this snapshot."},
        {"is_current_ancestor", is_current_ancestor_type, "Flag that indicates if this snapshot is an ancestor of the current snapshot."}
    };
}
46+
47+
/// Fills `system.iceberg_history` with one row per snapshot of every Iceberg table visible to the user.
///
/// Nothing is produced unless SHOW TABLES is granted globally; each table is additionally checked
/// for SHOW TABLES on its own database/table name. Only tables backed by `StorageObjectStorage`
/// whose external metadata is an `IcebergMetadata` contribute rows. Without USE_AVRO the function
/// produces no rows.
///
/// res_columns receives, in order: database name, table name, made_current_at, snapshot_id,
/// parent_id, is_current_ancestor (the order declared by getColumnsDescription()).
void StorageSystemIcebergHistory::fillData([[maybe_unused]] MutableColumns & res_columns, [[maybe_unused]] ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const
{
#if USE_AVRO
    const auto access = context->getAccess();

    if (!access->isGranted(AccessType::SHOW_TABLES))
        return;

    const auto databases = DatabaseCatalog::instance().getDatabases();
    for (const auto & database : databases)
    {
        for (auto iterator = database.second->getLightweightTablesIterator(context); iterator->isValid(); iterator->next())
        {
            StoragePtr storage = iterator->table();
            if (!storage)
                continue;

            /// Filter by storage type first so that unrelated tables are not locked at all.
            auto * object_storage_table = dynamic_cast<StorageObjectStorage *>(storage.get());
            if (!object_storage_table)
                continue;

            if (!access->isGranted(AccessType::SHOW_TABLES, iterator->databaseName(), iterator->name()))
                continue;

            TableLockHolder lock = storage->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef()[Setting::lock_acquire_timeout]);
            if (!lock)
                // Table was dropped while acquiring the lock, skipping table
                continue;

            /// dynamic_cast of a null pointer yields null, so one check covers both
            /// "no external metadata" and "metadata of another data lake format".
            auto * iceberg_metadata = dynamic_cast<IcebergMetadata *>(object_storage_table->getExternalMetadata());
            if (!iceberg_metadata)
                continue;

            const IcebergMetadata::IcebergHistory iceberg_history_items = iceberg_metadata->getHistory();
            for (const auto & iceberg_history_item : iceberg_history_items)
            {
                size_t column_index = 0;
                res_columns[column_index++]->insert(iterator->databaseName());
                res_columns[column_index++]->insert(iterator->name());
                res_columns[column_index++]->insert(iceberg_history_item.made_current_at);
                res_columns[column_index++]->insert(iceberg_history_item.snapshot_id);
                res_columns[column_index++]->insert(iceberg_history_item.parent_id);
                res_columns[column_index++]->insert(iceberg_history_item.is_current_ancestor);
            }
        }
    }
#endif
}
104+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#pragma once
2+
3+
#include <optional>
4+
#include <Parsers/ASTIdentifier.h>
5+
#include <Storages/System/IStorageSystemOneBlock.h>
6+
7+
8+
namespace DB
9+
{
10+
11+
class Context;
12+
13+
/** Implements a table engine for the system "iceberg_history".
14+
*
15+
* database_name String
16+
* table_name String
17+
* made_current_at Nullable(DateTime64(6)),
18+
* snapshot_id UInt64,
19+
* parent_id UInt64,
20+
* is_current_ancestor UInt8
21+
*
22+
*/
23+
24+
class StorageSystemIcebergHistory final : public IStorageSystemOneBlock
25+
{
26+
public:
27+
std::string getName() const override { return "SystemIcebergHistory"; }
28+
29+
static ColumnsDescription getColumnsDescription();
30+
31+
protected:
32+
using IStorageSystemOneBlock::IStorageSystemOneBlock;
33+
34+
void fillData([[maybe_unused]] MutableColumns & res_columns, [[maybe_unused]] ContextPtr context, const ActionsDAG::Node *, std::vector<UInt8>) const override;
35+
};
36+
37+
}

src/Storages/System/attachSystemTables.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@
100100
#include <Storages/System/StorageSystemViewRefreshes.h>
101101
#include <Storages/System/StorageSystemDNSCache.h>
102102
#include <Storages/System/StorageSystemLatencyBuckets.h>
103+
#include <Storages/System/StorageSystemIcebergHistory.h>
103104

104105
#include <Poco/Util/LayeredConfiguration.h>
105106

@@ -234,6 +235,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b
234235
attach<StorageSystemViewRefreshes>(context, system_database, "view_refreshes", "Lists all Refreshable Materialized Views of current server.");
235236
attach<StorageSystemWorkloads>(context, system_database, "workloads", "Contains a list of all currently existing workloads.");
236237
attach<StorageSystemResources>(context, system_database, "resources", "Contains a list of all currently existing resources.");
238+
attach<StorageSystemIcebergHistory>(context, system_database, "iceberg_history", "Contains a list of iceberg history.");
237239

238240
if (has_zookeeper)
239241
{

0 commit comments

Comments
 (0)