-
Notifications
You must be signed in to change notification settings - Fork 17
Feature: Support TRUNCATE TABLE for Iceberg engine #1529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: antalya-26.1
Are you sure you want to change the base?
Changes from all commits
de68832
601250f
27a8467
84d76f1
c81fc7f
876577b
cc1c7db
5a7e014
2148724
4681e4e
12657bb
bd1d16e
11810d8
2b66954
cf85627
e70e425
4cfc411
47c8acc
2155e32
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -113,7 +113,8 @@ MetadataGenerator::NextMetadataResult MetadataGenerator::generateNextMetadata( | |
| Int32 added_delete_files, | ||
| Int32 num_deleted_rows, | ||
| std::optional<Int64> user_defined_snapshot_id, | ||
| std::optional<Int64> user_defined_timestamp) | ||
| std::optional<Int64> user_defined_timestamp, | ||
| bool is_truncate) | ||
| { | ||
| int format_version = metadata_object->getValue<Int32>(Iceberg::f_format_version); | ||
| Poco::JSON::Object::Ptr new_snapshot = new Poco::JSON::Object; | ||
|
|
@@ -137,7 +138,16 @@ MetadataGenerator::NextMetadataResult MetadataGenerator::generateNextMetadata( | |
|
|
||
| auto parent_snapshot = getParentSnapshot(parent_snapshot_id); | ||
| Poco::JSON::Object::Ptr summary = new Poco::JSON::Object; | ||
| if (num_deleted_rows == 0) | ||
| if (is_truncate) | ||
| { | ||
| summary->set(Iceberg::f_operation, Iceberg::f_overwrite); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On AbstractionI DID try avro::DataFileWriter first and it's the organic one. But, unfortunately in the vendored Avro C++ version, writeHeader() is called eagerly in the constructor committing the binary encoder state. Calling setMetadata("avro.schema", ...) post-construction corrupts StreamWriter::next_ causing a NULL dereference on close(). I have a confirmed server segfault stack trace from this exact path. The manual OCF serialization exists because the abstraction is broken for this specific use case in this library version. On sum_with_parent_snapshotThe is_truncate guard inside sum_with_parent_snapshot already correctly zeroes cumulative totals (total_records, total_data_files, etc.) — you're correct that we could lean on it there. However deleted_records and deleted_data_files are different: they need to be populated from the parent snapshot's values (not zero), since they represent what was removed. That's why they're set separately. Glad to add a comment making this distinction explicit. |
||
| Int32 prev_total_records = parent_snapshot && parent_snapshot->has(Iceberg::f_summary) && parent_snapshot->getObject(Iceberg::f_summary)->has(Iceberg::f_total_records) ? std::stoi(parent_snapshot->getObject(Iceberg::f_summary)->getValue<String>(Iceberg::f_total_records)) : 0; | ||
| Int32 prev_total_data_files = parent_snapshot && parent_snapshot->has(Iceberg::f_summary) && parent_snapshot->getObject(Iceberg::f_summary)->has(Iceberg::f_total_data_files) ? std::stoi(parent_snapshot->getObject(Iceberg::f_summary)->getValue<String>(Iceberg::f_total_data_files)) : 0; | ||
|
|
||
| summary->set(Iceberg::f_deleted_records, std::to_string(prev_total_records)); | ||
| summary->set(Iceberg::f_deleted_data_files, std::to_string(prev_total_data_files)); | ||
| } | ||
| else if (num_deleted_rows == 0) | ||
| { | ||
| summary->set(Iceberg::f_operation, Iceberg::f_append); | ||
| summary->set(Iceberg::f_added_data_files, std::to_string(added_files)); | ||
|
|
@@ -157,7 +167,12 @@ MetadataGenerator::NextMetadataResult MetadataGenerator::generateNextMetadata( | |
|
|
||
| auto sum_with_parent_snapshot = [&](const char * field_name, Int32 snapshot_value) | ||
| { | ||
| Int32 prev_value = parent_snapshot ? std::stoi(parent_snapshot->getObject(Iceberg::f_summary)->getValue<String>(field_name)) : 0; | ||
| if (is_truncate) | ||
| { | ||
| summary->set(field_name, std::to_string(0)); | ||
| return; | ||
| } | ||
| Int32 prev_value = parent_snapshot && parent_snapshot->has(Iceberg::f_summary) && parent_snapshot->getObject(Iceberg::f_summary)->has(field_name) ? std::stoi(parent_snapshot->getObject(Iceberg::f_summary)->getValue<String>(field_name)) : 0; | ||
| summary->set(field_name, std::to_string(prev_value + snapshot_value)); | ||
| }; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| #!/usr/bin/env python3 | ||
|
|
||
| from pyiceberg.catalog import load_catalog | ||
| from helpers.config_cluster import minio_secret_key, minio_access_key | ||
| import uuid | ||
| import pyarrow as pa | ||
| from pyiceberg.schema import Schema, NestedField | ||
| from pyiceberg.types import LongType, StringType | ||
| from pyiceberg.partitioning import PartitionSpec | ||
|
|
||
| BASE_URL_LOCAL_RAW = "http://localhost:8182" | ||
| CATALOG_NAME = "demo" | ||
|
|
||
| def load_catalog_impl(started_cluster): | ||
| return load_catalog( | ||
| CATALOG_NAME, | ||
| **{ | ||
| "uri": BASE_URL_LOCAL_RAW, | ||
| "type": "rest", | ||
| "s3.endpoint": f"http://{started_cluster.get_instance_ip('minio')}:9000", | ||
| "s3.access-key-id": minio_access_key, | ||
| "s3.secret-access-key": minio_secret_key, | ||
| }, | ||
| ) | ||
|
|
||
|
|
||
| def test_iceberg_truncate_restart(started_cluster_iceberg_no_spark): | ||
| instance = started_cluster_iceberg_no_spark.instances["node1"] | ||
| catalog = load_catalog_impl(started_cluster_iceberg_no_spark) | ||
|
|
||
| namespace = f"clickhouse_truncate_restart_{uuid.uuid4().hex}" | ||
| catalog.create_namespace(namespace) | ||
|
|
||
| schema = Schema( | ||
| NestedField(field_id=1, name="id", field_type=LongType(), required=False), | ||
| NestedField(field_id=2, name="val", field_type=StringType(), required=False), | ||
| ) | ||
| table_name = "test_truncate_restart" | ||
| catalog.create_table( | ||
| identifier=f"{namespace}.{table_name}", | ||
| schema=schema, | ||
| location=f"s3://warehouse-rest/{namespace}.{table_name}", | ||
| partition_spec=PartitionSpec(), | ||
| ) | ||
|
|
||
| ch_table_identifier = f"`{namespace}.{table_name}`" | ||
|
|
||
| instance.query(f"DROP DATABASE IF EXISTS {namespace}") | ||
| instance.query( | ||
| f""" | ||
| CREATE DATABASE {namespace} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'minio', '{minio_secret_key}') | ||
| SETTINGS | ||
| catalog_type='rest', | ||
| warehouse='demo', | ||
| storage_endpoint='http://minio:9000/warehouse-rest'; | ||
| """, | ||
| settings={"allow_database_iceberg": 1} | ||
| ) | ||
|
|
||
| # 1. Insert initial data and truncate | ||
| df = pa.Table.from_pylist([{"id": 1, "val": "A"}, {"id": 2, "val": "B"}]) | ||
| catalog.load_table(f"{namespace}.{table_name}").append(df) | ||
|
|
||
| assert int(instance.query(f"SELECT count() FROM {namespace}.{ch_table_identifier}").strip()) == 2 | ||
|
|
||
| instance.query( | ||
| f"TRUNCATE TABLE {namespace}.{ch_table_identifier}", | ||
| settings={"allow_experimental_insert_into_iceberg": 1} | ||
| ) | ||
| assert int(instance.query(f"SELECT count() FROM {namespace}.{ch_table_identifier}").strip()) == 0 | ||
|
|
||
| # 2. Restart ClickHouse and verify table is still readable (count = 0) | ||
| instance.restart_clickhouse() | ||
| assert int(instance.query(f"SELECT count() FROM {namespace}.{ch_table_identifier}").strip()) == 0 | ||
|
|
||
| # 3. Insert new data after restart and verify it's readable | ||
| new_df = pa.Table.from_pylist([{"id": 3, "val": "C"}]) | ||
| catalog.load_table(f"{namespace}.{table_name}").append(new_df) | ||
| assert int(instance.query(f"SELECT count() FROM {namespace}.{ch_table_identifier}").strip()) == 1 | ||
|
|
||
| instance.query(f"DROP DATABASE {namespace}") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see in IcebergWrites the check is different, but leads to the same result. To the reader like me, it is not very clear why transactional needs a different URI.
I suggest one of the below: