Commit a9df02e

[core] Support chain tbl on batch mode (#6394)
1 parent 184b95a commit a9df02e

File tree: 14 files changed, +1524 −10 lines changed
Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,148 @@
---
title: "Chain Table"
weight: 6
type: docs
aliases:
- /primary-key-table/chain-table.html
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Chain Table

Chain table is a new capability for primary key tables that transforms how you process incremental data.
Imagine a scenario where you periodically store a full snapshot of data (for example, once a day), even
though only a small portion changes between snapshots. An ODS binlog dump is a typical example of this pattern.

Take a daily binlog dump job as an example: a batch job merges yesterday's full dataset with today's
incremental changes to produce a new full dataset. This approach has two clear drawbacks:
* Full computation: the merge operation processes all data and involves a shuffle, which results in poor performance.
* Full storage: a full copy of the data is stored every day, even though the changed data usually accounts for a very small proportion.

Paimon addresses this problem by directly consuming only the changed data and performing merge-on-read.
In this way, full computation and storage are turned into incremental mode:
* Incremental computation: the daily offline ETL job only needs to consume the changed data of the current day and does not need to merge all data.
* Incremental storage: only the changed data is stored each day, and it is asynchronously compacted periodically (e.g., weekly) to build a global chain table within the lifecycle.

{{< img src="/img/chain-table.png">}}
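The incremental mode above can be sketched in a few lines of Python. This is a hypothetical, simplified model (not Paimon code): one full base snapshot plus small daily deltas, merged only at read time.

```python
# Hypothetical sketch of merge-on-read over incremental data (illustrative
# only, not Paimon's implementation): keep one full snapshot plus small
# daily deltas, and merge them only when reading.

def read_merged(snapshot, deltas):
    """Merge the base snapshot with daily deltas; later days win per key."""
    merged = dict(snapshot)
    for delta in deltas:      # deltas ordered oldest -> newest
        merged.update(delta)  # only changed keys are stored and applied
    return merged

snapshot = {"k1": "v1", "k2": "v2"}        # full data on day 0
deltas = [{"k2": "v2b"}, {"k3": "v3"}]     # changes on day 1 and day 2

print(read_merged(snapshot, deltas))
# {'k1': 'v1', 'k2': 'v2b', 'k3': 'v3'}
```

Storage only grows by the size of each delta, while readers still see a full, up-to-date view.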

On top of a regular table, a chain table introduces snapshot and delta branches to represent full and incremental
data respectively. When writing, you specify the branch to write full or incremental data to. When reading, Paimon
automatically chooses the appropriate strategy based on the read mode: full, incremental, or hybrid.

To enable a chain table, set `chain-table.enabled` to `true` in the table options when creating the
table, and create the snapshot and delta branches as well. Consider an example via Spark SQL:

```sql
CREATE TABLE default.t (
    `t1` string,
    `t2` string,
    `t3` string
) PARTITIONED BY (`date` string)
TBLPROPERTIES (
    'chain-table.enabled' = 'true',
    -- props about primary key table
    'primary-key' = 'date,t1',
    'sequence.field' = 't2',
    'bucket-key' = 't1',
    'bucket' = '2',
    -- props about partition
    'partition.timestamp-pattern' = '$date',
    'partition.timestamp-formatter' = 'yyyyMMdd'
);

CALL sys.create_branch('default.t', 'snapshot');

CALL sys.create_branch('default.t', 'delta');

ALTER TABLE default.t SET tblproperties
('scan.fallback-snapshot-branch' = 'snapshot',
 'scan.fallback-delta-branch' = 'delta');

ALTER TABLE `default`.`t$branch_snapshot` SET tblproperties
('scan.fallback-snapshot-branch' = 'snapshot',
 'scan.fallback-delta-branch' = 'delta');

ALTER TABLE `default`.`t$branch_delta` SET tblproperties
('scan.fallback-snapshot-branch' = 'snapshot',
 'scan.fallback-delta-branch' = 'delta');
```
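The two fallback options set above drive branch selection at read time: main branch first, then the snapshot branch, and finally a chain merge of snapshot and delta branches. A rough Python sketch of that strategy (hypothetical and simplified; the real resolution lives inside Paimon's scan planning, and all names here are illustrative):

```python
# Hypothetical sketch of the fallback read strategy: read the main branch
# first, then the snapshot branch, and finally chain-merge the latest full
# partition with the newer delta partitions up to the queried partition.

def resolve_read(partition, main, snapshot, delta):
    if partition in main:
        return dict(main[partition])      # found in main branch
    if partition in snapshot:
        return dict(snapshot[partition])  # found in snapshot (full) branch
    # chain merge: latest earlier full partition plus newer delta partitions
    base = max((p for p in snapshot if p < partition), default=None)
    rows = dict(snapshot.get(base, {}))
    for p in sorted(delta):
        if (base is None or p > base) and p <= partition:
            rows.update(delta[p])         # delta rows override by primary key
    return rows

snapshot = {"20250810": {"1": ("1", "1")}}  # full write from the example below
delta = {"20250811": {"2": ("1", "1")}}     # incremental write
print(resolve_read("20250811", {}, snapshot, delta))
# {'1': ('1', '1'), '2': ('1', '1')}
```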

Notice that:
- Chain table is only supported for primary key tables, which means you should define `bucket` and `bucket-key` for the table.
- A chain table requires the schema of each branch to be consistent.
- Only Spark is supported for now; Flink support will be added later.
- Chain compaction is not supported for now; it will be added later.

After creating a chain table, you can read and write data in the following ways.

- Full Write: write data to `t$branch_snapshot`.
```sql
insert overwrite `default`.`t$branch_snapshot` partition (date = '20250810')
values ('1', '1', '1');
```

- Incremental Write: write data to `t$branch_delta`.
```sql
insert overwrite `default`.`t$branch_delta` partition (date = '20250811')
values ('2', '1', '1');
```

- Full Query: if the snapshot branch has the full partition, read it directly; otherwise, read in chain merge mode.
```sql
select t1, t2, t3 from default.t where date = '20250811'
```
you will get the following result:
```text
+---+---+---+
| t1| t2| t3|
+---+---+---+
|  1|  1|  1|
|  2|  1|  1|
+---+---+---+
```
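The chain merge above deduplicates rows by primary key, with the sequence field (`t2` in this table) deciding which row survives when a key appears in several branches. A toy Python model of that deduplicate behavior (illustrative only, not Paimon's merge engine):

```python
# Toy model (not Paimon code) of deduplicate merging: for each primary key,
# keep the row with the largest sequence field; rows with distinct keys all
# survive, which is why the full query returns both rows.

def deduplicate(rows, key_field, seq_field):
    latest = {}
    for row in rows:  # rows gathered from the snapshot and delta branches
        k = row[key_field]
        if k not in latest or row[seq_field] >= latest[k][seq_field]:
            latest[k] = row
    return sorted(latest.values(), key=lambda r: r[key_field])

rows = [
    {"t1": "1", "t2": "1", "t3": "1"},  # from the snapshot branch
    {"t1": "2", "t2": "1", "t3": "1"},  # from the delta branch
]
print(deduplicate(rows, "t1", "t2"))
# [{'t1': '1', 't2': '1', 't3': '1'}, {'t1': '2', 't2': '1', 't3': '1'}]
```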

- Incremental Query: read the incremental partition from `t$branch_delta`.
```sql
select t1, t2, t3 from `default`.`t$branch_delta` where date = '20250811'
```
you will get the following result:
```text
+---+---+---+
| t1| t2| t3|
+---+---+---+
|  2|  1|  1|
+---+---+---+
```

- Hybrid Query: read both full and incremental data simultaneously.
```sql
select t1, t2, t3 from default.t where date = '20250811'
union all
select t1, t2, t3 from `default`.`t$branch_delta` where date = '20250811'
```
you will get the following result:
```text
+---+---+---+
| t1| t2| t3|
+---+---+---+
|  1|  1|  1|
|  2|  1|  1|
|  2|  1|  1|
+---+---+---+
```

docs/layouts/shortcodes/generated/core_configuration.html

Lines changed: 18 additions & 0 deletions
@@ -98,6 +98,12 @@
             <td>MemorySize</td>
             <td>Memory page size for caching.</td>
         </tr>
+        <tr>
+            <td><h5>chain-table.enabled</h5></td>
+            <td style="word-wrap: break-word;">false</td>
+            <td>Boolean</td>
+            <td>Whether to enable the chain table.</td>
+        </tr>
         <tr>
             <td><h5>changelog-file.compression</h5></td>
             <td style="word-wrap: break-word;">(none)</td>
@@ -1061,6 +1067,18 @@
             <td>String</td>
             <td>When a batch job queries from a table, if a partition does not exist in the current branch, the reader will try to get this partition from this fallback branch.</td>
         </tr>
+        <tr>
+            <td><h5>scan.fallback-delta-branch</h5></td>
+            <td style="word-wrap: break-word;">(none)</td>
+            <td>String</td>
+            <td>When a batch job queries from a chain table, if a partition does not exist in either the main or the snapshot branch, the reader will try to get this partition from the chain snapshot and delta branches together.</td>
+        </tr>
+        <tr>
+            <td><h5>scan.fallback-snapshot-branch</h5></td>
+            <td style="word-wrap: break-word;">(none)</td>
+            <td>String</td>
+            <td>When a batch job queries from a chain table, if a partition does not exist in the main branch, the reader will try to get this partition from the chain snapshot branch.</td>
+        </tr>
         <tr>
             <td><h5>scan.file-creation-time-millis</h5></td>
             <td style="word-wrap: break-word;">(none)</td>

docs/static/img/chain-table.png

201 KB

paimon-api/src/main/java/org/apache/paimon/CoreOptions.java

Lines changed: 34 additions & 0 deletions
@@ -226,6 +226,28 @@ public InlineElement getDescription() {
     public static final ConfigOption<String> BRANCH =
             key("branch").stringType().defaultValue("main").withDescription("Specify branch name.");

+    public static final ConfigOption<Boolean> CHAIN_TABLE_ENABLED =
+            key("chain-table.enabled")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription("Whether to enable the chain table.");
+
+    public static final ConfigOption<String> SCAN_FALLBACK_SNAPSHOT_BRANCH =
+            key("scan.fallback-snapshot-branch")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "When a batch job queries from a chain table, if a partition does not exist in the main branch, "
+                                    + "the reader will try to get this partition from the chain snapshot branch.");
+
+    public static final ConfigOption<String> SCAN_FALLBACK_DELTA_BRANCH =
+            key("scan.fallback-delta-branch")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "When a batch job queries from a chain table, if a partition does not exist in either the main or the snapshot branch, "
+                                    + "the reader will try to get this partition from the chain snapshot and delta branches together.");
+
     public static final String FILE_FORMAT_ORC = "orc";
     public static final String FILE_FORMAT_AVRO = "avro";
     public static final String FILE_FORMAT_PARQUET = "parquet";
@@ -3254,6 +3276,18 @@ public int lookupMergeRecordsThreshold() {
         return options.get(LOOKUP_MERGE_RECORDS_THRESHOLD);
     }

+    public boolean isChainTable() {
+        return options.get(CHAIN_TABLE_ENABLED);
+    }
+
+    public String scanFallbackSnapshotBranch() {
+        return options.get(SCAN_FALLBACK_SNAPSHOT_BRANCH);
+    }
+
+    public String scanFallbackDeltaBranch() {
+        return options.get(SCAN_FALLBACK_DELTA_BRANCH);
+    }
+
     public boolean formatTableImplementationIsPaimon() {
         return options.get(FORMAT_TABLE_IMPLEMENTATION) == FormatTableImplementation.PAIMON;
     }

paimon-core/src/main/java/org/apache/paimon/io/ChainKeyValueFileReaderFactory.java

Lines changed: 12 additions & 1 deletion
@@ -70,8 +70,19 @@ public ChainKeyValueFileReaderFactory(
         this.chainReadContext = chainReadContext;
         CoreOptions options = new CoreOptions(schema.options());
         this.currentBranch = options.branch();
+        String snapshotBranch = options.scanFallbackSnapshotBranch();
+        String deltaBranch = options.scanFallbackDeltaBranch();
+        SchemaManager snapshotSchemaManager =
+                snapshotBranch.equalsIgnoreCase(currentBranch)
+                        ? schemaManager
+                        : schemaManager.copyWithBranch(snapshotBranch);
+        SchemaManager deltaSchemaManager =
+                deltaBranch.equalsIgnoreCase(currentBranch)
+                        ? schemaManager
+                        : schemaManager.copyWithBranch(deltaBranch);
         this.branchSchemaManagers = new HashMap<>();
-        this.branchSchemaManagers.put(currentBranch, schemaManager);
+        this.branchSchemaManagers.put(snapshotBranch, snapshotSchemaManager);
+        this.branchSchemaManagers.put(deltaBranch, deltaSchemaManager);
     }

     @Override

paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java

Lines changed: 34 additions & 0 deletions
@@ -21,6 +21,7 @@
 import org.apache.paimon.CoreOptions;
 import org.apache.paimon.CoreOptions.ChangelogProducer;
 import org.apache.paimon.CoreOptions.MergeEngine;
+import org.apache.paimon.TableType;
 import org.apache.paimon.factories.FactoryUtil;
 import org.apache.paimon.format.FileFormat;
 import org.apache.paimon.mergetree.compact.aggregate.FieldAggregator;
@@ -39,6 +40,7 @@
 import org.apache.paimon.types.MultisetType;
 import org.apache.paimon.types.RowType;
 import org.apache.paimon.types.TimestampType;
+import org.apache.paimon.utils.Preconditions;
 import org.apache.paimon.utils.StringUtils;

 import java.util.ArrayList;
@@ -241,6 +243,8 @@ public static void validateTableSchema(TableSchema schema) {
         validateRowTracking(schema, options);

         validateIncrementalClustering(schema, options);
+
+        validateChainTable(schema, options);
     }

     public static void validateFallbackBranch(SchemaManager schemaManager, TableSchema schema) {
@@ -679,4 +683,34 @@ private static void validateIncrementalClustering(TableSchema schema, CoreOption
                     PRIMARY_KEY.key());
         }
     }
+
+    public static void validateChainTable(TableSchema schema, CoreOptions options) {
+        if (options.isChainTable()) {
+            boolean isPrimaryTbl = schema.primaryKeys() != null && !schema.primaryKeys().isEmpty();
+            boolean isPartitionTbl =
+                    schema.partitionKeys() != null && !schema.partitionKeys().isEmpty();
+            ChangelogProducer changelogProducer = options.changelogProducer();
+            Preconditions.checkArgument(
+                    options.type() == TableType.TABLE, "Chain table must be table type.");
+            Preconditions.checkArgument(isPrimaryTbl, "Primary key is required for chain table.");
+            Preconditions.checkArgument(isPartitionTbl, "Chain table must be partition table.");
+            Preconditions.checkArgument(
+                    options.bucket() > 0, "Bucket number must be greater than 0 for chain table.");
+            Preconditions.checkArgument(
+                    options.sequenceField() != null, "Sequence field is required for chain table.");
+            Preconditions.checkArgument(
+                    options.mergeEngine() == MergeEngine.DEDUPLICATE,
+                    "Merge engine must be deduplicate for chain table.");
+            Preconditions.checkArgument(
+                    changelogProducer == ChangelogProducer.NONE
+                            || changelogProducer == ChangelogProducer.INPUT,
+                    "Changelog producer must be none or input for chain table.");
+            Preconditions.checkArgument(
+                    options.partitionTimestampPattern() != null,
+                    "Partition timestamp pattern is required for chain table.");
+            Preconditions.checkArgument(
+                    options.partitionTimestampFormatter() != null,
+                    "Partition timestamp formatter is required for chain table.");
+        }
+    }
 }
