From e151cebb7495c500a039de2e8e0861919a2926e1 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Mon, 10 Nov 2025 18:27:15 +0800 Subject: [PATCH 1/2] docs: refresh join references --- .../20-query-syntax/04-query-join.md | 913 ++++++++++++++++- .../20-query-syntax/04-query-join.md | 955 +++++++++++++++++- 2 files changed, 1866 insertions(+), 2 deletions(-) diff --git a/docs/cn/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md b/docs/cn/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md index df4c4bf217..3800c3be56 100644 --- a/docs/cn/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md +++ b/docs/cn/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md @@ -2,4 +2,915 @@ title: JOIN(连接) --- -Databend 支持多种 JOIN(连接)。 \ No newline at end of file +## 概述 + +JOIN 用于将多个表的列合并到同一个结果集中。Databend 在实现 ANSI SQL 标准 JOIN 的基础上,还扩展了语法支持,让开发者能够用统一的方式处理维度数据、缓慢变化维度(SCD)以及时间序列等复杂场景。 + +## 支持的类型 + +* [Inner Join](#inner-join) +* [Natural Join](#natural-join) +* [Cross Join](#cross-join) +* [Left Join](#left-join) +* [Right Join](#right-join) +* [Full Outer Join](#full-outer-join) +* [Left / Right Semi Join](#left--right-semi-join) +* [Left / Right Anti Join](#left--right-anti-join) +* [ASOF Join](#asof-join) + +## 示例数据 + +### 准备表数据 + +先运行下面的 SQL,创建并填充本页所用到的所有表: + +```sql +-- VIP 资料 +CREATE OR REPLACE TABLE vip_info (client_id INT, region VARCHAR); +INSERT INTO vip_info VALUES + (101, 'Toronto'), + (102, 'Quebec'), + (103, 'Vancouver'); + +CREATE OR REPLACE TABLE purchase_records (client_id INT, item VARCHAR, qty INT); +INSERT INTO purchase_records VALUES + (100, 'Croissant', 2000), + (102, 'Donut', 3000), + (103, 'Coffee', 6000), + (106, 'Soda', 4000); + +CREATE OR REPLACE TABLE gift (gift VARCHAR); +INSERT INTO gift VALUES + ('Croissant'), ('Donut'), ('Coffee'), ('Soda'); + +-- 行情 & 交易样例 +CREATE OR REPLACE TABLE trades (symbol VARCHAR, time INT, price INT); +INSERT INTO trades VALUES + ('AAPL', 100003, 101), + ('AAPL', 100007, 103), + ('MSFT', 
100002, 99), + ('TSLA', 100010, 200); + +CREATE OR REPLACE TABLE quotes (symbol VARCHAR, time INT, bid INT, ask INT); +INSERT INTO quotes VALUES + ('AAPL', 100000, 99, 102), + ('AAPL', 100005, 100, 104), + ('MSFT', 100001, 98, 101), + ('NVDA', 100006, 150, 155); + +-- ASOF 示例的物联网数据 +CREATE OR REPLACE TABLE sensor_readings ( + room VARCHAR, + reading_time TIMESTAMP, + temperature DOUBLE +); +INSERT INTO sensor_readings VALUES + ('LivingRoom', '2024-01-01 09:55:00', 22.8), + ('LivingRoom', '2024-01-01 10:00:00', 23.1), + ('LivingRoom', '2024-01-01 10:05:00', 23.3), + ('LivingRoom', '2024-01-01 10:10:00', 23.8), + ('LivingRoom', '2024-01-01 10:15:00', 24.0); + +CREATE OR REPLACE TABLE hvac_mode ( + room VARCHAR, + mode_time TIMESTAMP, + mode VARCHAR +); +INSERT INTO hvac_mode VALUES + ('LivingRoom', '2024-01-01 09:58:00', 'Cooling'), + ('LivingRoom', '2024-01-01 10:06:00', 'Fan'), + ('LivingRoom', '2024-01-01 10:30:00', 'Heating'); +``` + +### 预览数据 + +如无特别说明,下文所有示例均基于这些表数据进行演示,便于直观对比各种 JOIN 类型的差异。 + +```text +vip_info ++-----------+-----------+ +| client_id | region | ++-----------+-----------+ +| 101 | Toronto | +| 102 | Quebec | +| 103 | Vancouver | ++-----------+-----------+ + +purchase_records ++-----------+-----------+------+ +| client_id | item | qty | ++-----------+-----------+------+ +| 100 | Croissant | 2000 | +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | +| 106 | Soda | 4000 | ++-----------+-----------+------+ + +gift ++-----------+ +| gift | ++-----------+ +| Croissant | +| Donut | +| Coffee | +| Soda | ++-----------+ +``` + +时间序列示例使用如下行情快照: + +```text +trades ++--------+--------+-------+ +| symbol | time | price | ++--------+--------+-------+ +| AAPL | 100003 | 101 | +| AAPL | 100007 | 103 | +| MSFT | 100002 | 99 | +| TSLA | 100010 | 200 | ++--------+--------+-------+ + +quotes ++--------+--------+-----+-----+ +| symbol | time | bid | ask | ++--------+--------+-----+-----+ +| AAPL | 100000 | 99 | 102 | +| AAPL | 100005 | 100 | 104 | +| MSFT | 100001 | 
98 | 101 | +| NVDA | 100006 | 150 | 155 | ++--------+--------+-----+-----+ + +sensor_readings ++-----------+---------------------+-------------+ +| room | reading_time | temperature | ++-----------+---------------------+-------------+ +| LivingRoom| 2024-01-01 09:55:00 | 22.8 | +| LivingRoom| 2024-01-01 10:00:00 | 23.1 | +| LivingRoom| 2024-01-01 10:05:00 | 23.3 | +| LivingRoom| 2024-01-01 10:10:00 | 23.8 | +| LivingRoom| 2024-01-01 10:15:00 | 24.0 | ++-----------+---------------------+-------------+ + +hvac_mode ++-----------+---------------------+----------+ +| room | mode_time | mode | ++-----------+---------------------+----------+ +| LivingRoom| 2024-01-01 09:58:00 | Cooling | +| LivingRoom| 2024-01-01 10:06:00 | Fan | +| LivingRoom| 2024-01-01 10:30:00 | Heating | ++-----------+---------------------+----------+ +``` + +## Inner Join + +内连接仅返回满足连接条件的记录。 + +### 可视化 + +```text +┌──────────────────────────────┐ +│ vip_info (left) │ +├──────────────────────────────┤ +│ client_id | region │ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ client_id = client_id + ▼ +┌──────────────────────────────┐ +│ purchase_records (right) │ +├──────────────────────────────┤ +│ client_id | item | qty │ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ 只保留匹配 + ▼ +┌──────────────────────────────┐ +│ INNER JOIN RESULT │ +├──────────────────────────────┤ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +└──────────────────────────────┘ +``` + +### 语法 + +```sql +SELECT select_list +FROM table_a + [INNER] JOIN table_b + ON join_condition +``` + +:::tip +`INNER` 关键字可以省略;当连接列名称相同时,可以使用 `USING(column_name)` 语法。 +::: + +### 示例 + +```sql +SELECT p.client_id, p.item, p.qty +FROM vip_info AS v +INNER JOIN purchase_records AS p + ON v.client_id = p.client_id; +``` + +结果: + +```text ++-----------+--------+------+ +| client_id | item | qty | 
++-----------+--------+------+ +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | ++-----------+--------+------+ +``` + +## Natural Join + +自然连接会自动匹配两个表中所有同名列,并在结果集中只保留一份同名列。 + +### 可视化 + +```text +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ client_id | region │ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ 自动匹配所有同名列 + ▼ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ client_id | item | qty │ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ 同名列只输出一次 + ▼ +┌──────────────────────────────┐ +│ NATURAL JOIN RESULT │ +├──────────────────────────────┤ +│ 102: Quebec + Donut + 3000 │ +│ 103: Vanc. + Coffee + 6000 │ +└──────────────────────────────┘ +``` + +### 语法 + +```sql +SELECT select_list +FROM table_a +NATURAL JOIN table_b; +``` + +### 示例 + +```sql +SELECT client_id, item, qty +FROM vip_info +NATURAL JOIN purchase_records; +``` + +结果: + +```text ++-----------+--------+------+ +| client_id | item | qty | ++-----------+--------+------+ +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | ++-----------+--------+------+ +``` + +## Cross Join + +交叉连接(笛卡尔积)会返回所有可能的表组合。 + +### 可视化 + +```text +┌──────────────────────────────┐ +│ vip_info (3 行) │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ 与所有礼品组合 + ▼ +┌──────────────────────────────┐ +│ gift (4 行) │ +├──────────────────────────────┤ +│ Croissant │ +│ Donut │ +│ Coffee │ +│ Soda │ +└──────────────────────────────┘ + │ 3 × 4 = 12 + ▼ +┌──────────────────────────────┐ +│ CROSS JOIN RESULT(节选) │ +├──────────────────────────────┤ +│ 101 | Toronto | Croissant │ +│ 101 | Toronto | Donut │ +│ 101 | Toronto | Coffee │ +│ ... | ... | ... 
│ +└──────────────────────────────┘ +``` + +### 语法 + +```sql +SELECT select_list +FROM table_a +CROSS JOIN table_b; +``` + +### 示例 + +```sql +SELECT v.client_id, v.region, g.gift +FROM vip_info AS v +CROSS JOIN gift AS g; +``` + +结果(部分): + +```text ++-----------+----------+-----------+ +| client_id | region | gift | ++-----------+----------+-----------+ +| 101 | Toronto | Croissant | +| 101 | Toronto | Donut | +| 101 | Toronto | Coffee | +| 101 | Toronto | Soda | +| ... | ... | ... | ++-----------+----------+-----------+ +``` + +## Left Join + +左连接会保留左表的所有记录,右表只显示匹配的记录;对于未匹配的记录,右表列值为 `NULL`。 + +### 可视化 + +```text +┌──────────────────────────────┐ +│ vip_info(左表保留) │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ 按 client_id 连接 + ▼ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ 未匹配行填 NULL + ▼ +┌──────────────────────────────┐ +│ LEFT JOIN RESULT │ +├──────────────────────────────┤ +│ 101 | Toronto | NULL | NULL │ +│ 102 | Quebec | Donut | 3000 │ +│ 103 | Vanc. 
| Coffee| 6000 │ +└──────────────────────────────┘ +``` + +### 语法 + +```sql +SELECT select_list +FROM table_a +LEFT [OUTER] JOIN table_b + ON join_condition; +``` + +:::tip +`OUTER` 关键字是可选的。 +::: + +### 示例 + +```sql +SELECT v.client_id, p.item, p.qty +FROM vip_info AS v +LEFT JOIN purchase_records AS p + ON v.client_id = p.client_id; +``` + +结果: + +```text ++-----------+--------+------+ +| client_id | item | qty | ++-----------+--------+------+ +| 101 | NULL | NULL | +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | ++-----------+--------+------+ +``` + +## Right Join + +右连接是左连接的镜像:右表记录全部保留,左表未匹配的记录列值为 `NULL`。 + +### 可视化 + +```text +┌──────────────────────────────┐ +│ purchase_records(右表) │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + ▲ 右表全部保留 + │ client_id 匹配 +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + ▼ 左侧缺失填 NULL +┌──────────────────────────────┐ +│ RIGHT JOIN RESULT │ +├──────────────────────────────┤ +│ 100 | Croissant | vip=NULL │ +│ 102 | Donut | region=QC │ +│ 103 | Coffee | region=VAN │ +│ 106 | Soda | vip=NULL │ +└──────────────────────────────┘ +``` + +### 语法 + +```sql +SELECT select_list +FROM table_a +RIGHT [OUTER] JOIN table_b + ON join_condition; +``` + +### 示例 + +```sql +SELECT v.client_id, v.region +FROM vip_info AS v +RIGHT JOIN purchase_records AS p + ON v.client_id = p.client_id; +``` + +结果: + +```text ++-----------+-----------+ +| client_id | region | ++-----------+-----------+ +| NULL | NULL | +| 102 | Quebec | +| 103 | Vancouver | +| NULL | NULL | ++-----------+-----------+ +``` + +## Full Outer Join + +全外连接相当于左连接和右连接的并集:两个表的所有记录都会出现在结果中,未匹配的列显示为 `NULL`。 + +### 可视化 + +```text +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec 
│ +│ 103 | Vancouver │ +└──────────────────────────────┘ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ 左匹配 + 左独有 + 右独有 + ▼ +┌──────────────────────────────┐ +│ FULL OUTER JOIN RESULT │ +├──────────────────────────────┤ +│ Toronto | NULL │ +│ Quebec | Donut │ +│ Vanc. | Coffee │ +│ NULL | Croissant │ +│ NULL | Soda │ +└──────────────────────────────┘ +``` + +### 语法 + +```sql +SELECT select_list +FROM table_a +FULL [OUTER] JOIN table_b + ON join_condition; +``` + +### 示例 + +```sql +SELECT v.region, p.item +FROM vip_info AS v +FULL OUTER JOIN purchase_records AS p + ON v.client_id = p.client_id; +``` + +结果: + +```text ++-----------+-----------+ +| region | item | ++-----------+-----------+ +| Toronto | NULL | +| Quebec | Donut | +| Vancouver | Coffee | +| NULL | Croissant | +| NULL | Soda | ++-----------+-----------+ +``` + +## Left / Right Semi Join + +半连接只返回保留侧(左侧或右侧)的列,主要用于存在性过滤:左半连接返回左表中存在匹配的记录,右半连接则返回右表中存在匹配的记录。 + +### 可视化 + +```text +LEFT SEMI JOIN +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ 仅保留找到匹配的行 + ▼ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + ▼ +┌──────────────────────────────┐ +│ LEFT SEMI RESULT │ +├──────────────────────────────┤ +│ 102 | Quebec │ +│ 103 | Vanc. 
│ +└──────────────────────────────┘ + +RIGHT SEMI JOIN +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ 只保留能匹配 VIP 的行 + ▼ +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + ▼ +┌──────────────────────────────┐ +│ RIGHT SEMI RESULT │ +├──────────────────────────────┤ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +└──────────────────────────────┘ +``` + +### 语法 + +```sql +-- Left Semi Join +SELECT select_list +FROM table_a +LEFT SEMI JOIN table_b + ON join_condition; + +-- Right Semi Join +SELECT select_list +FROM table_a +RIGHT SEMI JOIN table_b + ON join_condition; +``` + +### 示例 + +左半连接:筛选出有购买记录的 VIP 客户: + +```sql +SELECT * +FROM vip_info +LEFT SEMI JOIN purchase_records + ON vip_info.client_id = purchase_records.client_id; +``` + +右半连接:筛选出 VIP 客户的购买记录: + +```sql +SELECT * +FROM vip_info +RIGHT SEMI JOIN purchase_records + ON vip_info.client_id = purchase_records.client_id; +``` + +## Left / Right Anti Join + +反连接与半连接正好相反:左反连接返回左表中无法匹配右表的记录,右反连接则返回右表中无法匹配左表的记录。 + +### 可视化 + +```text +LEFT ANTI JOIN +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ 去掉所有匹配行 + ▼ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + ▼ +┌──────────────────────────────┐ +│ LEFT ANTI RESULT │ +├──────────────────────────────┤ +│ 101 | Toronto │ +└──────────────────────────────┘ + +RIGHT ANTI JOIN +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | 
Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ 去掉能匹配 VIP 的行 + ▼ +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + ▼ +┌──────────────────────────────┐ +│ RIGHT ANTI RESULT │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ +``` + +### 语法 + +```sql +-- Left Anti Join +SELECT select_list +FROM table_a +LEFT ANTI JOIN table_b + ON join_condition; + +-- Right Anti Join +SELECT select_list +FROM table_a +RIGHT ANTI JOIN table_b + ON join_condition; +``` + +### 示例 + +左反连接:找出没有任何购买记录的 VIP 客户: + +```sql +SELECT * +FROM vip_info +LEFT ANTI JOIN purchase_records + ON vip_info.client_id = purchase_records.client_id; +``` + +右反连接:找出非 VIP 客户的购买记录: + +```sql +SELECT * +FROM vip_info +RIGHT ANTI JOIN purchase_records + ON vip_info.client_id = purchase_records.client_id; +``` + +## ASOF Join + +ASOF(近似排序合并)连接会将左表的每一行与右表中"时间不晚于当前行"的最新记录进行匹配。简单来说,就是为每个事件找到"发生前最新的上下文"。这种连接方式常用于将最新报价关联到交易记录,或将最新的 HVAC 模式关联到温度采样数据等场景。 + +### 匹配规则 + +1. 首先按照等值列(如 `room`、`symbol`)对两个表进行分组 +2. 在每个分组内按时间列排序 +3. 
遍历左表记录时,找到右表中时间 `<=` 当前记录时间的最新记录;若不存在则填充 `NULL` + +### 快速示例:温度读数 VS HVAC 模式 + +```text +┌──────────────────────────────┐ +│ sensor_readings (left table) │ +├──────────────────────────────┤ +│ room | time | temperature │ +│ LR | 09:55 | 22.8C │ +│ LR | 10:00 | 23.1C │ +│ LR | 10:05 | 23.3C │ +│ LR | 10:10 | 23.8C │ +│ LR | 10:15 | 24.0C │ +└──────────────────────────────┘ + +┌──────────────────────────────┐ +│ hvac_mode (right table) │ +├──────────────────────────────┤ +│ room | time | mode │ +│ LR | 09:58 | Cooling │ +│ LR | 10:06 | Fan │ +│ LR | 10:30 | Heating │ +└──────────────────────────────┘ + +┌────────────────────────────────────────────────────────────┐ +│ Result of ASOF JOIN │ +│ ON r.room = m.room │ +│ AND r.reading_time >= m.mode_time │ +├────────────────────────────────────────────────────────────┤ +│ 10:00 reading -> latest mode 09:58 (<= 10:00) │ +│ 10:05 reading -> still mode 09:58 (no change yet) │ +│ 10:10 reading -> mode 10:06 │ +│ 10:15 reading -> still mode 10:06 │ +│ 09:55 reading -> no match (behaves like INNER JOIN) │ +└────────────────────────────────────────────────────────────┘ +``` + +上表演示了 ASOF 在 HVAC 场景中的匹配:每条温度读数都会拿到“时间不晚于自身”的最新模式;如果某条读数发生在第一条模式之前(09:55),普通 ASOF JOIN 会将其过滤掉。 + +左 ASOF 连接会保留所有的传感器读数(比如 09:55 的记录仍会保留,只是模式为 `NULL`);右 ASOF 连接会保留所有的 HVAC 模式(即使暂时没有任何读数与之匹配)。 + +### 语法 + +```sql +SELECT select_list +FROM table_a +ASOF [LEFT | RIGHT] JOIN table_b + ON table_a.time >= table_b.time + [AND table_a.key = table_b.key]; +``` + +### 示例数据 + +如果只想重现 HVAC 场景,可以单独执行以下语句: + +```sql +CREATE OR REPLACE TABLE sensor_readings ( + room VARCHAR, reading_time TIMESTAMP, + temperature DOUBLE +); +INSERT INTO sensor_readings VALUES + ('LivingRoom', '2024-01-01 09:55:00', 22.8), ('LivingRoom', '2024-01-01 10:00:00', 23.1), + ('LivingRoom', '2024-01-01 10:05:00', 23.3), + ('LivingRoom', '2024-01-01 10:10:00', 23.8), + ('LivingRoom', '2024-01-01 10:15:00', 24.0); + +CREATE OR REPLACE TABLE hvac_mode ( + room VARCHAR, mode_time TIMESTAMP, + mode VARCHAR +); +INSERT INTO hvac_mode VALUES + ('LivingRoom', '2024-01-01 09:58:00', 'Cooling'), + ('LivingRoom', '2024-01-01 10:06:00', 'Fan'), + ('LivingRoom', '2024-01-01 
10:30:00', 'Heating'); +``` + +### 示例 + +将每条温度读数与其之前最新的 HVAC 模式进行关联: + +```sql +SELECT r.reading_time, r.temperature, m.mode +FROM sensor_readings AS r +ASOF JOIN hvac_mode AS m + ON r.room = m.room + AND r.reading_time >= m.mode_time +ORDER BY r.reading_time; +``` + +结果: + +```text +┌─────────────────────┬─────────────┬────────────┐ +│ reading_time │ temperature │ mode │ +├─────────────────────┼─────────────┼────────────┤ +│ 2024-01-01 10:00:00 │ 23.1C │ Cooling │ +│ 2024-01-01 10:05:00 │ 23.3C │ Cooling │ +│ 2024-01-01 10:10:00 │ 23.8C │ Fan │ +│ 2024-01-01 10:15:00 │ 24.0C │ Fan │ +└─────────────────────┴─────────────┴────────────┘ +``` + +ASOF 左连接:即使当时还没有 HVAC 模式记录,也会保留所有的温度读数: + +```sql +SELECT r.reading_time, r.temperature, m.mode +FROM sensor_readings AS r +ASOF LEFT JOIN hvac_mode AS m + ON r.room = m.room + AND r.reading_time >= m.mode_time +ORDER BY r.reading_time; +``` + +结果: + +```text +┌─────────────────────┬─────────────┬────────────┐ +│ reading_time │ temperature │ mode │ +├─────────────────────┼─────────────┼────────────┤ +│ 2024-01-01 09:55:00 │ 22.8C │ NULL │ ← 在第一条 HVAC 模式之前 +│ 2024-01-01 10:00:00 │ 23.1C │ Cooling │ +│ 2024-01-01 10:05:00 │ 23.3C │ Cooling │ +│ 2024-01-01 10:10:00 │ 23.8C │ Fan │ +│ 2024-01-01 10:15:00 │ 24.0C │ Fan │ +└─────────────────────┴─────────────┴────────────┘ +``` + +ASOF 右连接:所有的 HVAC 模式都会出现在结果中,即使暂时没有被任何读数引用: + +```sql +SELECT r.reading_time, r.temperature, m.mode_time, m.mode +FROM sensor_readings AS r +ASOF RIGHT JOIN hvac_mode AS m + ON r.room = m.room + AND r.reading_time >= m.mode_time +ORDER BY m.mode_time, r.reading_time; +``` + +结果: + +```text +┌─────────────────────┬─────────────┬─────────────────────┬────────────┐ +│ reading_time │ temperature │ mode_time │ mode │ +├─────────────────────┼─────────────┼─────────────────────┼────────────┤ +│ 2024-01-01 10:00:00 │ 23.1C │ 2024-01-01 09:58:00 │ Cooling │ +│ 2024-01-01 10:05:00 │ 23.3C │ 2024-01-01 09:58:00 │ Cooling │ +│ 2024-01-01 10:10:00 │ 23.8C │ 2024-01-01 
10:06:00 │ Fan │ +│ 2024-01-01 10:15:00 │ 24.0C │ 2024-01-01 10:06:00 │ Fan │ +│ NULL │ NULL │ 2024-01-01 10:30:00 │ Heating │ ← 等待新的读数 +└─────────────────────┴─────────────┴─────────────────────┴────────────┘ +``` + +在同一个 HVAC 区间内可能包含多条读数,因此右 ASOF 连接可能会对同一个 `mode_time` 输出多行记录;最后的 `NULL` 行表示该模式暂时还没有匹配的读数。 diff --git a/docs/en/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md b/docs/en/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md index 3e8120ba52..acb92e3811 100644 --- a/docs/en/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md +++ b/docs/en/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md @@ -2,4 +2,957 @@ title: JOIN --- -Databend supports a variety of JOINs. \ No newline at end of file +## Overview + +Joins combine columns from two or more tables into one result set. Databend implements both ANSI SQL joins and Databend-specific extensions, allowing you to work with dimensional data, slowly changing dimensions (SCD), and time-series streams using the same syntax. 
+ +## Supported Join Types + +* [Inner Join](#inner-join) +* [Natural Join](#natural-join) +* [Cross Join](#cross-join) +* [Left Join](#left-join) +* [Right Join](#right-join) +* [Full Outer Join](#full-outer-join) +* [Left / Right Semi Join](#left--right-semi-join) +* [Left / Right Anti Join](#left--right-anti-join) +* [Asof Join](#asof-join) + +## Sample Data + +### Prepare the Tables + +Run the following SQL once to create and populate the tables used throughout this page: + +```sql +-- VIP profile tables +CREATE OR REPLACE TABLE vip_info (client_id INT, region VARCHAR); +INSERT INTO vip_info VALUES + (101, 'Toronto'), + (102, 'Quebec'), + (103, 'Vancouver'); + +CREATE OR REPLACE TABLE purchase_records (client_id INT, item VARCHAR, qty INT); +INSERT INTO purchase_records VALUES + (100, 'Croissant', 2000), + (102, 'Donut', 3000), + (103, 'Coffee', 6000), + (106, 'Soda', 4000); + +CREATE OR REPLACE TABLE gift (gift VARCHAR); +INSERT INTO gift VALUES + ('Croissant'), ('Donut'), ('Coffee'), ('Soda'); + +-- Time-series market data +CREATE OR REPLACE TABLE trades (symbol VARCHAR, time INT, price INT); +INSERT INTO trades VALUES + ('AAPL', 100003, 101), + ('AAPL', 100007, 103), + ('MSFT', 100002, 99), + ('TSLA', 100010, 200); + +CREATE OR REPLACE TABLE quotes (symbol VARCHAR, time INT, bid INT, ask INT); +INSERT INTO quotes VALUES + ('AAPL', 100000, 99, 102), + ('AAPL', 100005, 100, 104), + ('MSFT', 100001, 98, 101), + ('NVDA', 100006, 150, 155); + +-- IoT-style readings for ASOF examples +CREATE OR REPLACE TABLE sensor_readings ( + room VARCHAR, + reading_time TIMESTAMP, + temperature DOUBLE +); +INSERT INTO sensor_readings VALUES + ('LivingRoom', '2024-01-01 09:55:00', 22.8), + ('LivingRoom', '2024-01-01 10:00:00', 23.1), + ('LivingRoom', '2024-01-01 10:05:00', 23.3), + ('LivingRoom', '2024-01-01 10:10:00', 23.8), + ('LivingRoom', '2024-01-01 10:15:00', 24.0); + +CREATE OR REPLACE TABLE hvac_mode ( + room VARCHAR, + mode_time TIMESTAMP, + mode VARCHAR +); +INSERT 
INTO hvac_mode VALUES + ('LivingRoom', '2024-01-01 09:58:00', 'Cooling'), + ('LivingRoom', '2024-01-01 10:06:00', 'Fan'), + ('LivingRoom', '2024-01-01 10:30:00', 'Heating'); +``` + +### Preview the Data + +Unless stated otherwise, the examples below reuse the same tables so that you can compare the effect of each join type directly. + +```text +vip_info ++-----------+-----------+ +| client_id | region | ++-----------+-----------+ +| 101 | Toronto | +| 102 | Quebec | +| 103 | Vancouver | ++-----------+-----------+ + +purchase_records ++-----------+-----------+------+ +| client_id | item | qty | ++-----------+-----------+------+ +| 100 | Croissant | 2000 | +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | +| 106 | Soda | 4000 | ++-----------+-----------+------+ + +gift ++-----------+ +| gift | ++-----------+ +| Croissant | +| Donut | +| Coffee | +| Soda | ++-----------+ +``` + +Time-series examples use the following market data snapshots: + +```text +trades ++--------+--------+-------+ +| symbol | time | price | ++--------+--------+-------+ +| AAPL | 100003 | 101 | +| AAPL | 100007 | 103 | +| MSFT | 100002 | 99 | +| TSLA | 100010 | 200 | ++--------+--------+-------+ + +quotes ++--------+--------+-----+-----+ +| symbol | time | bid | ask | ++--------+--------+-----+-----+ +| AAPL | 100000 | 99 | 102 | +| AAPL | 100005 | 100 | 104 | +| MSFT | 100001 | 98 | 101 | +| NVDA | 100006 | 150 | 155 | ++--------+--------+-----+-----+ + +sensor_readings ++-----------+---------------------+-------------+ +| room | reading_time | temperature | ++-----------+---------------------+-------------+ +| LivingRoom| 2024-01-01 09:55:00 | 22.8 | +| LivingRoom| 2024-01-01 10:00:00 | 23.1 | +| LivingRoom| 2024-01-01 10:05:00 | 23.3 | +| LivingRoom| 2024-01-01 10:10:00 | 23.8 | +| LivingRoom| 2024-01-01 10:15:00 | 24.0 | ++-----------+---------------------+-------------+ + +hvac_mode ++-----------+---------------------+----------+ +| room | mode_time | mode | 
++-----------+---------------------+----------+ +| LivingRoom| 2024-01-01 09:58:00 | Cooling | +| LivingRoom| 2024-01-01 10:06:00 | Fan | +| LivingRoom| 2024-01-01 10:30:00 | Heating | ++-----------+---------------------+----------+ +``` + +## Inner Join + +An inner join returns rows that satisfy all join predicates. + +### Visual + +```text +┌──────────────────────────────┐ +│ vip_info (left) │ +├──────────────────────────────┤ +│ client_id | region │ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ client_id = client_id + ▼ +┌──────────────────────────────┐ +│ purchase_records (right) │ +├──────────────────────────────┤ +│ client_id | item | qty │ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ keep matches only + ▼ +┌──────────────────────────────┐ +│ INNER JOIN RESULT │ +├──────────────────────────────┤ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +└──────────────────────────────┘ +``` + +### Syntax + +```sql +SELECT select_list +FROM table_a + [INNER] JOIN table_b + ON join_condition +``` + +:::tip +`INNER` is optional. When the join columns share the same name, `USING(column_name)` can replace `ON table_a.column = table_b.column`. +::: + +### Example + +```sql +SELECT p.client_id, p.item, p.qty +FROM vip_info AS v +INNER JOIN purchase_records AS p + ON v.client_id = p.client_id; +``` + +Result: + +```text ++-----------+--------+------+ +| client_id | item | qty | ++-----------+--------+------+ +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | ++-----------+--------+------+ +``` + +## Natural Join + +A natural join automatically matches columns that have the same name in both tables. Only one copy of each matched column appears in the result. 
+ +### Visual + +```text +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ client_id | region │ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ auto-match shared column names + ▼ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ client_id | item | qty │ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ emit shared columns once + ▼ +┌──────────────────────────────┐ +│ NATURAL JOIN RESULT │ +├──────────────────────────────┤ +│ 102: Quebec + Donut + 3000 │ +│ 103: Vanc. + Coffee + 6000 │ +└──────────────────────────────┘ +``` + +### Syntax + +```sql +SELECT select_list +FROM table_a +NATURAL JOIN table_b; +``` + +### Example + +```sql +SELECT client_id, item, qty +FROM vip_info +NATURAL JOIN purchase_records; +``` + +Result: + +```text ++-----------+--------+------+ +| client_id | item | qty | ++-----------+--------+------+ +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | ++-----------+--------+------+ +``` + +## Cross Join + +A cross join (Cartesian product) returns every combination of rows from the participating tables. + +### Visual + +```text +┌──────────────────────────────┐ +│ vip_info (3 rows) │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ pair with every gift + ▼ +┌──────────────────────────────┐ +│ gift (4 rows) │ +├──────────────────────────────┤ +│ Croissant │ +│ Donut │ +│ Coffee │ +│ Soda │ +└──────────────────────────────┘ + │ 3 × 4 combinations + ▼ +┌──────────────────────────────┐ +│ CROSS JOIN RESULT (snippet) │ +├──────────────────────────────┤ +│ 101 | Toronto | Croissant │ +│ 101 | Toronto | Donut │ +│ 101 | Toronto | Coffee │ +│ ... | ... | ... 
 │ +└──────────────────────────────┘ +``` + +### Syntax + +```sql +SELECT select_list +FROM table_a +CROSS JOIN table_b; +``` + +### Example + +```sql +SELECT v.client_id, v.region, g.gift +FROM vip_info AS v +CROSS JOIN gift AS g; +``` + +Result (first few rows): + +```text ++-----------+----------+-----------+ +| client_id | region | gift | ++-----------+----------+-----------+ +| 101 | Toronto | Croissant | +| 101 | Toronto | Donut | +| 101 | Toronto | Coffee | +| 101 | Toronto | Soda | +| ... | ... | ... | ++-----------+----------+-----------+ +``` + +## Left Join + +A left join returns every row from the left table and the matching rows from the right table. When no match exists, the right-side columns are `NULL`. + +### Visual + +```text +┌──────────────────────────────┐ +│ vip_info (left preserved) │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ join on client_id + ▼ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ unmatched left rows -> right columns NULL + ▼ +┌──────────────────────────────┐ +│ LEFT JOIN RESULT │ +├──────────────────────────────┤ +│ 101 | Toronto | NULL | NULL │ +│ 102 | Quebec | Donut | 3000 │ +│ 103 | Vanc. | Coffee | 6000│ +└──────────────────────────────┘ +``` + +### Syntax + +```sql +SELECT select_list +FROM table_a +LEFT [OUTER] JOIN table_b + ON join_condition; +``` + +:::tip +`OUTER` is optional. 
+::: + +### Example + +```sql +SELECT v.client_id, p.item, p.qty +FROM vip_info AS v +LEFT JOIN purchase_records AS p + ON v.client_id = p.client_id; +``` + +Result: + +```text ++-----------+--------+------+ +| client_id | item | qty | ++-----------+--------+------+ +| 101 | NULL | NULL | +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | ++-----------+--------+------+ +``` + +## Right Join + +A right join mirrors the left join: all rows from the right table appear, and unmatched rows from the left table produce `NULL`s. + +### Visual + +```text +┌──────────────────────────────┐ +│ purchase_records (right) │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + ▲ right table preserved + │ join on client_id +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + ▼ fill missing VIP data with NULL +┌──────────────────────────────┐ +│ RIGHT JOIN RESULT │ +├──────────────────────────────┤ +│ 100 | Croissant | vip=NULL │ +│ 102 | Donut | region=Quebec │ +│ 103 | Coffee | region=Vanc. │ +│ 106 | Soda | vip=NULL │ +└──────────────────────────────┘ +``` + +### Syntax + +```sql +SELECT select_list +FROM table_a +RIGHT [OUTER] JOIN table_b + ON join_condition; +``` + +### Example + +```sql +SELECT v.client_id, v.region +FROM vip_info AS v +RIGHT JOIN purchase_records AS p + ON v.client_id = p.client_id; +``` + +Result: + +```text ++-----------+-----------+ +| client_id | region | ++-----------+-----------+ +| NULL | NULL | +| 102 | Quebec | +| 103 | Vancouver | +| NULL | NULL | ++-----------+-----------+ +``` + +## Full Outer Join + +A full outer join returns the union of left and right joins: every row from both tables, with `NULL`s where no match exists. 
+ +### Visual + +```text +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ combine matches + left-only + right-only + ▼ +┌──────────────────────────────┐ +│ FULL OUTER JOIN RESULT │ +├──────────────────────────────┤ +│ Toronto | NULL │ +│ Quebec | Donut │ +│ Vanc. | Coffee │ +│ NULL | Croissant │ +│ NULL | Soda │ +└──────────────────────────────┘ +``` + +### Syntax + +```sql +SELECT select_list +FROM table_a +FULL [OUTER] JOIN table_b + ON join_condition; +``` + +### Example + +```sql +SELECT v.region, p.item +FROM vip_info AS v +FULL OUTER JOIN purchase_records AS p + ON v.client_id = p.client_id; +``` + +Result: + +```text ++-----------+-----------+ +| region | item | ++-----------+-----------+ +| Toronto | NULL | +| Quebec | Donut | +| Vancouver | Coffee | +| NULL | Croissant | +| NULL | Soda | ++-----------+-----------+ +``` + +## Left / Right Semi Join + +Semi joins filter the left (or right) table to rows that have at least one match in the opposite table. Unlike inner joins, only columns from the preserved side are returned. 
+ +### Visual + +```text +LEFT SEMI JOIN +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ keep rows that find matches + ▼ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + ▼ +┌──────────────────────────────┐ +│ LEFT SEMI RESULT │ +├──────────────────────────────┤ +│ 102 | Quebec │ +│ 103 | Vanc. │ +└──────────────────────────────┘ + +RIGHT SEMI JOIN +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ keep rows with VIP matches + ▼ +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + ▼ +┌──────────────────────────────┐ +│ RIGHT SEMI RESULT │ +├──────────────────────────────┤ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +└──────────────────────────────┘ +``` + +### Syntax + +```sql +-- Left Semi Join +SELECT select_list +FROM table_a +LEFT SEMI JOIN table_b + ON join_condition; + +-- Right Semi Join +SELECT select_list +FROM table_a +RIGHT SEMI JOIN table_b + ON join_condition; +``` + +### Examples + +Left semi join—return VIP clients with purchases: + +```sql +SELECT * +FROM vip_info +LEFT SEMI JOIN purchase_records + ON vip_info.client_id = purchase_records.client_id; +``` + +Result: + +```text ++-----------+-----------+ +| client_id | region | ++-----------+-----------+ +| 102 | Quebec | +| 103 | Vancouver | ++-----------+-----------+ +``` + +Right semi join—return purchase rows that belong to VIP clients: + +```sql +SELECT * +FROM vip_info +RIGHT SEMI JOIN 
purchase_records + ON vip_info.client_id = purchase_records.client_id; +``` + +Result: + +```text ++-----------+--------+------+ +| client_id | item | qty | ++-----------+--------+------+ +| 102 | Donut | 3000 | +| 103 | Coffee | 6000 | ++-----------+--------+------+ +``` + +## Left / Right Anti Join + +Anti joins return rows that do **not** have a matching row on the other side, making them ideal for existence checks. + +### Visual + +```text +LEFT ANTI JOIN +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + │ remove rows with matches + ▼ +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + ▼ +┌──────────────────────────────┐ +│ LEFT ANTI RESULT │ +├──────────────────────────────┤ +│ 101 | Toronto │ +└──────────────────────────────┘ + +RIGHT ANTI JOIN +┌──────────────────────────────┐ +│ purchase_records │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 102 | Donut | 3000 │ +│ 103 | Coffee | 6000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ + │ remove rows with VIP matches + ▼ +┌──────────────────────────────┐ +│ vip_info │ +├──────────────────────────────┤ +│ 101 | Toronto │ +│ 102 | Quebec │ +│ 103 | Vancouver │ +└──────────────────────────────┘ + ▼ +┌──────────────────────────────┐ +│ RIGHT ANTI RESULT │ +├──────────────────────────────┤ +│ 100 | Croissant | 2000 │ +│ 106 | Soda | 4000 │ +└──────────────────────────────┘ +``` + +### Syntax + +```sql +-- Left Anti Join +SELECT select_list +FROM table_a +LEFT ANTI JOIN table_b + ON join_condition; + +-- Right Anti Join +SELECT select_list +FROM table_a +RIGHT ANTI JOIN table_b + ON join_condition; +``` + +### Examples + +Left anti join—VIP clients with no purchases: + +```sql +SELECT * 
+FROM vip_info +LEFT ANTI JOIN purchase_records + ON vip_info.client_id = purchase_records.client_id; +``` + +Result: + +```text ++-----------+---------+ +| client_id | region | ++-----------+---------+ +| 101 | Toronto | ++-----------+---------+ +``` + +Right anti join—purchase records that do not belong to a VIP client: + +```sql +SELECT * +FROM vip_info +RIGHT ANTI JOIN purchase_records + ON vip_info.client_id = purchase_records.client_id; +``` + +Result: + +```text ++-----------+-----------+------+ +| client_id | item | qty | ++-----------+-----------+------+ +| 100 | Croissant | 2000 | +| 106 | Soda | 4000 | ++-----------+-----------+------+ +``` + +## ASOF Join + +An ASOF ("as of") join matches each row in the left table to the most recent row in the right table whose timestamp is **less than or equal to** the left timestamp. Optional equality predicates (for keys such as `symbol`) can further constrain the match. ASOF joins power analytics like attaching the latest quote to each trade. + +Think of ASOF as "give me the latest contextual row that happened **before or at** this event." + +### Matching Rules + +1. Partition both tables by the equality keys (for example, `symbol`). +2. Within each partition, order both tables by the inequality column (for example, `time`). +3. When visiting a left row, attach the latest right row whose timestamp is `<=` the left timestamp; if none exists, a plain `ASOF JOIN` drops the left row (like an inner join), while `ASOF LEFT JOIN` keeps it with `NULL` in the right columns. 
+ +### Quick Example (Room Temperature vs HVAC Mode) + +```text +┌──────────────────────────────┐ +│ sensor_readings (left table) │ +├──────────────────────────────┤ +│ room | time | temperature │ +│ LR | 09:55 | 22.8C │ +│ LR | 10:00 | 23.1C │ +│ LR | 10:05 | 23.3C │ +│ LR | 10:10 | 23.8C │ +│ LR | 10:15 | 24.0C │ +└──────────────────────────────┘ + +┌──────────────────────────────┐ +│ hvac_mode (right table) │ +├──────────────────────────────┤ +│ room | time | mode │ +│ LR | 09:58 | Cooling │ +│ LR | 10:06 | Fan │ +│ LR | 10:30 | Heating │ +└──────────────────────────────┘ + +┌────────────────────────────────────────────────────────────┐ +│ Result of ASOF JOIN ON r.room = m.room │ +│ AND r.reading_time >= m.mode_time │ +├────────────────────────────────────────────────────────────┤ +│ 10:00 reading -> matches 09:58 mode (latest <= 10:00) │ +│ 10:05 reading -> still matches 09:58 (no newer mode yet) │ +│ 10:10 reading -> matches 10:06 mode │ +│ 10:15 reading -> matches 10:06 mode │ +│ 09:55 reading -> no row (ASOF behaves like INNER JOIN) │ +└────────────────────────────────────────────────────────────┘ +``` + +In LEFT ASOF joins every sensor reading stays (for example, the 09:55 reading keeps `NULL` because no HVAC mode has started yet). In RIGHT ASOF joins you keep all HVAC changes (even if no reading has happened yet to reference them). 
+ +### Syntax + +```sql +SELECT select_list +FROM table_a +ASOF [LEFT | RIGHT] JOIN table_b + ON table_a.time >= table_b.time + [AND table_a.key = table_b.key]; +``` + +### Example Tables + +Run the following once to reproduce the HVAC scenario shown below: + +```sql +CREATE OR REPLACE TABLE sensor_readings ( + room VARCHAR, + reading_time TIMESTAMP, + temperature DOUBLE +); +INSERT INTO sensor_readings VALUES + ('LivingRoom', '2024-01-01 09:55:00', 22.8), + ('LivingRoom', '2024-01-01 10:00:00', 23.1), + ('LivingRoom', '2024-01-01 10:05:00', 23.3), + ('LivingRoom', '2024-01-01 10:10:00', 23.8), + ('LivingRoom', '2024-01-01 10:15:00', 24.0); + +CREATE OR REPLACE TABLE hvac_mode ( + room VARCHAR, + mode_time TIMESTAMP, + mode VARCHAR +); +INSERT INTO hvac_mode VALUES + ('LivingRoom', '2024-01-01 09:58:00', 'Cooling'), + ('LivingRoom', '2024-01-01 10:06:00', 'Fan'), + ('LivingRoom', '2024-01-01 10:30:00', 'Heating'); +``` + +### Examples + +Match each temperature reading with the latest HVAC mode that started at or before it: + +```sql +SELECT r.reading_time, r.temperature, m.mode +FROM sensor_readings AS r +ASOF JOIN hvac_mode AS m + ON r.room = m.room + AND r.reading_time >= m.mode_time +ORDER BY r.reading_time; +``` + +Result: + +```text +┌─────────────────────┬─────────────┬────────────┐ +│ reading_time │ temperature │ mode │ +├─────────────────────┼─────────────┼────────────┤ +│ 2024-01-01 10:00:00 │ 23.1C │ Cooling │ +│ 2024-01-01 10:05:00 │ 23.3C │ Cooling │ +│ 2024-01-01 10:10:00 │ 23.8C │ Fan │ +│ 2024-01-01 10:15:00 │ 24.0C │ Fan │ +└─────────────────────┴─────────────┴────────────┘ +``` + +ASOF left join—keep all sensor readings even if no HVAC mode was active yet: + +```sql +SELECT r.reading_time, r.temperature, m.mode +FROM sensor_readings AS r +ASOF LEFT JOIN hvac_mode AS m + ON r.room = m.room + AND r.reading_time >= m.mode_time +ORDER BY r.reading_time; +``` + +Result: + +```text +┌─────────────────────┬─────────────┬────────────┐ +│ reading_time │ temperature │ mode │ +├─────────────────────┼─────────────┼────────────┤ +│ 2024-01-01 09:55:00 │ 22.8C │ NULL │ ← before first HVAC mode +│ 2024-01-01 10:00:00 │ 23.1C │ Cooling │ +│ 
2024-01-01 10:05:00 │ 23.3C │ Cooling │ +│ 2024-01-01 10:10:00 │ 23.8C │ Fan │ +│ 2024-01-01 10:15:00 │ 24.0C │ Fan │ +└─────────────────────┴─────────────┴────────────┘ +``` + +ASOF right join—keep all HVAC mode changes even if no later sensor reading references them: + +```sql +SELECT r.reading_time, r.temperature, m.mode_time, m.mode +FROM sensor_readings AS r +ASOF RIGHT JOIN hvac_mode AS m + ON r.room = m.room + AND r.reading_time >= m.mode_time +ORDER BY m.mode_time, r.reading_time; +``` + +Result: + +```text +┌─────────────────────┬─────────────┬─────────────────────┬────────────┐ +│ reading_time │ temperature │ mode_time │ mode │ +├─────────────────────┼─────────────┼─────────────────────┼────────────┤ +│ 2024-01-01 10:00:00 │ 23.1C │ 2024-01-01 09:58:00 │ Cooling │ +│ 2024-01-01 10:05:00 │ 23.3C │ 2024-01-01 09:58:00 │ Cooling │ +│ 2024-01-01 10:10:00 │ 23.8C │ 2024-01-01 10:06:00 │ Fan │ +│ 2024-01-01 10:15:00 │ 24.0C │ 2024-01-01 10:06:00 │ Fan │ +│ NULL │ NULL │ 2024-01-01 10:30:00 │ Heating │ ← waiting for reading +└─────────────────────┴─────────────┴─────────────────────┴────────────┘ +``` + +Multiple readings can land in the same HVAC interval, so a RIGHT ASOF join can emit more than one row per mode; the final `NULL` row shows the newly scheduled `Heating` mode that has not yet matched a reading. 
From f4138f12ede5074e9a14104d99dd1ebcca26f65d Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Mon, 10 Nov 2025 18:29:46 +0800 Subject: [PATCH 2/2] docs(cn): simplify join title --- .../10-sql-commands/20-query-syntax/04-query-join.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cn/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md b/docs/cn/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md index 3800c3be56..d152356d6d 100644 --- a/docs/cn/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md +++ b/docs/cn/sql-reference/10-sql-commands/20-query-syntax/04-query-join.md @@ -1,5 +1,5 @@ --- -title: JOIN(连接) +title: JOIN --- ## 概述