Skip to content

Commit c69996f

Browse files
author
longshan.lu
committed
feat: Add LeftSemi and LeftAnti join types to JoinType enum and implement corresponding logic in join processing
1 parent 8197b2d commit c69996f

File tree

22 files changed: +1006 −232 lines changed

Makefile

Lines changed: 11 additions & 149 deletions
Original file line number | Diff line number | Diff line change
@@ -1,174 +1,36 @@
1-
# Qurious Makefile
2-
# Simplified development workflow
31
CARGO = cargo
4-
RUSTFMT = rustfmt
52
DOCKER = docker
6-
PROJECT_NAME = qurious
73
TPCH_DATA_DIR = qurious/tests/tpch/data
84
TPCH_DOCKER_IMAGE = ghcr.io/scalytics/tpch-docker:main
95

10-
# Default target
116
.PHONY: help
12-
help:
13-
@echo "Qurious Development Tools"
14-
@echo "========================"
7+
help: ## Show available commands
8+
@echo "Qurious Makefile (minimal)"
159
@echo ""
1610
@echo "Available commands:"
17-
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
18-
19-
# Code checking
20-
.PHONY: check
21-
check: ## Check code syntax and dependencies
22-
$(CARGO) check --all-features
23-
24-
.PHONY: check-all
25-
check-all: ## Check all workspace members
26-
$(CARGO) check --workspace --all-features
27-
28-
# Build
29-
.PHONY: build
30-
build: ## Build project (debug mode)
31-
$(CARGO) build
32-
33-
.PHONY: build-release
34-
build-release: ## Build project (release mode)
35-
$(CARGO) build --release
36-
37-
.PHONY: build-all
38-
build-all: ## Build all workspace members
39-
$(CARGO) build --workspace
11+
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-18s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
4012

4113
.PHONY: test
42-
test: ## Run unit tests
14+
# Tests in `qurious/tests/tpch/` run against the TPC-H dataset and require
15+
# data generation ahead of time (default scale factor is 0.01 here).
16+
# Generate data from the repository root with:
17+
# make tpch-data
18+
test: ## Run unit tests (includes TPC-H tests when available)
4319
INCLUDE_TPCH=true $(CARGO) test
4420

45-
# Code formatting
46-
.PHONY: fmt
47-
fmt: ## Check code formatting
48-
$(CARGO) fmt -- --check
49-
50-
.PHONY: fmt-fix
51-
fmt-fix: ## Format code and auto-fix
52-
$(CARGO) fmt
53-
54-
# Code quality
55-
.PHONY: clippy
56-
clippy: ## Run clippy code checks
57-
$(CARGO) clippy --all-features -- -D warnings
58-
59-
.PHONY: clippy-fix
60-
clippy-fix: ## Run clippy and auto-fix
61-
$(CARGO) clippy --fix --all-features
62-
63-
# Clean
64-
.PHONY: clean
65-
clean: ## Clean build artifacts
66-
$(CARGO) clean
67-
68-
.PHONY: clean-all
69-
clean-all: clean ## Clean all build artifacts and temp files
70-
rm -rf target/
71-
find . -name "*.orig" -delete
72-
find . -name "*.rej" -delete
73-
74-
# TPC-H data generation
7521
.PHONY: tpch-data
76-
tpch-data: ## Generate TPC-H test data
22+
tpch-data: ## Generate TPC-H test data (scale factor 0.01)
7723
mkdir -p $(TPCH_DATA_DIR)
7824
$(DOCKER) run -it -v "$(realpath $(TPCH_DATA_DIR))":/data $(TPCH_DOCKER_IMAGE) -vf -s 0.01
7925

8026
.PHONY: tpch-data-small
81-
tpch-data-small: ## Generate small TPC-H test data
27+
tpch-data-small: ## Generate small TPC-H test data (scale factor 0.001)
8228
mkdir -p $(TPCH_DATA_DIR)
8329
$(DOCKER) run -it -v "$(realpath $(TPCH_DATA_DIR))":/data $(TPCH_DOCKER_IMAGE) -vf -s 0.001
8430

8531
.PHONY: tpch-data-large
86-
tpch-data-large: ## Generate large TPC-H test data
32+
tpch-data-large: ## Generate large TPC-H test data (scale factor 0.1)
8733
mkdir -p $(TPCH_DATA_DIR)
8834
$(DOCKER) run -it -v "$(realpath $(TPCH_DATA_DIR))":/data $(TPCH_DOCKER_IMAGE) -vf -s 0.1
8935

90-
# Development workflow
91-
.PHONY: dev-setup
92-
dev-setup: ## Setup development environment
93-
rustup component add rustfmt
94-
rustup component add clippy
95-
$(CARGO) install cargo-watch
96-
97-
.PHONY: dev
98-
dev: ## Development mode: watch files and run tests
99-
cargo watch -x check -x test
100-
101-
.PHONY: dev-test
102-
dev-test: ## Development mode: watch files and run tests
103-
cargo watch -x test
104-
105-
# Documentation
106-
.PHONY: doc
107-
doc: ## Generate documentation
108-
$(CARGO) doc --no-deps
109-
110-
.PHONY: doc-open
111-
doc-open: ## Generate and open documentation
112-
$(CARGO) doc --no-deps --open
113-
114-
# Benchmark
115-
.PHONY: bench
116-
bench: ## Run benchmarks
117-
$(CARGO) bench
118-
119-
# Dependency management
120-
.PHONY: update
121-
update: ## Update dependencies
122-
$(CARGO) update
123-
124-
.PHONY: audit
125-
audit: ## Check dependency security vulnerabilities
126-
$(CARGO) audit
127-
128-
# Release preparation
129-
.PHONY: release-check
130-
release-check: check-all clippy test-all ## Pre-release checks
131-
@echo "All checks passed, ready to release!"
132-
133-
.PHONY: release-build
134-
release-build: ## Build release version
135-
$(CARGO) build --release
136-
@echo "Release build completed: target/release/$(PROJECT_NAME)"
137-
138-
# Database
139-
.PHONY: db-start
140-
db-start: ## Start database services
141-
$(DOCKER) compose up -d
142-
143-
.PHONY: db-stop
144-
db-stop: ## Stop database services
145-
$(DOCKER) compose down
146-
147-
.PHONY: db-reset
148-
db-reset: ## Reset database
149-
$(DOCKER) compose down -v
150-
$(DOCKER) compose up -d
151-
152-
# Utilities
153-
.PHONY: size
154-
size: build-release ## Show binary file size
155-
@echo "Binary file size:"
156-
@ls -lh target/release/$(PROJECT_NAME)
157-
158-
.PHONY: deps-tree
159-
deps-tree: ## Show dependency tree
160-
$(CARGO) tree
161-
162-
.PHONY: outdated
163-
outdated: ## Check outdated dependencies
164-
$(CARGO) install-update -a
165-
166-
# Quick development command combinations
167-
.PHONY: quick-check
168-
quick-check: fmt clippy test ## Quick check: format, clippy, test
169-
170-
.PHONY: full-check
171-
full-check: fmt clippy test-all audit ## Full check: format, clippy, all tests, security audit
172-
173-
# Default goal
17436
.DEFAULT_GOAL := help

qurious/src/common/join_type.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ pub enum JoinType {
66
Right,
77
Inner,
88
Full,
9+
LeftSemi,
10+
LeftAnti,
911
}
1012

1113
impl Display for JoinType {
@@ -15,6 +17,8 @@ impl Display for JoinType {
1517
JoinType::Right => write!(f, "Right Join"),
1618
JoinType::Inner => write!(f, "Inner Join"),
1719
JoinType::Full => write!(f, "Full Join"),
20+
JoinType::LeftSemi => write!(f, "Left Semi Join"),
21+
JoinType::LeftAnti => write!(f, "Left Anti Join"),
1822
}
1923
}
2024
}

qurious/src/datatypes/scalar.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -288,9 +288,14 @@ impl Display for ScalarValue {
288288
ScalarValue::Decimal128(v, p, s) => format_decimal!(f, v, "Decimal128", p, s),
289289
ScalarValue::Decimal256(v, p, s) => format_decimal!(f, v, "Decimal256", p, s),
290290
ScalarValue::Utf8(v) => format_string!(f, v, "Utf8"),
291-
ScalarValue::IntervalMonthDayNano(v) => {
292-
format_string!(f, v.map(|v| format!("{:?}", v)), "IntervalMonthDayNano")
293-
}
291+
ScalarValue::IntervalMonthDayNano(v) => match v {
292+
Some(val) => write!(
293+
f,
294+
"interval(months={}, days={}, nanoseconds={})",
295+
val.months, val.days, val.nanoseconds
296+
),
297+
None => write!(f, "interval(null)"),
298+
},
294299
}
295300
}
296301
}

qurious/src/execution/session.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ impl ExecuteSession {
9898
LogicalPlan::Dml(stmt) => self.execute_dml(stmt),
9999
plan => {
100100
let plan = self.optimizer.optimize(plan)?;
101-
println!("plan: {}", crate::utils::format(&plan, 0));
102101
self.planner.create_physical_plan(&plan)?.execute()
103102
}
104103
}
@@ -445,11 +444,11 @@ order by
445444
execute_and_assert(
446445
"SELECT * FROM read_csv('./tests/testdata/file/case1.csv')",
447446
vec![
448-
"+----+---------------+--------------------+",
449-
"| id | location | name |",
450-
"+----+---------------+--------------------+",
451-
"| 1 | China BeiJing | BeiJing University |",
452-
"+----+---------------+--------------------+",
447+
"+----+--------------------+---------------+",
448+
"| id | name | location |",
449+
"+----+--------------------+---------------+",
450+
"| 1 | BeiJing University | China BeiJing |",
451+
"+----+--------------------+---------------+",
453452
],
454453
);
455454
}
@@ -475,11 +474,11 @@ order by
475474
execute_and_assert(
476475
"SELECT * FROM read_parquet('./tests/testdata/file/case2.parquet') limit 1",
477476
vec![
478-
"+------------+----------+--------+-------+",
479-
"| counter_id | currency | market | type |",
480-
"+------------+----------+--------+-------+",
481-
"| ST/SZ/001 | HKD | SZ | STOCK |",
482-
"+------------+----------+--------+-------+",
477+
"+------------+--------+-------+----------+",
478+
"| counter_id | market | type | currency |",
479+
"+------------+--------+-------+----------+",
480+
"| ST/SZ/001 | SZ | STOCK | HKD |",
481+
"+------------+--------+-------+----------+",
483482
],
484483
);
485484
}

qurious/src/logical/builder.rs

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,10 @@ fn build_join_schema(join_type: JoinType, left: &TableSchemaRef, right: &TableSc
177177
.into_iter()
178178
.chain(nullify_fields(right_fields))
179179
.collect(),
180+
// Left Semi/Anti joins only return left side columns
181+
JoinType::LeftSemi | JoinType::LeftAnti => {
182+
left_fields.map(|(a, b)| (a.cloned(), b.clone())).collect::<Vec<_>>()
183+
}
180184
};
181185

182186
TableSchema::try_new(qualified_fields).map(Arc::new)
@@ -210,7 +214,7 @@ mod tests {
210214
assert_plan(
211215
"SELECT * FROM users a JOIN repos b ON a.id = b.owner_id",
212216
vec![
213-
"Projection: (a.email, a.id, b.id, a.name, b.name, b.owner_id)",
217+
"Projection: (a.id, a.name, a.email, b.id, b.name, b.owner_id)",
214218
" Inner Join: Filter: a.id = b.owner_id",
215219
" SubqueryAlias: a",
216220
" TableScan: users",
@@ -225,7 +229,7 @@ mod tests {
225229
assert_plan(
226230
"SELECT * FROM users a LEFT JOIN repos b ON a.id = b.owner_id",
227231
vec![
228-
"Projection: (a.email, a.id, b.id, a.name, b.name, b.owner_id)",
232+
"Projection: (a.id, a.name, a.email, b.id, b.name, b.owner_id)",
229233
" Left Join: Filter: a.id = b.owner_id",
230234
" SubqueryAlias: a",
231235
" TableScan: users",
@@ -240,7 +244,7 @@ mod tests {
240244
assert_plan(
241245
"SELECT * FROM users a RIGHT JOIN repos b ON a.id = b.owner_id",
242246
vec![
243-
"Projection: (a.email, a.id, b.id, a.name, b.name, b.owner_id)",
247+
"Projection: (b.id, b.name, b.owner_id, a.id, a.name, a.email)",
244248
" Right Join: Filter: a.id = b.owner_id",
245249
" SubqueryAlias: a",
246250
" TableScan: users",
@@ -255,7 +259,7 @@ mod tests {
255259
assert_plan(
256260
"SELECT * FROM users a FULL JOIN repos b ON a.id = b.owner_id",
257261
vec![
258-
"Projection: (a.email, a.id, b.id, a.name, b.name, b.owner_id)",
262+
"Projection: (a.id, a.name, a.email, b.id, b.name, b.owner_id)",
259263
" Full Join: Filter: a.id = b.owner_id",
260264
" SubqueryAlias: a",
261265
" TableScan: users",
@@ -270,7 +274,7 @@ mod tests {
270274
assert_plan(
271275
"SELECT * FROM users a JOIN repos b ON a.id = b.owner_id WHERE a.name = 'test'",
272276
vec![
273-
"Projection: (a.email, a.id, b.id, a.name, b.name, b.owner_id)",
277+
"Projection: (a.id, a.name, a.email, b.id, b.name, b.owner_id)",
274278
" Filter: a.name = Utf8('test')",
275279
" Inner Join: Filter: a.id = b.owner_id",
276280
" SubqueryAlias: a",
@@ -286,7 +290,7 @@ mod tests {
286290
assert_plan(
287291
"SELECT * FROM users a JOIN repos b ON a.id = b.owner_id AND a.name = b.name",
288292
vec![
289-
"Projection: (a.email, a.id, b.id, a.name, b.name, b.owner_id)",
293+
"Projection: (a.id, a.name, a.email, b.id, b.name, b.owner_id)",
290294
" Inner Join: Filter: a.id = b.owner_id AND a.name = b.name",
291295
" SubqueryAlias: a",
292296
" TableScan: users",

0 commit comments

Comments
 (0)