Skip to content

Commit 32cf89b

Browse files
committed
refactor: experimental parquet reader
1 parent 271ce7d commit 32cf89b

File tree

24 files changed

+2517
-22
lines changed

24 files changed

+2517
-22
lines changed

Cargo.toml

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ databend-common-meta-types = { path = "src/meta/types" }
161161
databend-common-metrics = { path = "src/common/metrics" }
162162
databend-common-native = { path = "src/common/native" }
163163
databend-common-openai = { path = "src/common/openai" }
164+
databend-common-parquet-reader-experimental = { path = "src/common/experimental_parquet_reader" }
164165
databend-common-pipeline-core = { path = "src/query/pipeline/core" }
165166
databend-common-pipeline-sinks = { path = "src/query/pipeline/sinks" }
166167
databend-common-pipeline-sources = { path = "src/query/pipeline/sources" }
@@ -232,16 +233,16 @@ anyerror = { version = "=0.1.13" }
232233
anyhow = { version = "1.0.65" }
233234
apache-avro = { version = "0.17.0", features = ["snappy", "zstandard", "xz", "snappy", "bzip"] }
234235
approx = "0.5.1"
235-
arrow = { version = "55" }
236-
arrow-array = { version = "55" }
237-
arrow-buffer = { version = "55" }
238-
arrow-cast = { version = "55", features = ["prettyprint"] }
239-
arrow-data = { version = "55" }
240-
arrow-flight = { version = "55", features = ["flight-sql-experimental", "tls"] }
241-
arrow-ipc = { version = "55" }
242-
arrow-ord = { version = "55" }
243-
arrow-schema = { version = "55", features = ["serde"] }
244-
arrow-select = { version = "55" }
236+
arrow = { version = "55.2" }
237+
arrow-array = { version = "55.2" }
238+
arrow-buffer = { version = "55.2" }
239+
arrow-cast = { version = "55.2", features = ["prettyprint"] }
240+
arrow-data = { version = "55.2" }
241+
arrow-flight = { version = "55.2", features = ["flight-sql-experimental", "tls"] }
242+
arrow-ipc = { version = "55.2" }
243+
arrow-ord = { version = "55.2" }
244+
arrow-schema = { version = "55.2", features = ["serde"] }
245+
arrow-select = { version = "55.2" }
245246
arrow-udf-runtime = { version = "0.8.0", default-features = false }
246247
async-backtrace = "0.2"
247248
async-channel = "1.7.1"

scripts/ci/deploy/config/databend-query-node-1.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ join_spilling_memory_ratio = 60
8484
[log]
8585

8686
[log.file]
87-
level = "DEBUG"
87+
level = "INFO"
8888
format = "text"
8989
dir = "./.databend/logs_1"
9090
limit = 12 # 12 files, 1 file per hour
@@ -153,6 +153,9 @@ data_path = "./.databend/stateless_test_data"
153153
# use "disk" to enable disk cache
154154
data_cache_storage = "none"
155155

156+
157+
table_data_deserialized_data_bytes = 1073741824
158+
156159
[cache.disk]
157160
# cache path
158161
path = "./.databend/_cache"

src/common/column/src/binview/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,9 @@ impl<T: ViewType + ?Sized> Clone for BinaryViewColumnGeneric<T> {
133133
}
134134
}
135135

136-
unsafe impl<T: ViewType + ?Sized> Send for BinaryViewColumnGeneric<T> {}
136+
// impl<T: ViewType + ?Sized> Send for BinaryViewColumnGeneric<T> {}
137137

138-
unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewColumnGeneric<T> {}
138+
// unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewColumnGeneric<T> {}
139139

140140
impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
141141
pub fn new_unchecked(
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
[package]
2+
name = "databend-common-parquet-reader-experimental"
3+
version = { workspace = true }
4+
authors = { workspace = true }
5+
license = { workspace = true }
6+
publish = { workspace = true }
7+
edition = { workspace = true }
8+
9+
[features]
10+
11+
[dependencies]
12+
databend-common-column = { workspace = true }
13+
databend-common-exception = { workspace = true }
14+
databend-common-expression = { workspace = true }
15+
databend-storages-common-table-meta = { workspace = true }
16+
17+
bytes = { workspace = true }
18+
lz4_flex = { version = "^0.9" }
19+
parquet2 = { version = "0.17.0", default-features = false, features = ["serde_types", "async", "zstd", "snappy", "lz4"] }
20+
parquet-format-safe = "0.2.0"
21+
parquet = { workspace = true, features = ["experimental"] }
22+
streaming-decompression = "0.1.2"
23+
zstd = { workspace = true }
24+
25+
[dev-dependencies]
26+
# used to test async readers
27+
28+
[package.metadata.cargo-machete]
29+
ignored = ["match-template"]
30+
31+
[lints]
32+
workspace = true

0 commit comments

Comments
 (0)