Skip to content

Commit 3d84543

Browse files
authored
feat: skip reading empty files when loading from or querying a location. (#17522)
feat: skip reading empty files.
1 parent bd277c2 commit 3d84543

File tree

6 files changed: 53 additions and 2 deletions.

src/query/storages/parquet/src/parquet_rs/copy_into_table/table.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ impl ParquetTableForCopy {
6060
);
6161
let file_infos = files
6262
.iter()
63+
.filter(|f| f.size > 0)
6364
.map(|f| (f.path.clone(), f.size))
6465
.collect::<Vec<_>>();
6566
let total_size = file_infos.iter().map(|(_, size)| *size as usize).sum();

src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ impl ParquetRSTable {
5656
match &self.files_to_read {
5757
Some(files) => files
5858
.iter()
59+
.filter(|f| f.size > 0)
5960
.map(|f| (f.path.clone(), f.size))
6061
.collect::<Vec<_>>(),
6162
None => self
@@ -75,7 +76,7 @@ impl ParquetRSTable {
7576
};
7677

7778
// If a file size is less than `parquet_fast_read_bytes`,
78-
// we treat it as a small file and it will be totally loaded into memory.
79+
// we treat it as a small file, and it will be totally loaded into memory.
7980
let fast_read_bytes = ctx.get_settings().get_parquet_fast_read_bytes()?;
8081
let mut large_files = vec![];
8182
let mut large_file_indices = vec![];
@@ -85,7 +86,7 @@ impl ParquetRSTable {
8586
if size > fast_read_bytes {
8687
large_files.push((location, size));
8788
large_file_indices.push(index);
88-
} else {
89+
} else if size > 0 {
8990
small_files.push((location, size));
9091
small_file_indices.push(index);
9192
}

src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ impl Table for ParquetRSTable {
291291
let file_locations = match &self.files_to_read {
292292
Some(files) => files
293293
.iter()
294+
.filter(|f| f.size > 0)
294295
.map(|f| (f.path.clone(), f.size))
295296
.collect::<Vec<_>>(),
296297
None => self

src/query/storages/stage/src/stage_table.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ impl StageTable {
109109

110110
let partitions = files
111111
.into_iter()
112+
.filter(|f| f.size > 0)
112113
.map(|v| {
113114
let part = SingleFilePartition {
114115
path: v.path.clone(),

tests/data/empty.txt

Whitespace-only changes.

(Next changed file: a newly added test file; its path is missing from this page capture.)

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
query ok
2+
select $1 from @data (files=>( 'empty.txt'), file_format=>'csv')
3+
----
4+
5+
query ok
6+
select $1 from @data (files=>('csv/it.csv', 'empty.txt'), file_format=>'csv')
7+
----
8+
1
9+
2
10+
11+
query ok
12+
select $1 from @data (files=>('parquet/ii/f2.parquet', 'empty.txt'), file_format=>'parquet');
13+
----
14+
3
15+
4
16+
17+
statement ok
18+
create or replace table t (c1 int, id string);
19+
20+
query ok
21+
copy into t(c1) from @data files=('parquet/ii/f2.parquet', 'empty.txt') file_format=(type='parquet')
22+
----
23+
parquet/ii/f2.parquet 2 0 NULL NULL
24+
25+
query ok
26+
select * from t
27+
----
28+
3 NULL
29+
4 NULL
30+
31+
query ok
32+
copy into t from @data files=('csv/it.csv', 'empty.txt') file_format=(type='csv')
33+
----
34+
csv/it.csv 2 0 NULL NULL
35+
36+
query ok
37+
select * from t order by c1
38+
----
39+
1 b
40+
2 d
41+
3 NULL
42+
4 NULL
43+
44+
query ok
45+
select name from list_stage(location=>'@data', pattern=>'emp.*')
46+
----
47+
empty.txt

0 commit comments

Comments
 (0)