Skip to content

Commit ca2adc8

Browse files
committed
Fix windows issue and other touchups
Signed-off-by: Adam Gutglick <[email protected]>
1 parent 29bd646 commit ca2adc8

File tree

3 files changed

+35
-46
lines changed

3 files changed

+35
-46
lines changed

vortex-datafusion/src/persistent/mod.rs

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ mod tests {
6666
use vortex::array::arrays::VarBinArray;
6767
use vortex::array::validity::Validity;
6868
use vortex::buffer::buffer;
69-
use vortex::error::vortex_err;
7069
use vortex::file::WriteOptionsSessionExt;
7170
use vortex::session::VortexSession;
7271

@@ -75,10 +74,8 @@ mod tests {
7574
use crate::persistent::register_vortex_format_factory;
7675

7776
#[rstest]
78-
#[case(Some(1))]
79-
#[case(None)]
8077
#[tokio::test]
81-
async fn query_file(#[case] limit: Option<usize>) -> anyhow::Result<()> {
78+
async fn test_query_file(#[values(Some(1), None)] limit: Option<usize>) -> anyhow::Result<()> {
8279
let session = VortexSession::default();
8380
let temp_dir = tempdir()?;
8481
let strings = ChunkedArray::from_iter([
@@ -100,28 +97,26 @@ mod tests {
10097
Validity::NonNullable,
10198
)?;
10299

103-
let filepath = temp_dir.path().join("data.vortex");
104-
105100
let mut f = OpenOptions::new()
106101
.write(true)
107-
.create(true)
108-
.truncate(true)
109-
.open(&filepath)
102+
.create_new(true)
103+
.open(temp_dir.path().join("data.vortex"))
110104
.await?;
111105

112-
session
106+
let summary = session
113107
.write_options()
114108
.write(&mut f, st.to_array_stream())
115109
.await?;
116110

111+
assert_eq!(summary.row_count(), 8);
112+
113+
// For reasons I don't understand, this is suddenly important on windows
114+
drop(f);
115+
117116
let ctx = SessionContext::default();
118117
let format = Arc::new(VortexFormat::new(session));
119-
let table_url = ListingTableUrl::parse(
120-
temp_dir
121-
.path()
122-
.to_str()
123-
.ok_or_else(|| vortex_err!("Path is not valid UTF-8"))?,
124-
)?;
118+
119+
let table_url = ListingTableUrl::parse(temp_dir.path().to_str().unwrap())?;
125120
assert!(table_url.is_collection());
126121

127122
let config = ListingTableConfig::new(table_url)

vortex-datafusion/src/persistent/opener.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ impl FileOpener for VortexOpener {
335335
})
336336
.try_flatten()
337337
.map(move |batch| {
338-
if projector.projection().column_indices().is_empty() {
338+
if projector.projection().as_ref().is_empty() {
339339
batch
340340
} else {
341341
batch.and_then(|b| projector.project_batch(&b))

vortex-datafusion/src/persistent/sink.rs

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@
33

44
use std::any::Any;
55
use std::sync::Arc;
6-
use std::sync::atomic::AtomicU64;
7-
use std::sync::atomic::Ordering;
86

97
use arrow_schema::SchemaRef;
108
use async_trait::async_trait;
119
use datafusion_common::DataFusionError;
1210
use datafusion_common::Result as DFResult;
11+
use datafusion_common::exec_datafusion_err;
1312
use datafusion_common_runtime::JoinSet;
1413
use datafusion_common_runtime::SpawnedTask;
1514
use datafusion_datasource::file_sink_config::FileSink;
@@ -33,6 +32,7 @@ use vortex::dtype::DType;
3332
use vortex::dtype::arrow::FromArrowType;
3433
use vortex::error::VortexResult;
3534
use vortex::file::WriteOptionsSessionExt;
35+
use vortex::file::WriteSummary;
3636
use vortex::io::ObjectStoreWriter;
3737
use vortex::io::VortexWrite;
3838
use vortex::session::VortexSession;
@@ -108,58 +108,52 @@ impl FileSink for VortexSink {
108108
mut file_stream_rx: DemuxedStreamReceiver,
109109
object_store: Arc<dyn ObjectStore>,
110110
) -> DFResult<u64> {
111-
// This is a hack
112-
let row_counter = Arc::new(AtomicU64::new(0));
113-
114-
let mut file_write_tasks: JoinSet<DFResult<Path>> = JoinSet::new();
111+
let mut file_write_tasks: JoinSet<DFResult<(Path, WriteSummary)>> = JoinSet::new();
115112

116113
// TODO(adamg):
117114
// 1. We can probably be better at signaling how much memory we're consuming (potentially when reading too), see ParquetSink::spawn_writer_tasks_and_join.
118115
while let Some((path, rx)) = file_stream_rx.recv().await {
119116
let session = self.session.clone();
120-
let row_counter = row_counter.clone();
121117
let object_store = object_store.clone();
122118
let writer_schema = get_writer_schema(&self.config);
123119
let dtype = DType::from_arrow(writer_schema);
124120

125121
// We need to spawn work because there's a dependency between the different files. If one file has too many batches buffered,
126122
// the demux task might deadlock itself.
127123
file_write_tasks.spawn(async move {
128-
let stream = ReceiverStream::new(rx).map(move |rb| {
129-
row_counter.fetch_add(rb.num_rows() as u64, Ordering::Relaxed);
130-
VortexResult::Ok(ArrayRef::from_arrow(rb, false))
131-
});
124+
let stream = ReceiverStream::new(rx)
125+
.map(move |rb| VortexResult::Ok(ArrayRef::from_arrow(rb, false)));
132126

133127
let stream_adapter = ArrayStreamAdapter::new(dtype, stream);
134128

135-
let mut sink = ObjectStoreWriter::new(object_store.clone(), &path)
129+
let mut object_writer = ObjectStoreWriter::new(object_store, &path)
136130
.await
137-
.map_err(|e| {
138-
DataFusionError::Execution(format!(
139-
"Failed to create ObjectStoreWriter: {e}"
140-
))
141-
})?;
131+
.map_err(|e| exec_datafusion_err!("Failed to create ObjectStoreWriter: {e}"))?;
142132

143-
session
133+
let summary = session
144134
.write_options()
145-
.write(&mut sink, stream_adapter)
135+
.write(&mut object_writer, stream_adapter)
146136
.await
147-
.map_err(|e| {
148-
DataFusionError::Execution(format!("Failed to write Vortex file: {e}"))
149-
})?;
137+
.map_err(|e| exec_datafusion_err!("Failed to write Vortex file: {e}"))?;
150138

151-
sink.shutdown().await.map_err(|e| {
152-
DataFusionError::Execution(format!("Failed to shutdown Vortex writer: {e}"))
153-
})?;
139+
object_writer
140+
.shutdown()
141+
.await
142+
.map_err(|e| exec_datafusion_err!("Failed to shutdown Vortex writer: {e}"))?;
154143

155-
Ok(path)
144+
Ok((path, summary))
156145
});
157146
}
158147

148+
let mut row_count = 0;
149+
159150
while let Some(result) = file_write_tasks.join_next().await {
160151
match result {
161-
Ok(path) => {
162-
let path = path?;
152+
Ok(r) => {
153+
let (path, summary) = r?;
154+
155+
row_count += summary.row_count();
156+
163157
tracing::info!(path = %path, "Successfully written file");
164158
}
165159
Err(e) => {
@@ -177,7 +171,7 @@ impl FileSink for VortexSink {
177171
.await
178172
.map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??;
179173

180-
Ok(row_counter.load(Ordering::SeqCst))
174+
Ok(row_count)
181175
}
182176
}
183177

0 commit comments

Comments
 (0)