@@ -10,6 +10,8 @@ use crate::pipeline::Source;
1010use crate :: pipeline:: Step ;
1111use crate :: pipeline:: VecRecordBatchReader ;
1212use crate :: pipeline:: WriteArgs ;
13+ use crate :: pipeline:: batch_write:: BatchWriteSink ;
14+ use crate :: pipeline:: batch_write:: write_record_batches_with_sink;
1315
1416/// Pipeline step that reads a CSV file and produces a record batch reader.
1517/// Uses DataFusion for schema inference and type detection.
@@ -47,21 +49,43 @@ pub struct WriteCsvStep {
/// Marker value returned by [`WriteCsvStep`] once the CSV output has been
/// written without error. It carries no data.
pub struct WriteCsvResult {}
4951
52+ /// Write record batches from a reader to a CSV file.
53+ pub fn write_record_batches ( path : & str , reader : & mut dyn RecordBatchReader ) -> Result < ( ) > {
54+ write_record_batches_with_sink ( path, reader, CsvSink :: new)
55+ }
56+
57+ struct CsvSink {
58+ writer : arrow:: csv:: Writer < std:: fs:: File > ,
59+ }
60+
61+ impl CsvSink {
62+ fn new ( path : & str , _schema : arrow:: datatypes:: SchemaRef ) -> Result < Self > {
63+ let file = std:: fs:: File :: create ( path) . map_err ( Error :: IoError ) ?;
64+ Ok ( Self {
65+ writer : arrow:: csv:: Writer :: new ( file) ,
66+ } )
67+ }
68+ }
69+
70+ impl BatchWriteSink for CsvSink {
71+ fn write_batch ( & mut self , batch : & arrow:: record_batch:: RecordBatch ) -> Result < ( ) > {
72+ self . writer . write ( batch) . map_err ( Error :: ArrowError )
73+ }
74+
75+ fn finish ( self ) -> Result < ( ) > {
76+ Ok ( ( ) )
77+ }
78+ }
79+
5080#[ async_trait( ?Send ) ]
5181impl Step for WriteCsvStep {
5282 type Input = ( ) ;
5383 type Output = WriteCsvResult ;
5484
5585 async fn execute ( self , _input : Self :: Input ) -> Result < Self :: Output > {
56- let path = self . args . path . as_str ( ) ;
57- let file = std:: fs:: File :: create ( path) . map_err ( Error :: IoError ) ?;
58- let mut writer = arrow:: csv:: Writer :: new ( file) ;
5986 let mut source = self . source ;
60- let reader = source. get ( ) ?;
61- for batch in reader {
62- let batch = batch. map_err ( Error :: ArrowError ) ?;
63- writer. write ( & batch) . map_err ( Error :: ArrowError ) ?;
64- }
87+ let mut reader = source. get ( ) ?;
88+ write_record_batches ( self . args . path . as_str ( ) , & mut * reader) ?;
6589 Ok ( WriteCsvResult { } )
6690 }
6791}
0 commit comments