@@ -19,6 +19,7 @@ use pg_bigdecimal::PgNumeric;
 use postgres::error::SqlState;
 use postgres::types::{Kind, Type as PgType, FromSql};
 use postgres::{self, Client, RowIter, Row, Column, Statement, NoTls};
+use postgres::binary_copy::{BinaryCopyOutIter, BinaryCopyOutRow};
 use postgres::fallible_iterator::FallibleIterator;
 use parquet::schema::types::{Type as ParquetType, TypePtr, GroupTypeBuilder};
 
@@ -31,7 +32,7 @@ use crate::datatypes::money::PgMoney;
 use crate::datatypes::numeric::{new_decimal_bytes_appender, new_decimal_int_appender};
 use crate::myfrom::{MyFrom, self};
 use crate::parquet_writer::{WriterStats, ParquetRowWriter, WriterSettings};
-use crate::pg_custom_types::{PgEnum, PgRawRange, PgAbstractRow, PgRawRecord, PgAny, PgAnyRef, UnclonableHack};
+use crate::pg_custom_types::{PgAbstractRow, PgAny, PgEnum, PgRawRecord, PgRawRange, PgAnyRef, UnclonableHack};
 
 type ResolvedColumn<TRow> = (DynColumnAppender<TRow>, ParquetType);
 
@@ -216,33 +217,93 @@ fn pg_connect(args: &PostgresConnArgs) -> Result<Client, String> {
     Ok(client)
 }
 
-pub fn execute_copy(pg_args: &PostgresConnArgs, query: &str, output_file: &PathBuf, output_props: WriterPropertiesPtr, quiet: bool, schema_settings: &SchemaSettings) -> Result<WriterStats, String> {
-
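+/// Executes the given SQL query and streams the result rows into a Parquet file.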
+pub fn execute_copy_query(pg_args: &PostgresConnArgs, query: &str, output_file: &PathBuf, output_props: WriterPropertiesPtr, quiet: bool, schema_settings: &SchemaSettings) -> Result<WriterStats, String> {
     let mut client = pg_connect(pg_args)?;
+
     let statement = client.prepare(query).map_err(|db_err| { db_err.to_string() })?;
+    let (row_appender, schema) = map_schema_root::<Row>(statement.columns(), schema_settings)?;
+
+    execute_with_writer(output_file, output_props, quiet, schema, row_appender, |row_writer| {
+        let rows: RowIter = client.query_raw::<Statement, &i32, &[i32]>(&statement, &[])
+            .map_err(|err| format!("Failed to execute the SQL query: {}", err))?;
+        for row in rows.iterator() {
+            let row = row.map_err(|err| err.to_string())?;
+            let row = Arc::new(row);
+            row_writer.write_row(row)?;
+        }
+        Ok(())
+    })
+}
 
-    let (row_appender, schema) = map_schema_root(statement.columns(), schema_settings)?;
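+/// Copies an entire table into a Parquet file using PostgreSQL's binary COPY protocol.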
+pub fn execute_copy_table(pg_args: &PostgresConnArgs, table_name: &str, output_file: &PathBuf, output_props: WriterPropertiesPtr, quiet: bool, schema_settings: &SchemaSettings) -> Result<WriterStats, String> {
+    let mut client = pg_connect(pg_args)?;
+
+    if !quiet {
+        println!("Copying from table {} to {} using COPY with binary format...", table_name, output_file.display());
+    }
+
+    // Get the table schema using a LIMIT 0 query
+    let schema_query = format!("SELECT * FROM {} LIMIT 0", table_name);
+    let statement = client.prepare(&schema_query)
+        .map_err(|err| format!("Failed to prepare schema query: {}", err))?;
+
+    let (row_appender, schema) = map_schema_root::<Arc<BinaryCopyOutRow>>(statement.columns(), schema_settings)?;
+
+    execute_with_writer(output_file, output_props, quiet, schema, row_appender, |row_writer| {
+        // Execute COPY TO STDOUT with binary format
+        let copy_query = format!("COPY {} TO STDOUT (FORMAT BINARY)", table_name);
+        let copy_reader = client.copy_out(&copy_query)
+            .map_err(|err| format!("Failed to execute COPY command: {}", err))?;
+
+        // Get the column types for the binary copy reader
+        let column_types: Vec<postgres::types::Type> = statement.columns()
+            .iter()
+            .map(|col| col.type_().clone())
+            .collect();
+
+        let mut binary_iter = BinaryCopyOutIter::new(copy_reader, &column_types);
+
+        // Process each binary row
+        while let Some(binary_row) = binary_iter.next()
+            .map_err(|err| format!("Failed to read binary row: {}", err))? {
+
+            // Wrap in Arc so the row fits the generic PgAbstractRow machinery
+            let row = Arc::new(Arc::new(binary_row));
+            row_writer.write_row(row)?;
+        }
+        Ok(())
+    })
+}
+
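+/// Shared writer plumbing: builds the Parquet file writer and delegates row
+/// production to the `data_processor` callback.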
+fn execute_with_writer<T: PgAbstractRow + Clone, F>(
+    output_file: &PathBuf,
+    output_props: WriterPropertiesPtr,
+    quiet: bool,
+    schema: ParquetType,
+    row_appender: DynColumnAppender<Arc<T>>,
+    data_processor: F
+) -> Result<WriterStats, String>
+where
+    F: FnOnce(&mut ParquetRowWriter<std::fs::File, T>) -> Result<(), String>
+{
     if !quiet {
         eprintln!("Schema: {}", format_schema(&schema, 0));
     }
     let schema = Arc::new(schema);
 
-    let settings = WriterSettings { row_group_byte_limit: 500 * 1024 * 1024, row_group_row_limit: output_props.max_row_group_size() };
+    let settings = WriterSettings {
+        row_group_byte_limit: 500 * 1024 * 1024,
+        row_group_row_limit: output_props.max_row_group_size()
+    };
 
-    let output_file_f = std::fs::File::create(output_file).unwrap();
+    let output_file_f = std::fs::File::create(output_file)
+        .map_err(|e| format!("Failed to create output file: {}", e))?;
     let pq_writer = SerializedFileWriter::new(output_file_f, schema.clone(), output_props)
         .map_err(|e| format!("Failed to create parquet writer: {}", e))?;
     let mut row_writer = ParquetRowWriter::new(pq_writer, schema.clone(), row_appender, quiet, settings)
         .map_err(|e| format!("Failed to create row writer: {}", e))?;
 
-    let rows: RowIter = client.query_raw::<Statement, &i32, &[i32]>(&statement, &[])
-        .map_err(|err| format!("Failed to execute the SQL query: {}", err))?;
-    for row in rows.iterator() {
-        let row = row.map_err(|err| err.to_string())?;
-        let row = Arc::new(row);
-
-        row_writer.write_row(row)?;
-    }
+    data_processor(&mut row_writer)?;
 
     Ok(row_writer.close()?)
 }
@@ -329,8 +390,8 @@ fn count_columns(p: &ParquetType) -> usize {
 }
 
 
-fn map_schema_root<'a>(row: &[Column], s: &SchemaSettings) -> Result<ResolvedColumn<Arc<Row>>, String> {
-    let mut fields: Vec<ResolvedColumn<Arc<Row>>> = vec![];
+fn map_schema_root<TRow: PgAbstractRow + 'static>(row: &[Column], s: &SchemaSettings) -> Result<ResolvedColumn<Arc<TRow>>, String> {
+    let mut fields: Vec<ResolvedColumn<Arc<TRow>>> = vec![];
     for (col_i, c) in row.iter().enumerate() {
 
         let t = c.type_();
@@ -342,7 +403,7 @@ fn map_schema_root<'a>(row: &[Column], s: &SchemaSettings) -> Result<ResolvedCol
 
     let (column_appenders, parquet_types): (Vec<_>, Vec<_>) = fields.into_iter().unzip();
 
-    let merged_appender: DynColumnAppender<Arc<Row>> = Box::new(DynamicMergedAppender::new(column_appenders, 0, 0));
+    let merged_appender: DynColumnAppender<Arc<TRow>> = Box::new(DynamicMergedAppender::new(column_appenders, 0, 0));
     let struct_type = ParquetType::group_type_builder("root")
         .with_fields(parquet_types.into_iter().map(Arc::new).collect())
         .build()
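
For reference, a minimal sketch of how the two new entry points might be invoked; the `conn_args`, `props`, and `settings` bindings are hypothetical stand-ins for whatever the CLI layer constructs, not part of this commit:

    // Hypothetical call sites (none of these bindings come from this commit).
    let out = PathBuf::from("events.parquet");
    // Arbitrary SQL query -> Parquet, via the prepared-statement path:
    let stats = execute_copy_query(&conn_args, "SELECT id, payload FROM events", &out, props.clone(), false, &settings)?;
    // Whole table -> Parquet, via binary COPY:
    let stats = execute_copy_table(&conn_args, "events", &out, props, false, &settings)?;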