1- use anyhow:: Result ;
21use async_trait:: async_trait;
32use datafusion:: arrow:: datatypes:: SchemaRef ;
43use datafusion:: catalog:: Session ;
@@ -10,9 +9,8 @@ use std::any::Any;
109use std:: sync:: Arc ;
1110
1211use crate :: catalog:: CatalogManager ;
13- use crate :: datafetch:: DataFetcher ;
12+ use crate :: datafetch:: FetchOrchestrator ;
1413use crate :: source:: Source ;
15- use crate :: storage:: StorageManager ;
1614
1715/// A lazy table provider that defers data fetching until scan() is called.
1816///
@@ -22,33 +20,29 @@ use crate::storage::StorageManager;
2220#[ derive( Debug ) ]
2321pub struct LazyTableProvider {
2422 schema : SchemaRef ,
25- fetcher : Arc < dyn DataFetcher > ,
2623 source : Arc < Source > ,
2724 catalog : Arc < dyn CatalogManager > ,
28- storage : Arc < dyn StorageManager > ,
25+ orchestrator : Arc < FetchOrchestrator > ,
2926 connection_id : i32 ,
3027 schema_name : String ,
3128 table_name : String ,
3229}
3330
3431impl LazyTableProvider {
35- #[ allow( clippy:: too_many_arguments) ]
3632 pub fn new (
3733 schema : SchemaRef ,
38- fetcher : Arc < dyn DataFetcher > ,
3934 source : Arc < Source > ,
4035 catalog : Arc < dyn CatalogManager > ,
41- storage : Arc < dyn StorageManager > ,
36+ orchestrator : Arc < FetchOrchestrator > ,
4237 connection_id : i32 ,
4338 schema_name : String ,
4439 table_name : String ,
4540 ) -> Self {
4641 Self {
4742 schema,
48- fetcher,
4943 source,
5044 catalog,
51- storage ,
45+ orchestrator ,
5246 connection_id,
5347 schema_name,
5448 table_name,
@@ -96,61 +90,15 @@ impl LazyTableProvider {
9690
9791 /// Fetch the table data and update catalog
9892 async fn fetch_and_cache ( & self ) -> Result < String , DataFusionError > {
99- use crate :: datafetch:: native:: StreamingParquetWriter ;
100-
101- // Prepare cache write location
102- let write_path = self . storage . prepare_cache_write (
103- self . connection_id ,
104- & self . schema_name ,
105- & self . table_name ,
106- ) ;
107-
108- // Create writer
109- let mut writer = StreamingParquetWriter :: new ( write_path. clone ( ) ) ;
110-
111- // Fetch the table data into writer using the Source directly
112- self . fetcher
113- . fetch_table (
93+ self . orchestrator
94+ . cache_table (
11495 & self . source ,
115- None , // catalog
116- & self . schema_name ,
117- & self . table_name ,
118- & mut writer,
119- )
120- . await
121- . map_err ( |e| {
122- DataFusionError :: External ( format ! ( "Failed to fetch table: {}" , e) . into ( ) )
123- } ) ?;
124-
125- // Close writer
126- writer. close ( ) . map_err ( |e| {
127- DataFusionError :: External ( format ! ( "Failed to close writer: {}" , e) . into ( ) )
128- } ) ?;
129-
130- // Finalize cache write (uploads to S3 if needed, returns URL)
131- let parquet_url = self
132- . storage
133- . finalize_cache_write (
134- & write_path,
13596 self . connection_id ,
13697 & self . schema_name ,
13798 & self . table_name ,
13899 )
139100 . await
140- . map_err ( |e| {
141- DataFusionError :: External ( format ! ( "Failed to finalize cache write: {}" , e) . into ( ) )
142- } ) ?;
143-
144- // Update catalog with new path
145- if let Ok ( Some ( info) ) = self
146- . catalog
147- . get_table ( self . connection_id , & self . schema_name , & self . table_name )
148- . await
149- {
150- let _ = self . catalog . update_table_sync ( info. id , & parquet_url) . await ;
151- }
152-
153- Ok ( parquet_url)
101+ . map_err ( |e| DataFusionError :: External ( format ! ( "Failed to cache table: {}" , e) . into ( ) ) )
154102 }
155103}
156104
0 commit comments