@@ -26,20 +26,16 @@ use datafusion::datasource::TableProvider;
26
26
use datafusion:: prelude:: * ;
27
27
use itertools:: Itertools ;
28
28
use serde_json:: Value ;
29
- use std:: collections:: hash_map:: RandomState ;
30
- use std:: collections:: HashSet ;
31
- use std:: path:: { Path , PathBuf } ;
29
+ use std:: path:: Path ;
32
30
use std:: sync:: Arc ;
33
31
34
32
use crate :: option:: CONFIG ;
35
33
use crate :: storage:: ObjectStorageError ;
36
- use crate :: storage:: StorageDir ;
37
34
use crate :: storage:: { ObjectStorage , OBJECT_STORE_DATA_GRANULARITY } ;
38
35
use crate :: utils:: TimePeriod ;
39
36
use crate :: validator;
40
37
41
38
use self :: error:: { ExecuteError , ParseError } ;
42
- use table_provider:: QueryTableProvider ;
43
39
44
40
type Key = & ' static str ;
45
41
fn get_value ( value : & Value , key : Key ) -> Result < & str , Key > {
@@ -89,41 +85,18 @@ impl Query {
89
85
& self ,
90
86
storage : Arc < dyn ObjectStorage + Send > ,
91
87
) -> Result < ( Vec < RecordBatch > , Vec < String > ) , ExecuteError > {
92
- let dir = StorageDir :: new ( & self . stream_name ) ;
93
- // take a look at local dir and figure out what local cache we could use for this query
94
- let staging_arrows = dir
95
- . arrow_files_grouped_by_time ( )
96
- . into_iter ( )
97
- . filter ( |( path, _) | path_intersects_query ( path, self . start , self . end ) )
98
- . sorted_by ( |( a, _) , ( b, _) | Ord :: cmp ( a, b) )
99
- . collect_vec ( ) ;
100
-
101
- let staging_parquet_set: HashSet < & PathBuf , RandomState > =
102
- HashSet :: from_iter ( staging_arrows. iter ( ) . map ( |( p, _) | p) ) ;
103
-
104
- let other_staging_parquet = dir
105
- . parquet_files ( )
106
- . into_iter ( )
107
- . filter ( |path| path_intersects_query ( path, self . start , self . end ) )
108
- . filter ( |path| !staging_parquet_set. contains ( path) )
109
- . collect_vec ( ) ;
110
-
111
88
let ctx = SessionContext :: with_config_rt (
112
89
SessionConfig :: default ( ) ,
113
90
CONFIG . storage ( ) . get_datafusion_runtime ( ) ,
114
91
) ;
115
92
116
- let table = Arc :: new ( QueryTableProvider :: new (
117
- staging_arrows,
118
- other_staging_parquet,
119
- self . get_prefixes ( ) ,
120
- storage,
121
- Arc :: new ( self . get_schema ( ) . clone ( ) ) ,
122
- ) ) ;
93
+ let Some ( table) = storage. query_table ( self . get_prefixes ( ) , Arc :: new ( self . get_schema ( ) . clone ( ) ) ) ? else {
94
+ return Ok ( ( Vec :: new ( ) , Vec :: new ( ) ) ) ;
95
+ } ;
123
96
124
97
ctx. register_table (
125
98
& * self . stream_name ,
126
- Arc :: clone ( & table) as Arc < dyn TableProvider > ,
99
+ Arc :: new ( table) as Arc < dyn TableProvider > ,
127
100
)
128
101
. map_err ( ObjectStorageError :: DataFusionError ) ?;
129
102
// execute the query and collect results
@@ -144,11 +117,13 @@ impl Query {
144
117
}
145
118
}
146
119
120
+ #[ allow( unused) ]
147
121
fn path_intersects_query ( path : & Path , starttime : DateTime < Utc > , endtime : DateTime < Utc > ) -> bool {
148
122
let time = time_from_path ( path) ;
149
123
starttime <= time && time <= endtime
150
124
}
151
125
126
+ #[ allow( unused) ]
152
127
fn time_from_path ( path : & Path ) -> DateTime < Utc > {
153
128
let prefix = path
154
129
. file_name ( )
0 commit comments