@@ -22,19 +22,22 @@ use std::{
22
22
atomic:: { AtomicU8 , Ordering } ,
23
23
Arc ,
24
24
} ,
25
+ time:: Duration ,
25
26
} ;
26
27
27
28
use async_trait:: async_trait;
29
+ use chrono:: Utc ;
28
30
use datafusion:: {
31
+ common:: instant:: Instant ,
29
32
error:: DataFusionError ,
30
33
execution:: object_store:: { DefaultObjectStoreRegistry , ObjectStoreRegistry } ,
31
34
} ;
32
35
use futures:: stream:: BoxStream ;
33
36
use object_store:: {
34
- path:: Path , GetOptions , GetResult , ListResult , MultipartUpload , ObjectMeta ,
37
+ path:: Path , GetOptions , GetRange , GetResult , ListResult , MultipartUpload , ObjectMeta ,
35
38
ObjectStore , PutMultipartOptions , PutOptions , PutPayload , PutResult , Result ,
36
39
} ;
37
- use parking_lot:: RwLock ;
40
+ use parking_lot:: { Mutex , RwLock } ;
38
41
use url:: Url ;
39
42
40
43
/// The profiling mode to use for an [`InstrumentedObjectStore`] instance. Collecting profiling
@@ -81,6 +84,7 @@ impl From<u8> for InstrumentedObjectStoreMode {
81
84
pub struct InstrumentedObjectStore {
82
85
inner : Arc < dyn ObjectStore > ,
83
86
instrument_mode : AtomicU8 ,
87
+ requests : Mutex < Vec < RequestDetails > > ,
84
88
}
85
89
86
90
impl InstrumentedObjectStore {
@@ -89,12 +93,46 @@ impl InstrumentedObjectStore {
89
93
Self {
90
94
inner : object_store,
91
95
instrument_mode,
96
+ requests : Mutex :: new ( Vec :: new ( ) ) ,
92
97
}
93
98
}
94
99
95
100
fn set_instrument_mode ( & self , mode : InstrumentedObjectStoreMode ) {
96
101
self . instrument_mode . store ( mode as u8 , Ordering :: Relaxed )
97
102
}
103
+
104
+ /// Returns all [`RequestDetails`] accumulated in this [`InstrumentedObjectStore`] and clears
105
+ /// the stored requests
106
+ pub fn take_requests ( & self ) -> Vec < RequestDetails > {
107
+ let mut req = self . requests . lock ( ) ;
108
+
109
+ req. drain ( ..) . collect ( )
110
+ }
111
+
112
+ async fn instrumented_get_opts (
113
+ & self ,
114
+ location : & Path ,
115
+ options : GetOptions ,
116
+ ) -> Result < GetResult > {
117
+ let timestamp = Utc :: now ( ) ;
118
+ let range = options. range . clone ( ) ;
119
+
120
+ let start = Instant :: now ( ) ;
121
+ let ret = self . inner . get_opts ( location, options) . await ?;
122
+ let elapsed = start. elapsed ( ) ;
123
+
124
+ self . requests . lock ( ) . push ( RequestDetails {
125
+ op : Operation :: Get ,
126
+ path : location. clone ( ) ,
127
+ timestamp,
128
+ duration : Some ( elapsed) ,
129
+ size : Some ( ( ret. range . end - ret. range . start ) as usize ) ,
130
+ range,
131
+ extra_display : None ,
132
+ } ) ;
133
+
134
+ Ok ( ret)
135
+ }
98
136
}
99
137
100
138
impl fmt:: Display for InstrumentedObjectStore {
@@ -129,6 +167,12 @@ impl ObjectStore for InstrumentedObjectStore {
129
167
}
130
168
131
169
async fn get_opts ( & self , location : & Path , options : GetOptions ) -> Result < GetResult > {
170
+ if self . instrument_mode . load ( Ordering :: Relaxed )
171
+ != InstrumentedObjectStoreMode :: Disabled as u8
172
+ {
173
+ return self . instrumented_get_opts ( location, options) . await ;
174
+ }
175
+
132
176
self . inner . get_opts ( location, options) . await
133
177
}
134
178
@@ -157,6 +201,55 @@ impl ObjectStore for InstrumentedObjectStore {
157
201
}
158
202
}
159
203
204
+ #[ derive( Copy , Clone , Debug , PartialEq , Eq , Hash ) ]
205
+ enum Operation {
206
+ _Copy,
207
+ _Delete,
208
+ Get ,
209
+ _Head,
210
+ _List,
211
+ _Put,
212
+ }
213
+
214
+ /// Holds profiling details about individual requests made through an [`InstrumentedObjectStore`]
215
+ #[ derive( Debug ) ]
216
+ pub struct RequestDetails {
217
+ op : Operation ,
218
+ path : Path ,
219
+ timestamp : chrono:: DateTime < Utc > ,
220
+ duration : Option < Duration > ,
221
+ size : Option < usize > ,
222
+ range : Option < GetRange > ,
223
+ extra_display : Option < String > ,
224
+ }
225
+
226
+ impl fmt:: Display for RequestDetails {
227
+ fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
228
+ let mut output_parts = vec ! [ format!(
229
+ "{} operation={:?}" ,
230
+ self . timestamp. to_rfc3339( ) ,
231
+ self . op
232
+ ) ] ;
233
+
234
+ if let Some ( d) = self . duration {
235
+ output_parts. push ( format ! ( "duration={:.6}s" , d. as_secs_f32( ) ) ) ;
236
+ }
237
+ if let Some ( s) = self . size {
238
+ output_parts. push ( format ! ( "size={s}" ) ) ;
239
+ }
240
+ if let Some ( r) = & self . range {
241
+ output_parts. push ( format ! ( "range: {r}" ) ) ;
242
+ }
243
+ output_parts. push ( format ! ( "path={}" , self . path) ) ;
244
+
245
+ if let Some ( ed) = & self . extra_display {
246
+ output_parts. push ( ed. clone ( ) ) ;
247
+ }
248
+
249
+ write ! ( f, "{}" , output_parts. join( " " ) )
250
+ }
251
+ }
252
+
160
253
/// Provides access to [`InstrumentedObjectStore`] instances that record requests for reporting
161
254
#[ derive( Debug ) ]
162
255
pub struct InstrumentedObjectStoreRegistry {
@@ -275,4 +368,56 @@ mod tests {
275
368
assert ! ( fetched. is_ok( ) ) ;
276
369
assert_eq ! ( reg. stores( ) . len( ) , 1 ) ;
277
370
}
371
+
372
+ #[ tokio:: test]
373
+ async fn instrumented_store ( ) {
374
+ let store = Arc :: new ( object_store:: memory:: InMemory :: new ( ) ) ;
375
+ let mode = AtomicU8 :: new ( InstrumentedObjectStoreMode :: default ( ) as u8 ) ;
376
+ let instrumented = InstrumentedObjectStore :: new ( store, mode) ;
377
+
378
+ // Load the test store with some data we can read
379
+ let path = Path :: from ( "test/data" ) ;
380
+ let payload = PutPayload :: from_static ( b"test_data" ) ;
381
+ instrumented. put ( & path, payload) . await . unwrap ( ) ;
382
+
383
+ // By default no requests should be instrumented/stored
384
+ assert ! ( instrumented. requests. lock( ) . is_empty( ) ) ;
385
+ let _ = instrumented. get ( & path) . await . unwrap ( ) ;
386
+ assert ! ( instrumented. requests. lock( ) . is_empty( ) ) ;
387
+
388
+ instrumented. set_instrument_mode ( InstrumentedObjectStoreMode :: Enabled ) ;
389
+ assert ! ( instrumented. requests. lock( ) . is_empty( ) ) ;
390
+ let _ = instrumented. get ( & path) . await . unwrap ( ) ;
391
+ assert_eq ! ( instrumented. requests. lock( ) . len( ) , 1 ) ;
392
+
393
+ let mut requests = instrumented. take_requests ( ) ;
394
+ assert_eq ! ( requests. len( ) , 1 ) ;
395
+ assert ! ( instrumented. requests. lock( ) . is_empty( ) ) ;
396
+
397
+ let request = requests. pop ( ) . unwrap ( ) ;
398
+ assert_eq ! ( request. op, Operation :: Get ) ;
399
+ assert_eq ! ( request. path, path) ;
400
+ assert ! ( request. duration. is_some( ) ) ;
401
+ assert_eq ! ( request. size, Some ( 9 ) ) ;
402
+ assert_eq ! ( request. range, None ) ;
403
+ assert ! ( request. extra_display. is_none( ) ) ;
404
+ }
405
+
406
+ #[ test]
407
+ fn request_details ( ) {
408
+ let rd = RequestDetails {
409
+ op : Operation :: Get ,
410
+ path : Path :: from ( "test" ) ,
411
+ timestamp : chrono:: DateTime :: from_timestamp ( 0 , 0 ) . unwrap ( ) ,
412
+ duration : Some ( Duration :: new ( 5 , 0 ) ) ,
413
+ size : Some ( 10 ) ,
414
+ range : Some ( ( ..10 ) . into ( ) ) ,
415
+ extra_display : Some ( String :: from ( "extra info" ) ) ,
416
+ } ;
417
+
418
+ assert_eq ! (
419
+ format!( "{rd}" ) ,
420
+ "1970-01-01T00:00:00+00:00 operation=Get duration=5.000000s size=10 range: bytes=0-9 path=test extra info"
421
+ ) ;
422
+ }
278
423
}
0 commit comments