@@ -39,12 +39,20 @@ pub trait FileMetadata: Any + Send + Sync {
39
39
/// Returns the file metadata as [`Any`] so that it can be downcasted to a specific
40
40
/// implementation.
41
41
fn as_any ( & self ) -> & dyn Any ;
42
+
43
+ /// Returns the size of the metadata in bytes.
44
+ fn memory_size ( & self ) -> usize ;
42
45
}
43
46
44
47
/// Cache to store file-embedded metadata.
45
48
pub trait FileMetadataCache :
46
49
CacheAccessor < ObjectMeta , Arc < dyn FileMetadata > , Extra = ObjectMeta >
47
50
{
51
+ /// Returns the cache's memory limit in bytes.
52
+ fn cache_limit ( & self ) -> usize ;
53
+
54
+ /// Updates the cache with a new memory limit in bytes.
55
+ fn update_cache_limit ( & self , limit : usize ) ;
48
56
}
49
57
50
58
impl Debug for dyn CacheAccessor < Path , Arc < Statistics > , Extra = ObjectMeta > {
@@ -65,30 +73,36 @@ impl Debug for dyn FileMetadataCache {
65
73
}
66
74
}
67
75
68
- #[ derive( Default , Debug ) ]
76
+ #[ derive( Debug ) ]
69
77
pub struct CacheManager {
70
78
file_statistic_cache : Option < FileStatisticsCache > ,
71
79
list_files_cache : Option < ListFilesCache > ,
72
- file_metadata_cache : Option < Arc < dyn FileMetadataCache > > ,
80
+ file_metadata_cache : Arc < dyn FileMetadataCache > ,
73
81
}
74
82
75
83
impl CacheManager {
76
84
pub fn try_new ( config : & CacheManagerConfig ) -> Result < Arc < Self > > {
77
- let mut manager = CacheManager :: default ( ) ;
78
- if let Some ( cc) = & config. table_files_statistics_cache {
79
- manager. file_statistic_cache = Some ( Arc :: clone ( cc) )
80
- }
81
- if let Some ( lc) = & config. list_files_cache {
82
- manager. list_files_cache = Some ( Arc :: clone ( lc) )
83
- }
84
- if let Some ( mc) = & config. file_metadata_cache {
85
- manager. file_metadata_cache = Some ( Arc :: clone ( mc) ) ;
86
- } else {
87
- manager. file_metadata_cache =
88
- Some ( Arc :: new ( DefaultFilesMetadataCache :: default ( ) ) ) ;
89
- }
90
-
91
- Ok ( Arc :: new ( manager) )
85
+ let file_statistic_cache =
86
+ config. table_files_statistics_cache . as_ref ( ) . map ( Arc :: clone) ;
87
+
88
+ let list_files_cache = config. list_files_cache . as_ref ( ) . map ( Arc :: clone) ;
89
+
90
+ let file_metadata_cache = config
91
+ . file_metadata_cache
92
+ . as_ref ( )
93
+ . map ( Arc :: clone)
94
+ . unwrap_or_else ( || {
95
+ Arc :: new ( DefaultFilesMetadataCache :: new ( config. metadata_cache_limit ) )
96
+ } ) ;
97
+
98
+ // the cache memory limit might have changed, ensure the limit is updated
99
+ file_metadata_cache. update_cache_limit ( config. metadata_cache_limit ) ;
100
+
101
+ Ok ( Arc :: new ( CacheManager {
102
+ file_statistic_cache,
103
+ list_files_cache,
104
+ file_metadata_cache,
105
+ } ) )
92
106
}
93
107
94
108
/// Get the cache of listing files statistics.
@@ -102,12 +116,19 @@ impl CacheManager {
102
116
}
103
117
104
118
/// Get the file embedded metadata cache.
105
- pub fn get_file_metadata_cache ( & self ) -> Option < Arc < dyn FileMetadataCache > > {
106
- self . file_metadata_cache . clone ( )
119
+ pub fn get_file_metadata_cache ( & self ) -> Arc < dyn FileMetadataCache > {
120
+ Arc :: clone ( & self . file_metadata_cache )
121
+ }
122
+
123
+ /// Get the memory limit of the file embedded metadata cache, in bytes.
124
+ pub fn get_metadata_cache_limit ( & self ) -> usize {
125
+ self . file_metadata_cache . cache_limit ( )
107
126
}
108
127
}
109
128
110
- #[ derive( Clone , Default ) ]
129
+ const DEFAULT_METADATA_CACHE_LIMIT : usize = 50 * 1024 * 1024 ; // 50 MiB
130
+
131
+ #[ derive( Clone ) ]
111
132
pub struct CacheManagerConfig {
112
133
/// Enable cache of files statistics when listing files.
113
134
/// Avoid get same file statistics repeatedly in same datafusion session.
@@ -124,6 +145,19 @@ pub struct CacheManagerConfig {
124
145
/// data file (e.g., Parquet footer and page metadata).
125
146
/// If not provided, the [`CacheManager`] will create a [`DefaultFilesMetadataCache`].
126
147
pub file_metadata_cache : Option < Arc < dyn FileMetadataCache > > ,
148
+ /// Limit of the file-embedded metadata cache, in bytes.
149
+ pub metadata_cache_limit : usize ,
150
+ }
151
+
152
+ impl Default for CacheManagerConfig {
153
+ fn default ( ) -> Self {
154
+ Self {
155
+ table_files_statistics_cache : Default :: default ( ) ,
156
+ list_files_cache : Default :: default ( ) ,
157
+ file_metadata_cache : Default :: default ( ) ,
158
+ metadata_cache_limit : DEFAULT_METADATA_CACHE_LIMIT ,
159
+ }
160
+ }
127
161
}
128
162
129
163
impl CacheManagerConfig {
@@ -147,4 +181,9 @@ impl CacheManagerConfig {
147
181
self . file_metadata_cache = cache;
148
182
self
149
183
}
184
+
185
+ pub fn with_metadata_cache_limit ( mut self , limit : usize ) -> Self {
186
+ self . metadata_cache_limit = limit;
187
+ self
188
+ }
150
189
}
0 commit comments