@@ -39,12 +39,20 @@ pub trait FileMetadata: Any + Send + Sync {
3939 /// Returns the file metadata as [`Any`] so that it can be downcasted to a specific
4040 /// implementation.
4141 fn as_any ( & self ) -> & dyn Any ;
42+
43+ /// Returns the size of the metadata in bytes.
44+ fn memory_size ( & self ) -> usize ;
4245}
4346
4447/// Cache to store file-embedded metadata.
4548pub trait FileMetadataCache :
4649 CacheAccessor < ObjectMeta , Arc < dyn FileMetadata > , Extra = ObjectMeta >
4750{
51+ /// Returns the cache's memory limit in bytes.
52+ fn cache_limit ( & self ) -> usize ;
53+
54+ /// Updates the cache with a new memory limit in bytes.
55+ fn update_cache_limit ( & self , limit : usize ) ;
4856}
4957
5058impl Debug for dyn CacheAccessor < Path , Arc < Statistics > , Extra = ObjectMeta > {
@@ -65,30 +73,36 @@ impl Debug for dyn FileMetadataCache {
6573 }
6674}
6775
/// Groups the optional file-statistics and list-files caches with the file-embedded metadata cache.
68- #[ derive( Default , Debug ) ]
76+ #[ derive( Debug ) ]
6977pub struct CacheManager {
7078 file_statistic_cache : Option < FileStatisticsCache > ,
7179 list_files_cache : Option < ListFilesCache > ,
72- file_metadata_cache : Option < Arc < dyn FileMetadataCache > > ,
80+ file_metadata_cache : Arc < dyn FileMetadataCache > ,
7381}
7482
7583impl CacheManager {
/// Builds a [`CacheManager`] from `config`.
///
/// The statistics and list-files caches are shared (`Arc`-cloned) from `config` when set.
/// The file metadata cache is taken from `config` when set; otherwise a
/// [`DefaultFilesMetadataCache`] is created with `config.metadata_cache_limit`.
7684 pub fn try_new ( config : & CacheManagerConfig ) -> Result < Arc < Self > > {
77- let mut manager = CacheManager :: default ( ) ;
78- if let Some ( cc) = & config. table_files_statistics_cache {
79- manager. file_statistic_cache = Some ( Arc :: clone ( cc) )
80- }
81- if let Some ( lc) = & config. list_files_cache {
82- manager. list_files_cache = Some ( Arc :: clone ( lc) )
83- }
84- if let Some ( mc) = & config. file_metadata_cache {
85- manager. file_metadata_cache = Some ( Arc :: clone ( mc) ) ;
86- } else {
87- manager. file_metadata_cache =
88- Some ( Arc :: new ( DefaultFilesMetadataCache :: default ( ) ) ) ;
89- }
90-
91- Ok ( Arc :: new ( manager) )
85+ let file_statistic_cache =
86+ config. table_files_statistics_cache . as_ref ( ) . map ( Arc :: clone) ;
87+
88+ let list_files_cache = config. list_files_cache . as_ref ( ) . map ( Arc :: clone) ;
89+
90+ let file_metadata_cache = config
91+ . file_metadata_cache
92+ . as_ref ( )
93+ . map ( Arc :: clone)
94+ . unwrap_or_else ( || {
95+ Arc :: new ( DefaultFilesMetadataCache :: new ( config. metadata_cache_limit ) )
96+ } ) ;
97+
98+ // Applied unconditionally so that a cache supplied through `config` also ends up with `config.metadata_cache_limit`.
99+ file_metadata_cache. update_cache_limit ( config. metadata_cache_limit ) ;
100+
101+ Ok ( Arc :: new ( CacheManager {
102+ file_statistic_cache,
103+ list_files_cache,
104+ file_metadata_cache,
105+ } ) )
92106 }
93107
94108 /// Get the cache of listing files statistics.
@@ -102,12 +116,19 @@ impl CacheManager {
102116 }
103117
104118 /// Get a shared handle to the file-embedded metadata cache (clones the `Arc`, not the cache).
105- pub fn get_file_metadata_cache ( & self ) -> Option < Arc < dyn FileMetadataCache > > {
106- self . file_metadata_cache . clone ( )
119+ pub fn get_file_metadata_cache ( & self ) -> Arc < dyn FileMetadataCache > {
120+ Arc :: clone ( & self . file_metadata_cache )
121+ }
122+
123+ /// Get the memory limit of the file-embedded metadata cache, in bytes, as reported by the cache itself.
124+ pub fn get_metadata_cache_limit ( & self ) -> usize {
125+ self . file_metadata_cache . cache_limit ( )
107126 }
108127}
109128
110- #[ derive( Clone , Default ) ]
129+ const DEFAULT_METADATA_CACHE_LIMIT : usize = 50 * 1024 * 1024 ; // 50 MiB, in bytes
130+
131+ #[ derive( Clone ) ]
111132pub struct CacheManagerConfig {
112133 /// Enable cache of files statistics when listing files.
113134 /// Avoid get same file statistics repeatedly in same datafusion session.
@@ -124,6 +145,19 @@ pub struct CacheManagerConfig {
124145 /// data file (e.g., Parquet footer and page metadata).
125146 /// If not provided, the [`CacheManager`] will create a [`DefaultFilesMetadataCache`].
126147 pub file_metadata_cache : Option < Arc < dyn FileMetadataCache > > ,
148+ /// Limit of the file-embedded metadata cache, in bytes.
149+ pub metadata_cache_limit : usize ,
150+ }
151+
/// Default configuration: every cache field takes its own `Default` value and the metadata
/// cache limit is set to `DEFAULT_METADATA_CACHE_LIMIT`.
152+ impl Default for CacheManagerConfig {
153+ fn default ( ) -> Self {
154+ Self {
155+ table_files_statistics_cache : Default :: default ( ) ,
156+ list_files_cache : Default :: default ( ) ,
157+ file_metadata_cache : Default :: default ( ) ,
158+ metadata_cache_limit : DEFAULT_METADATA_CACHE_LIMIT ,
159+ }
160+ }
127161}
128162
129163impl CacheManagerConfig {
@@ -147,4 +181,9 @@ impl CacheManagerConfig {
147181 self . file_metadata_cache = cache;
148182 self
149183 }
184+
/// Sets the limit, in bytes, of the file-embedded metadata cache and returns the updated config.
185+ pub fn with_metadata_cache_limit ( mut self , limit : usize ) -> Self {
186+ self . metadata_cache_limit = limit;
187+ self
188+ }
150189}
0 commit comments