|
17 | 17 |
|
18 | 18 | //! Functions that are query-able and searchable via the `\h` command |
19 | 19 |
|
| 20 | +use datafusion_common::instant::Instant; |
20 | 21 | use std::fmt; |
21 | 22 | use std::fs::File; |
22 | 23 | use std::str::FromStr; |
23 | 24 | use std::sync::Arc; |
24 | 25 |
|
25 | | -use arrow::array::{Int64Array, StringArray, TimestampMillisecondArray, UInt64Array}; |
26 | | -use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; |
| 26 | +use arrow::array::{ |
| 27 | + DurationMillisecondArray, GenericListArray, Int64Array, StringArray, StructArray, |
| 28 | + TimestampMillisecondArray, UInt64Array, |
| 29 | +}; |
| 30 | +use arrow::buffer::{Buffer, OffsetBuffer, ScalarBuffer}; |
| 31 | +use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef, TimeUnit}; |
27 | 32 | use arrow::record_batch::RecordBatch; |
28 | 33 | use arrow::util::pretty::pretty_format_batches; |
29 | 34 | use datafusion::catalog::{Session, TableFunctionImpl}; |
@@ -697,3 +702,156 @@ impl TableFunctionImpl for StatisticsCacheFunc { |
697 | 702 | Ok(Arc::new(statistics_cache)) |
698 | 703 | } |
699 | 704 | } |
| 705 | + |
| 706 | +#[derive(Debug)] |
| 707 | +struct ListFilesCacheTable { |
| 708 | + schema: SchemaRef, |
| 709 | + batch: RecordBatch, |
| 710 | +} |
| 711 | + |
| 712 | +#[async_trait] |
| 713 | +impl TableProvider for ListFilesCacheTable { |
| 714 | + fn as_any(&self) -> &dyn std::any::Any { |
| 715 | + self |
| 716 | + } |
| 717 | + |
| 718 | + fn schema(&self) -> arrow::datatypes::SchemaRef { |
| 719 | + self.schema.clone() |
| 720 | + } |
| 721 | + |
| 722 | + fn table_type(&self) -> datafusion::logical_expr::TableType { |
| 723 | + datafusion::logical_expr::TableType::Base |
| 724 | + } |
| 725 | + |
| 726 | + async fn scan( |
| 727 | + &self, |
| 728 | + _state: &dyn Session, |
| 729 | + projection: Option<&Vec<usize>>, |
| 730 | + _filters: &[Expr], |
| 731 | + _limit: Option<usize>, |
| 732 | + ) -> Result<Arc<dyn ExecutionPlan>> { |
| 733 | + Ok(MemorySourceConfig::try_new_exec( |
| 734 | + &[vec![self.batch.clone()]], |
| 735 | + TableProvider::schema(self), |
| 736 | + projection.cloned(), |
| 737 | + )?) |
| 738 | + } |
| 739 | +} |
| 740 | + |
| 741 | +#[derive(Debug)] |
| 742 | +pub struct ListFilesCacheFunc { |
| 743 | + cache_manager: Arc<CacheManager>, |
| 744 | +} |
| 745 | + |
| 746 | +impl ListFilesCacheFunc { |
| 747 | + pub fn new(cache_manager: Arc<CacheManager>) -> Self { |
| 748 | + Self { cache_manager } |
| 749 | + } |
| 750 | +} |
| 751 | + |
| 752 | +impl TableFunctionImpl for ListFilesCacheFunc { |
| 753 | + fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> { |
| 754 | + if !exprs.is_empty() { |
| 755 | + return plan_err!("list_files_cache should have no arguments"); |
| 756 | + } |
| 757 | + |
| 758 | + let nested_fields = Fields::from(vec![ |
| 759 | + Field::new("file_path", DataType::Utf8, false), |
| 760 | + Field::new( |
| 761 | + "file_modified", |
| 762 | + DataType::Timestamp(TimeUnit::Millisecond, None), |
| 763 | + false, |
| 764 | + ), |
| 765 | + Field::new("file_size_bytes", DataType::UInt64, false), |
| 766 | + Field::new("e_tag", DataType::Utf8, true), |
| 767 | + Field::new("version", DataType::Utf8, true), |
| 768 | + ]); |
| 769 | + |
| 770 | + let metadata_field = |
| 771 | + Field::new("metadata", DataType::Struct(nested_fields.clone()), true); |
| 772 | + |
| 773 | + let schema = Arc::new(Schema::new(vec![ |
| 774 | + Field::new("path", DataType::Utf8, false), |
| 775 | + Field::new("metadata_size_bytes", DataType::UInt64, false), |
| 776 | + Field::new( |
| 777 | + "expires_in", |
| 778 | + DataType::Duration(TimeUnit::Millisecond), |
| 779 | + true, |
| 780 | + ), |
| 781 | + Field::new( |
| 782 | + "metadata_list", |
| 783 | + DataType::List(Arc::new(metadata_field.clone())), |
| 784 | + true, |
| 785 | + ), |
| 786 | + ])); |
| 787 | + |
| 788 | + let mut path_arr = vec![]; |
| 789 | + let mut metadata_size_bytes_arr = vec![]; |
| 790 | + let mut expires_arr = vec![]; |
| 791 | + |
| 792 | + let mut file_path_arr = vec![]; |
| 793 | + let mut file_modified_arr = vec![]; |
| 794 | + let mut file_size_bytes_arr = vec![]; |
| 795 | + let mut etag_arr = vec![]; |
| 796 | + let mut version_arr = vec![]; |
| 797 | + let mut offsets: Vec<i32> = vec![0]; |
| 798 | + |
| 799 | + if let Some(list_files_cache) = self.cache_manager.get_list_files_cache() { |
| 800 | + let now = Instant::now(); |
| 801 | + let mut current_offset: i32 = 0; |
| 802 | + |
| 803 | + for (path, entry) in list_files_cache.list_entries() { |
| 804 | + path_arr.push(path.to_string()); |
| 805 | + metadata_size_bytes_arr.push(entry.size_bytes as u64); |
| 806 | + expires_arr.push( |
| 807 | + entry |
| 808 | + .expires |
| 809 | + .map(|t| t.duration_since(now).as_millis() as i64), |
| 810 | + ); |
| 811 | + |
| 812 | + for meta in entry.metas.iter() { |
| 813 | + file_path_arr.push(meta.location.to_string()); |
| 814 | + file_modified_arr.push(meta.last_modified.timestamp_millis()); |
| 815 | + file_size_bytes_arr.push(meta.size); |
| 816 | + etag_arr.push(meta.e_tag.clone()); |
| 817 | + version_arr.push(meta.version.clone()); |
| 818 | + } |
| 819 | + current_offset += entry.metas.len() as i32; |
| 820 | + offsets.push(current_offset); |
| 821 | + } |
| 822 | + } |
| 823 | + |
| 824 | + let struct_arr = StructArray::new( |
| 825 | + nested_fields, |
| 826 | + vec![ |
| 827 | + Arc::new(StringArray::from(file_path_arr)), |
| 828 | + Arc::new(TimestampMillisecondArray::from(file_modified_arr)), |
| 829 | + Arc::new(UInt64Array::from(file_size_bytes_arr)), |
| 830 | + Arc::new(StringArray::from(etag_arr)), |
| 831 | + Arc::new(StringArray::from(version_arr)), |
| 832 | + ], |
| 833 | + None, |
| 834 | + ); |
| 835 | + |
| 836 | + let offsets_buffer: OffsetBuffer<i32> = |
| 837 | + OffsetBuffer::new(ScalarBuffer::from(Buffer::from_vec(offsets))); |
| 838 | + |
| 839 | + let batch = RecordBatch::try_new( |
| 840 | + schema.clone(), |
| 841 | + vec![ |
| 842 | + Arc::new(StringArray::from(path_arr)), |
| 843 | + Arc::new(UInt64Array::from(metadata_size_bytes_arr)), |
| 844 | + Arc::new(DurationMillisecondArray::from(expires_arr)), |
| 845 | + Arc::new(GenericListArray::new( |
| 846 | + Arc::new(metadata_field), |
| 847 | + offsets_buffer, |
| 848 | + Arc::new(struct_arr), |
| 849 | + None, |
| 850 | + )), |
| 851 | + ], |
| 852 | + )?; |
| 853 | + |
| 854 | + let list_files_cache = ListFilesCacheTable { schema, batch }; |
| 855 | + Ok(Arc::new(list_files_cache)) |
| 856 | + } |
| 857 | +} |
0 commit comments