|
| 1 | +// Copyright 2023 Datafuse Labs. |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +use common_expression::types::string::StringColumnBuilder; |
| 16 | +use common_expression::types::DataType; |
| 17 | +use common_expression::types::NumberDataType; |
| 18 | +use common_expression::types::UInt64Type; |
| 19 | +use common_expression::BlockEntry; |
| 20 | +use common_expression::ColumnId; |
| 21 | +use common_expression::FromData; |
| 22 | +use common_expression::Scalar; |
| 23 | +use common_expression::TableDataType; |
| 24 | +use common_expression::Value; |
| 25 | + |
// Bit widths reserved for the segment id and the block id when generating the
// internal column `_row_id`.
// `DEFAULT_BLOCK_PER_SEGMENT` is 1000, so 10 bits are enough for `block_id`.
const NUM_BLOCK_ID_BITS: usize = 10;
const NUM_SEGMENT_ID_BITS: usize = 22;
| 30 | + |
/// Column name of the internal row-id column.
pub const ROW_ID: &str = "_row_id";
/// Column name of the internal snapshot-name column.
pub const SNAPSHOT_NAME: &str = "_snapshot_name";
/// Column name of the internal segment-name column.
pub const SEGMENT_NAME: &str = "_segment_name";
/// Column name of the internal block-name column.
pub const BLOCK_NAME: &str = "_block_name";
| 35 | + |
/// Metadata from which the values of internal columns are generated.
#[derive(Debug)]
pub struct InternalColumnMeta {
    /// Segment id; one of the two ids folded into generated `_row_id` values.
    pub segment_id: usize,
    /// Block id; the other id folded into generated `_row_id` values.
    pub block_id: usize,
    /// Location string emitted as the value of a block-name column.
    pub block_location: String,
    /// Location string emitted as the value of a segment-name column.
    pub segment_location: String,
    /// Location string emitted as the value of a snapshot-name column.
    pub snapshot_location: String,
}
| 45 | + |
| 46 | +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)] |
| 47 | +pub enum InternalColumnType { |
| 48 | + RowId, |
| 49 | + BlockName, |
| 50 | + SegmentName, |
| 51 | + SnapshotName, |
| 52 | +} |
| 53 | + |
| 54 | +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)] |
| 55 | +pub struct InternalColumn { |
| 56 | + pub column_name: String, |
| 57 | + pub column_type: InternalColumnType, |
| 58 | +} |
| 59 | + |
| 60 | +impl InternalColumn { |
| 61 | + pub fn new(name: &str, column_type: InternalColumnType) -> Self { |
| 62 | + InternalColumn { |
| 63 | + column_name: name.to_string(), |
| 64 | + column_type, |
| 65 | + } |
| 66 | + } |
| 67 | + |
| 68 | + pub fn column_type(&self) -> &InternalColumnType { |
| 69 | + &self.column_type |
| 70 | + } |
| 71 | + |
| 72 | + pub fn table_data_type(&self) -> TableDataType { |
| 73 | + match &self.column_type { |
| 74 | + InternalColumnType::RowId => TableDataType::Number(NumberDataType::UInt64), |
| 75 | + InternalColumnType::BlockName => TableDataType::String, |
| 76 | + InternalColumnType::SegmentName => TableDataType::String, |
| 77 | + InternalColumnType::SnapshotName => TableDataType::String, |
| 78 | + } |
| 79 | + } |
| 80 | + |
| 81 | + pub fn data_type(&self) -> DataType { |
| 82 | + let t = &self.table_data_type(); |
| 83 | + t.into() |
| 84 | + } |
| 85 | + |
| 86 | + pub fn column_name(&self) -> &String { |
| 87 | + &self.column_name |
| 88 | + } |
| 89 | + |
| 90 | + pub fn column_id(&self) -> ColumnId { |
| 91 | + match &self.column_type { |
| 92 | + InternalColumnType::RowId => u32::MAX, |
| 93 | + InternalColumnType::BlockName => u32::MAX - 1, |
| 94 | + InternalColumnType::SegmentName => u32::MAX - 2, |
| 95 | + InternalColumnType::SnapshotName => u32::MAX - 3, |
| 96 | + } |
| 97 | + } |
| 98 | + |
| 99 | + pub fn generate_column_values(&self, meta: &InternalColumnMeta, num_rows: usize) -> BlockEntry { |
| 100 | + match &self.column_type { |
| 101 | + InternalColumnType::RowId => { |
| 102 | + let block_id = meta.block_id as u64; |
| 103 | + let seg_id = meta.segment_id as u64; |
| 104 | + let high_32bit = (seg_id << NUM_SEGMENT_ID_BITS) + (block_id << NUM_BLOCK_ID_BITS); |
| 105 | + let mut row_ids = Vec::with_capacity(num_rows); |
| 106 | + for i in 0..num_rows { |
| 107 | + let row_id = high_32bit + i as u64; |
| 108 | + row_ids.push(row_id); |
| 109 | + } |
| 110 | + BlockEntry { |
| 111 | + data_type: DataType::Number(NumberDataType::UInt64), |
| 112 | + value: Value::Column(UInt64Type::from_data(row_ids)), |
| 113 | + } |
| 114 | + } |
| 115 | + InternalColumnType::BlockName => { |
| 116 | + let mut builder = StringColumnBuilder::with_capacity(1, meta.block_location.len()); |
| 117 | + builder.put_str(&meta.block_location); |
| 118 | + builder.commit_row(); |
| 119 | + BlockEntry { |
| 120 | + data_type: DataType::String, |
| 121 | + value: Value::Scalar(Scalar::String(builder.build_scalar())), |
| 122 | + } |
| 123 | + } |
| 124 | + InternalColumnType::SegmentName => { |
| 125 | + let mut builder = |
| 126 | + StringColumnBuilder::with_capacity(1, meta.segment_location.len()); |
| 127 | + builder.put_str(&meta.segment_location); |
| 128 | + builder.commit_row(); |
| 129 | + BlockEntry { |
| 130 | + data_type: DataType::String, |
| 131 | + value: Value::Scalar(Scalar::String(builder.build_scalar())), |
| 132 | + } |
| 133 | + } |
| 134 | + InternalColumnType::SnapshotName => { |
| 135 | + let mut builder = |
| 136 | + StringColumnBuilder::with_capacity(1, meta.snapshot_location.len()); |
| 137 | + builder.put_str(&meta.snapshot_location); |
| 138 | + builder.commit_row(); |
| 139 | + BlockEntry { |
| 140 | + data_type: DataType::String, |
| 141 | + value: Value::Scalar(Scalar::String(builder.build_scalar())), |
| 142 | + } |
| 143 | + } |
| 144 | + } |
| 145 | + } |
| 146 | +} |
0 commit comments