|
12 | 12 | // See the License for the specific language governing permissions and |
13 | 13 | // limitations under the License. |
14 | 14 |
|
15 | | -use std::borrow::Cow; |
16 | 15 | use std::collections::BTreeMap; |
17 | 16 | use std::collections::BTreeSet; |
18 | 17 | use std::collections::HashMap; |
19 | 18 | use std::collections::HashSet; |
20 | 19 | use std::sync::Arc; |
21 | 20 |
|
22 | | -use common_arrow::arrow::bitmap::Bitmap; |
23 | 21 | use common_arrow::arrow::datatypes::DataType as ArrowDataType; |
24 | 22 | use common_arrow::arrow::datatypes::Field as ArrowField; |
25 | 23 | use common_arrow::arrow::datatypes::Schema as ArrowSchema; |
26 | 24 | use common_arrow::arrow::datatypes::TimeUnit; |
27 | 25 | use common_exception::ErrorCode; |
28 | 26 | use common_exception::Result; |
29 | | -use ethnum::i256; |
30 | 27 | use itertools::Itertools; |
31 | | -use jsonb::Number as JsonbNumber; |
32 | | -use jsonb::Object as JsonbObject; |
33 | | -use jsonb::Value as JsonbValue; |
34 | | -use rand::distributions::Alphanumeric; |
35 | | -use rand::distributions::DistString; |
36 | | -use rand::rngs::SmallRng; |
37 | | -use rand::Rng; |
38 | | -use rand::SeedableRng; |
39 | 28 | use serde::Deserialize; |
40 | 29 | use serde::Serialize; |
41 | 30 |
|
42 | | -use crate::types::array::ArrayColumn; |
43 | | -use crate::types::date::DATE_MAX; |
44 | | -use crate::types::date::DATE_MIN; |
45 | | -use crate::types::decimal::DecimalColumn; |
46 | 31 | use crate::types::decimal::DecimalDataType; |
47 | 32 | use crate::types::decimal::DecimalSize; |
48 | | -use crate::types::nullable::NullableColumn; |
49 | | -use crate::types::timestamp::TIMESTAMP_MAX; |
50 | | -use crate::types::timestamp::TIMESTAMP_MIN; |
51 | | -use crate::types::BooleanType; |
52 | 33 | use crate::types::DataType; |
53 | | -use crate::types::DateType; |
54 | 34 | use crate::types::NumberDataType; |
55 | | -use crate::types::NumberType; |
56 | | -use crate::types::StringType; |
57 | | -use crate::types::TimestampType; |
58 | | -use crate::types::VariantType; |
59 | | -use crate::with_number_mapped_type; |
60 | 35 | use crate::with_number_type; |
61 | | -use crate::BlockEntry; |
62 | | -use crate::Column; |
63 | | -use crate::FromData; |
64 | 36 | use crate::Scalar; |
65 | | -use crate::Value; |
66 | 37 | use crate::ARROW_EXT_TYPE_EMPTY_ARRAY; |
67 | 38 | use crate::ARROW_EXT_TYPE_EMPTY_MAP; |
68 | 39 | use crate::ARROW_EXT_TYPE_VARIANT; |
@@ -1025,187 +996,6 @@ impl TableDataType { |
1025 | 996 | _ => self.to_string().to_uppercase(), |
1026 | 997 | } |
1027 | 998 | } |
1028 | | - |
1029 | | - pub fn create_random_column(&self, len: usize) -> BlockEntry { |
1030 | | - match self { |
1031 | | - TableDataType::Null => BlockEntry { |
1032 | | - data_type: DataType::Null, |
1033 | | - value: Value::Column(Column::Null { len }), |
1034 | | - }, |
1035 | | - TableDataType::EmptyArray => BlockEntry { |
1036 | | - data_type: DataType::EmptyArray, |
1037 | | - value: Value::Column(Column::EmptyArray { len }), |
1038 | | - }, |
1039 | | - TableDataType::EmptyMap => BlockEntry { |
1040 | | - data_type: DataType::EmptyMap, |
1041 | | - value: Value::Column(Column::EmptyMap { len }), |
1042 | | - }, |
1043 | | - TableDataType::Boolean => BlockEntry { |
1044 | | - data_type: DataType::Boolean, |
1045 | | - value: Value::Column(BooleanType::from_data( |
1046 | | - (0..len).map(|_| SmallRng::from_entropy().gen_bool(0.5)), |
1047 | | - )), |
1048 | | - }, |
1049 | | - TableDataType::String => BlockEntry { |
1050 | | - data_type: DataType::String, |
1051 | | - value: Value::Column(StringType::from_data((0..len).map(|_| { |
1052 | | - let rng = SmallRng::from_entropy(); |
1053 | | - rng.sample_iter(&Alphanumeric) |
1054 | | - // randomly generate 5 characters. |
1055 | | - .take(5) |
1056 | | - .map(u8::from) |
1057 | | - .collect::<Vec<_>>() |
1058 | | - }))), |
1059 | | - }, |
1060 | | - TableDataType::Number(num_ty) => BlockEntry { |
1061 | | - data_type: DataType::Number(*num_ty), |
1062 | | - value: Value::Column(with_number_mapped_type!(|NUM_TYPE| match num_ty { |
1063 | | - NumberDataType::NUM_TYPE => NumberType::<NUM_TYPE>::from_data( |
1064 | | - (0..len).map(|_| SmallRng::from_entropy().gen()) |
1065 | | - ), |
1066 | | - })), |
1067 | | - }, |
1068 | | - // useless for now. |
1069 | | - TableDataType::Decimal(t) => match t { |
1070 | | - DecimalDataType::Decimal128(x) => BlockEntry { |
1071 | | - data_type: DataType::Decimal(*t), |
1072 | | - value: Value::Column(Column::Decimal(DecimalColumn::Decimal128( |
1073 | | - vec![0i128; len].into(), |
1074 | | - *x, |
1075 | | - ))), |
1076 | | - }, |
1077 | | - DecimalDataType::Decimal256(x) => BlockEntry { |
1078 | | - data_type: DataType::Decimal(*t), |
1079 | | - value: Value::Column(Column::Decimal(DecimalColumn::Decimal256( |
1080 | | - vec![i256::ZERO; len].into(), |
1081 | | - *x, |
1082 | | - ))), |
1083 | | - }, |
1084 | | - }, |
1085 | | - TableDataType::Timestamp => BlockEntry { |
1086 | | - data_type: DataType::Timestamp, |
1087 | | - value: Value::Column(TimestampType::from_data( |
1088 | | - (0..len) |
1089 | | - .map(|_| SmallRng::from_entropy().gen_range(TIMESTAMP_MIN..=TIMESTAMP_MAX)) |
1090 | | - .collect::<Vec<i64>>(), |
1091 | | - )), |
1092 | | - }, |
1093 | | - TableDataType::Date => BlockEntry { |
1094 | | - data_type: DataType::Date, |
1095 | | - value: Value::Column(DateType::from_data( |
1096 | | - (0..len) |
1097 | | - .map(|_| SmallRng::from_entropy().gen_range(DATE_MIN..=DATE_MAX)) |
1098 | | - .collect::<Vec<i32>>(), |
1099 | | - )), |
1100 | | - }, |
1101 | | - TableDataType::Nullable(inner_ty) => { |
1102 | | - let entry = inner_ty.create_random_column(len); |
1103 | | - BlockEntry { |
1104 | | - data_type: DataType::Nullable(Box::new(entry.data_type)), |
1105 | | - value: Value::Column(Column::Nullable(Box::new(NullableColumn { |
1106 | | - column: entry.value.into_column().unwrap(), |
1107 | | - validity: Bitmap::from( |
1108 | | - (0..len) |
1109 | | - .map(|_| SmallRng::from_entropy().gen_bool(0.5)) |
1110 | | - .collect::<Vec<bool>>(), |
1111 | | - ), |
1112 | | - }))), |
1113 | | - } |
1114 | | - } |
1115 | | - TableDataType::Array(inner_ty) => { |
1116 | | - let mut inner_len = 0; |
1117 | | - let mut offsets: Vec<u64> = Vec::with_capacity(len + 1); |
1118 | | - offsets.push(inner_len); |
1119 | | - for _ in 0..len { |
1120 | | - inner_len += SmallRng::from_entropy().gen_range(0..=3); |
1121 | | - offsets.push(inner_len); |
1122 | | - } |
1123 | | - let entry = inner_ty.create_random_column(inner_len as usize); |
1124 | | - BlockEntry { |
1125 | | - data_type: DataType::Array(Box::new(entry.data_type)), |
1126 | | - value: Value::Column(Column::Array(Box::new(ArrayColumn { |
1127 | | - values: entry.value.into_column().unwrap(), |
1128 | | - offsets: offsets.into(), |
1129 | | - }))), |
1130 | | - } |
1131 | | - } |
1132 | | - TableDataType::Map(inner_ty) => { |
1133 | | - let mut inner_len = 0; |
1134 | | - let mut offsets: Vec<u64> = Vec::with_capacity(len + 1); |
1135 | | - offsets.push(inner_len); |
1136 | | - for _ in 0..len { |
1137 | | - inner_len += SmallRng::from_entropy().gen_range(0..=3); |
1138 | | - offsets.push(inner_len); |
1139 | | - } |
1140 | | - let entry = inner_ty.create_random_column(inner_len as usize); |
1141 | | - BlockEntry { |
1142 | | - data_type: DataType::Map(Box::new(entry.data_type)), |
1143 | | - value: Value::Column(Column::Map(Box::new(ArrayColumn { |
1144 | | - values: entry.value.into_column().unwrap(), |
1145 | | - offsets: offsets.into(), |
1146 | | - }))), |
1147 | | - } |
1148 | | - } |
1149 | | - TableDataType::Tuple { fields_type, .. } => { |
1150 | | - let mut fields = Vec::with_capacity(len); |
1151 | | - let mut types = Vec::with_capacity(len); |
1152 | | - for field_type in fields_type.iter() { |
1153 | | - let entry = field_type.create_random_column(len); |
1154 | | - fields.push(entry.value.into_column().unwrap()); |
1155 | | - types.push(entry.data_type); |
1156 | | - } |
1157 | | - BlockEntry { |
1158 | | - data_type: DataType::Tuple(types), |
1159 | | - value: Value::Column(Column::Tuple(fields)), |
1160 | | - } |
1161 | | - } |
1162 | | - TableDataType::Variant => { |
1163 | | - let mut data = Vec::with_capacity(len); |
1164 | | - for _ in 0..len { |
1165 | | - let opt = SmallRng::from_entropy().gen_range(0..=6); |
1166 | | - let val = match opt { |
1167 | | - 0 => JsonbValue::Null, |
1168 | | - 1 => JsonbValue::Bool(true), |
1169 | | - 2 => JsonbValue::Bool(false), |
1170 | | - 3 => { |
1171 | | - let s = Alphanumeric.sample_string(&mut rand::thread_rng(), 5); |
1172 | | - JsonbValue::String(Cow::from(s)) |
1173 | | - } |
1174 | | - 4 => { |
1175 | | - let num = SmallRng::from_entropy().gen_range(i64::MIN..=i64::MAX); |
1176 | | - JsonbValue::Number(JsonbNumber::Int64(num)) |
1177 | | - } |
1178 | | - 5 => { |
1179 | | - let arr_len = SmallRng::from_entropy().gen_range(0..=5); |
1180 | | - let mut values = Vec::with_capacity(arr_len); |
1181 | | - for _ in 0..arr_len { |
1182 | | - let num = SmallRng::from_entropy().gen_range(i64::MIN..=i64::MAX); |
1183 | | - values.push(JsonbValue::Number(JsonbNumber::Int64(num))) |
1184 | | - } |
1185 | | - JsonbValue::Array(values) |
1186 | | - } |
1187 | | - 6 => { |
1188 | | - let obj_len = SmallRng::from_entropy().gen_range(0..=5); |
1189 | | - let mut obj = JsonbObject::new(); |
1190 | | - for _ in 0..obj_len { |
1191 | | - let k = Alphanumeric.sample_string(&mut rand::thread_rng(), 5); |
1192 | | - let num = SmallRng::from_entropy().gen_range(i64::MIN..=i64::MAX); |
1193 | | - let v = JsonbValue::Number(JsonbNumber::Int64(num)); |
1194 | | - obj.insert(k, v); |
1195 | | - } |
1196 | | - JsonbValue::Object(obj) |
1197 | | - } |
1198 | | - _ => JsonbValue::Null, |
1199 | | - }; |
1200 | | - data.push(val.to_vec()); |
1201 | | - } |
1202 | | - BlockEntry { |
1203 | | - data_type: DataType::Variant, |
1204 | | - value: Value::Column(VariantType::from_data(data)), |
1205 | | - } |
1206 | | - } |
1207 | | - } |
1208 | | - } |
1209 | 999 | } |
1210 | 1000 |
|
1211 | 1001 | pub type DataSchemaRef = Arc<DataSchema>; |
|
0 commit comments