Skip to content

Commit 7c3018e

Browse files
authored
Merge pull request #10605 from andylokandy/chore
chore(expr): move random_column() from Schema to Column
2 parents 7cf708f + 0e58ade commit 7c3018e

File tree

3 files changed: +164 −213 lines changed

src/query/expression/src/schema.rs

Lines changed: 0 additions & 210 deletions
Original file line number | Diff line number | Diff line change
@@ -12,57 +12,28 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
use std::borrow::Cow;
1615
use std::collections::BTreeMap;
1716
use std::collections::BTreeSet;
1817
use std::collections::HashMap;
1918
use std::collections::HashSet;
2019
use std::sync::Arc;
2120

22-
use common_arrow::arrow::bitmap::Bitmap;
2321
use common_arrow::arrow::datatypes::DataType as ArrowDataType;
2422
use common_arrow::arrow::datatypes::Field as ArrowField;
2523
use common_arrow::arrow::datatypes::Schema as ArrowSchema;
2624
use common_arrow::arrow::datatypes::TimeUnit;
2725
use common_exception::ErrorCode;
2826
use common_exception::Result;
29-
use ethnum::i256;
3027
use itertools::Itertools;
31-
use jsonb::Number as JsonbNumber;
32-
use jsonb::Object as JsonbObject;
33-
use jsonb::Value as JsonbValue;
34-
use rand::distributions::Alphanumeric;
35-
use rand::distributions::DistString;
36-
use rand::rngs::SmallRng;
37-
use rand::Rng;
38-
use rand::SeedableRng;
3928
use serde::Deserialize;
4029
use serde::Serialize;
4130

42-
use crate::types::array::ArrayColumn;
43-
use crate::types::date::DATE_MAX;
44-
use crate::types::date::DATE_MIN;
45-
use crate::types::decimal::DecimalColumn;
4631
use crate::types::decimal::DecimalDataType;
4732
use crate::types::decimal::DecimalSize;
48-
use crate::types::nullable::NullableColumn;
49-
use crate::types::timestamp::TIMESTAMP_MAX;
50-
use crate::types::timestamp::TIMESTAMP_MIN;
51-
use crate::types::BooleanType;
5233
use crate::types::DataType;
53-
use crate::types::DateType;
5434
use crate::types::NumberDataType;
55-
use crate::types::NumberType;
56-
use crate::types::StringType;
57-
use crate::types::TimestampType;
58-
use crate::types::VariantType;
59-
use crate::with_number_mapped_type;
6035
use crate::with_number_type;
61-
use crate::BlockEntry;
62-
use crate::Column;
63-
use crate::FromData;
6436
use crate::Scalar;
65-
use crate::Value;
6637
use crate::ARROW_EXT_TYPE_EMPTY_ARRAY;
6738
use crate::ARROW_EXT_TYPE_EMPTY_MAP;
6839
use crate::ARROW_EXT_TYPE_VARIANT;
@@ -1025,187 +996,6 @@ impl TableDataType {
1025996
_ => self.to_string().to_uppercase(),
1026997
}
1027998
}
1028-
1029-
pub fn create_random_column(&self, len: usize) -> BlockEntry {
1030-
match self {
1031-
TableDataType::Null => BlockEntry {
1032-
data_type: DataType::Null,
1033-
value: Value::Column(Column::Null { len }),
1034-
},
1035-
TableDataType::EmptyArray => BlockEntry {
1036-
data_type: DataType::EmptyArray,
1037-
value: Value::Column(Column::EmptyArray { len }),
1038-
},
1039-
TableDataType::EmptyMap => BlockEntry {
1040-
data_type: DataType::EmptyMap,
1041-
value: Value::Column(Column::EmptyMap { len }),
1042-
},
1043-
TableDataType::Boolean => BlockEntry {
1044-
data_type: DataType::Boolean,
1045-
value: Value::Column(BooleanType::from_data(
1046-
(0..len).map(|_| SmallRng::from_entropy().gen_bool(0.5)),
1047-
)),
1048-
},
1049-
TableDataType::String => BlockEntry {
1050-
data_type: DataType::String,
1051-
value: Value::Column(StringType::from_data((0..len).map(|_| {
1052-
let rng = SmallRng::from_entropy();
1053-
rng.sample_iter(&Alphanumeric)
1054-
// randomly generate 5 characters.
1055-
.take(5)
1056-
.map(u8::from)
1057-
.collect::<Vec<_>>()
1058-
}))),
1059-
},
1060-
TableDataType::Number(num_ty) => BlockEntry {
1061-
data_type: DataType::Number(*num_ty),
1062-
value: Value::Column(with_number_mapped_type!(|NUM_TYPE| match num_ty {
1063-
NumberDataType::NUM_TYPE => NumberType::<NUM_TYPE>::from_data(
1064-
(0..len).map(|_| SmallRng::from_entropy().gen())
1065-
),
1066-
})),
1067-
},
1068-
// useless for now.
1069-
TableDataType::Decimal(t) => match t {
1070-
DecimalDataType::Decimal128(x) => BlockEntry {
1071-
data_type: DataType::Decimal(*t),
1072-
value: Value::Column(Column::Decimal(DecimalColumn::Decimal128(
1073-
vec![0i128; len].into(),
1074-
*x,
1075-
))),
1076-
},
1077-
DecimalDataType::Decimal256(x) => BlockEntry {
1078-
data_type: DataType::Decimal(*t),
1079-
value: Value::Column(Column::Decimal(DecimalColumn::Decimal256(
1080-
vec![i256::ZERO; len].into(),
1081-
*x,
1082-
))),
1083-
},
1084-
},
1085-
TableDataType::Timestamp => BlockEntry {
1086-
data_type: DataType::Timestamp,
1087-
value: Value::Column(TimestampType::from_data(
1088-
(0..len)
1089-
.map(|_| SmallRng::from_entropy().gen_range(TIMESTAMP_MIN..=TIMESTAMP_MAX))
1090-
.collect::<Vec<i64>>(),
1091-
)),
1092-
},
1093-
TableDataType::Date => BlockEntry {
1094-
data_type: DataType::Date,
1095-
value: Value::Column(DateType::from_data(
1096-
(0..len)
1097-
.map(|_| SmallRng::from_entropy().gen_range(DATE_MIN..=DATE_MAX))
1098-
.collect::<Vec<i32>>(),
1099-
)),
1100-
},
1101-
TableDataType::Nullable(inner_ty) => {
1102-
let entry = inner_ty.create_random_column(len);
1103-
BlockEntry {
1104-
data_type: DataType::Nullable(Box::new(entry.data_type)),
1105-
value: Value::Column(Column::Nullable(Box::new(NullableColumn {
1106-
column: entry.value.into_column().unwrap(),
1107-
validity: Bitmap::from(
1108-
(0..len)
1109-
.map(|_| SmallRng::from_entropy().gen_bool(0.5))
1110-
.collect::<Vec<bool>>(),
1111-
),
1112-
}))),
1113-
}
1114-
}
1115-
TableDataType::Array(inner_ty) => {
1116-
let mut inner_len = 0;
1117-
let mut offsets: Vec<u64> = Vec::with_capacity(len + 1);
1118-
offsets.push(inner_len);
1119-
for _ in 0..len {
1120-
inner_len += SmallRng::from_entropy().gen_range(0..=3);
1121-
offsets.push(inner_len);
1122-
}
1123-
let entry = inner_ty.create_random_column(inner_len as usize);
1124-
BlockEntry {
1125-
data_type: DataType::Array(Box::new(entry.data_type)),
1126-
value: Value::Column(Column::Array(Box::new(ArrayColumn {
1127-
values: entry.value.into_column().unwrap(),
1128-
offsets: offsets.into(),
1129-
}))),
1130-
}
1131-
}
1132-
TableDataType::Map(inner_ty) => {
1133-
let mut inner_len = 0;
1134-
let mut offsets: Vec<u64> = Vec::with_capacity(len + 1);
1135-
offsets.push(inner_len);
1136-
for _ in 0..len {
1137-
inner_len += SmallRng::from_entropy().gen_range(0..=3);
1138-
offsets.push(inner_len);
1139-
}
1140-
let entry = inner_ty.create_random_column(inner_len as usize);
1141-
BlockEntry {
1142-
data_type: DataType::Map(Box::new(entry.data_type)),
1143-
value: Value::Column(Column::Map(Box::new(ArrayColumn {
1144-
values: entry.value.into_column().unwrap(),
1145-
offsets: offsets.into(),
1146-
}))),
1147-
}
1148-
}
1149-
TableDataType::Tuple { fields_type, .. } => {
1150-
let mut fields = Vec::with_capacity(len);
1151-
let mut types = Vec::with_capacity(len);
1152-
for field_type in fields_type.iter() {
1153-
let entry = field_type.create_random_column(len);
1154-
fields.push(entry.value.into_column().unwrap());
1155-
types.push(entry.data_type);
1156-
}
1157-
BlockEntry {
1158-
data_type: DataType::Tuple(types),
1159-
value: Value::Column(Column::Tuple(fields)),
1160-
}
1161-
}
1162-
TableDataType::Variant => {
1163-
let mut data = Vec::with_capacity(len);
1164-
for _ in 0..len {
1165-
let opt = SmallRng::from_entropy().gen_range(0..=6);
1166-
let val = match opt {
1167-
0 => JsonbValue::Null,
1168-
1 => JsonbValue::Bool(true),
1169-
2 => JsonbValue::Bool(false),
1170-
3 => {
1171-
let s = Alphanumeric.sample_string(&mut rand::thread_rng(), 5);
1172-
JsonbValue::String(Cow::from(s))
1173-
}
1174-
4 => {
1175-
let num = SmallRng::from_entropy().gen_range(i64::MIN..=i64::MAX);
1176-
JsonbValue::Number(JsonbNumber::Int64(num))
1177-
}
1178-
5 => {
1179-
let arr_len = SmallRng::from_entropy().gen_range(0..=5);
1180-
let mut values = Vec::with_capacity(arr_len);
1181-
for _ in 0..arr_len {
1182-
let num = SmallRng::from_entropy().gen_range(i64::MIN..=i64::MAX);
1183-
values.push(JsonbValue::Number(JsonbNumber::Int64(num)))
1184-
}
1185-
JsonbValue::Array(values)
1186-
}
1187-
6 => {
1188-
let obj_len = SmallRng::from_entropy().gen_range(0..=5);
1189-
let mut obj = JsonbObject::new();
1190-
for _ in 0..obj_len {
1191-
let k = Alphanumeric.sample_string(&mut rand::thread_rng(), 5);
1192-
let num = SmallRng::from_entropy().gen_range(i64::MIN..=i64::MAX);
1193-
let v = JsonbValue::Number(JsonbNumber::Int64(num));
1194-
obj.insert(k, v);
1195-
}
1196-
JsonbValue::Object(obj)
1197-
}
1198-
_ => JsonbValue::Null,
1199-
};
1200-
data.push(val.to_vec());
1201-
}
1202-
BlockEntry {
1203-
data_type: DataType::Variant,
1204-
value: Value::Column(VariantType::from_data(data)),
1205-
}
1206-
}
1207-
}
1208-
}
1209999
}
12101000

12111001
pub type DataSchemaRef = Arc<DataSchema>;

0 commit comments

Comments
 (0)