Skip to content

Commit 47b9eb2

Browse files
authored
Merge pull request ClickHouse#62005 from Avogar/better-string-to-variant
Better conversion from String to Variant
2 parents bdda4e3 + 56b5b5e commit 47b9eb2

16 files changed

+164
-50
lines changed

docs/en/sql-reference/data-types/variant.md

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -190,22 +190,67 @@ SELECT toTypeName(variantType(v)) FROM test LIMIT 1;
190190
└─────────────────────────────────────────────────────────────────────┘
191191
```
192192

193-
## Conversion between Variant column and other columns
193+
## Conversion between a Variant column and other columns
194194

195-
There are 3 possible conversions that can be performed with Variant column.
195+
There are 4 possible conversions that can be performed with a column of type `Variant`.
196196

197-
### Converting an ordinary column to a Variant column
197+
### Converting a String column to a Variant column
198198

199-
It is possible to convert ordinary column with type `T` to a `Variant` column containing this type:
199+
Conversion from `String` to `Variant` is performed by parsing a value of `Variant` type from the string value:
200200

201201
```sql
202-
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
202+
SELECT '42'::Variant(String, UInt64) as variant, variantType(variant) as variant_type
203203
```
204204

205205
```text
206-
┌─type_name──────────────────────────────┬─variant───────┐
207-
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
208-
└────────────────────────────────────────┴───────────────┘
206+
┌─variant─┬─variant_type─┐
207+
│ 42 │ UInt64 │
208+
└─────────┴──────────────┘
209+
```
210+
211+
```sql
212+
SELECT '[1, 2, 3]'::Variant(String, Array(UInt64)) as variant, variantType(variant) as variant_type
213+
```
214+
215+
```text
216+
┌─variant─┬─variant_type──┐
217+
│ [1,2,3] │ Array(UInt64) │
218+
└─────────┴───────────────┘
219+
```
220+
221+
```sql
222+
SELECT CAST(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01'), 'Map(String, Variant(UInt64, Bool, Date))') as map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) as map_of_variant_types
223+
```
224+
225+
```text
226+
┌─map_of_variants─────────────────────────────┬─map_of_variant_types──────────────────────────┐
227+
│ {'key1':42,'key2':true,'key3':'2020-01-01'} │ {'key1':'UInt64','key2':'Bool','key3':'Date'} │
228+
└─────────────────────────────────────────────┴───────────────────────────────────────────────┘
229+
```
230+
231+
### Converting an ordinary column to a Variant column
232+
233+
It is possible to convert an ordinary column with type `T` to a `Variant` column containing this type:
234+
235+
```sql
236+
SELECT toTypeName(variant) as type_name, [1,2,3]::Array(UInt64)::Variant(UInt64, String, Array(UInt64)) as variant, variantType(variant) as variant_name
237+
```
238+
239+
```text
240+
┌─type_name──────────────────────────────┬─variant─┬─variant_name──┐
241+
│ Variant(Array(UInt64), String, UInt64) │ [1,2,3] │ Array(UInt64) │
242+
└────────────────────────────────────────┴─────────┴───────────────┘
243+
```
244+
245+
Note: conversion from the `String` type is always performed through parsing. If you need to convert a `String` column to the `String` variant of a `Variant` without parsing, you can do the following:
246+
```sql
247+
SELECT '[1, 2, 3]'::Variant(String)::Variant(String, Array(UInt64), UInt64) as variant, variantType(variant) as variant_type
248+
```
249+
250+
```text
251+
┌─variant───┬─variant_type─┐
252+
│ [1, 2, 3] │ String │
253+
└───────────┴──────────────┘
209254
```
210255

211256
### Converting a Variant column to an ordinary column

src/DataTypes/Serializations/SerializationBool.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,12 +194,12 @@ ReturnType deserializeImpl(
194194
buf.dropCheckpoint();
195195
if (buf.hasUnreadData())
196196
{
197+
restore_column_if_needed();
197198
if constexpr (throw_exception)
198199
throw Exception(
199200
ErrorCodes::CANNOT_PARSE_BOOL,
200201
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
201202
"bool_true_representation or bool_false_representation contains some delimiters of input format");
202-
restore_column_if_needed();
203203
return ReturnType(false);
204204
}
205205
return ReturnType(true);

src/DataTypes/getLeastSupertype.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,9 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
463463
/// nested_type will be nullptr, we should return nullptr in this case.
464464
if (!nested_type)
465465
return nullptr;
466+
/// Common type for Nullable(Nothing) and Variant(...) is Variant(...)
467+
if (isVariant(nested_type))
468+
return nested_type;
466469
return std::make_shared<DataTypeNullable>(nested_type);
467470
}
468471
}

src/Functions/CastOverloadResolver.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,11 @@ class CastOverloadResolverImpl : public IFunctionOverloadResolver
100100
validateDataType(type, data_type_validation_settings);
101101

102102
if (cast_type == CastType::accurateOrNull)
103-
return makeNullable(type);
103+
{
104+
/// Variant handles NULLs by itself during conversions.
105+
if (!isVariant(type))
106+
return makeNullable(type);
107+
}
104108

105109
if (internal)
106110
return type;

src/Functions/FunctionsConversion.cpp

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ namespace ErrorCodes
9090
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
9191
extern const int NOT_IMPLEMENTED;
9292
extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN;
93-
extern const int CANNOT_PARSE_BOOL;
9493
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
9594
}
9695

@@ -1816,6 +1815,7 @@ struct ConvertImpl
18161815

18171816

18181817
/// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization.
1818+
template <bool throw_on_error>
18191819
struct ConvertImplGenericFromString
18201820
{
18211821
static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count)
@@ -1855,29 +1855,34 @@ struct ConvertImplGenericFromString
18551855
{
18561856
serialization_from.deserializeWholeText(column_to, read_buffer, format_settings);
18571857
}
1858-
catch (const Exception & e)
1858+
catch (const Exception &)
18591859
{
1860-
auto * nullable_column = typeid_cast<ColumnNullable *>(&column_to);
1861-
if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column)
1862-
{
1863-
auto & col_nullmap = nullable_column->getNullMapData();
1864-
if (col_nullmap.size() != nullable_column->size())
1865-
col_nullmap.resize_fill(nullable_column->size());
1866-
if (nullable_column->size() == (i + 1))
1867-
nullable_column->popBack(1);
1868-
nullable_column->insertDefault();
1869-
continue;
1870-
}
1871-
throw;
1860+
if constexpr (throw_on_error)
1861+
throw;
1862+
/// Check if exception happened after we inserted the value
1863+
/// (deserializeWholeText should not do it, but let's check anyway).
1864+
if (column_to.size() > i)
1865+
column_to.popBack(column_to.size() - i);
1866+
column_to.insertDefault();
18721867
}
18731868

1869+
/// Usually deserializeWholeText checks for eof after parsing, but let's check one more time just in case.
18741870
if (!read_buffer.eof())
18751871
{
1876-
if (result_type)
1877-
throwExceptionForIncompletelyParsedValue(read_buffer, *result_type);
1872+
if constexpr (throw_on_error)
1873+
{
1874+
if (result_type)
1875+
throwExceptionForIncompletelyParsedValue(read_buffer, *result_type);
1876+
else
1877+
throw Exception(
1878+
ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse string to column {}. Expected eof", column_to.getName());
1879+
}
18781880
else
1879-
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT,
1880-
"Cannot parse string to column {}. Expected eof", column_to.getName());
1881+
{
1882+
if (column_to.size() > i)
1883+
column_to.popBack(column_to.size() - i);
1884+
column_to.insertDefault();
1885+
}
18811886
}
18821887
}
18831888
}
@@ -3280,7 +3285,9 @@ class FunctionCast final : public IFunctionBase
32803285
{
32813286
if (checkAndGetDataType<DataTypeString>(from_type.get()))
32823287
{
3283-
return &ConvertImplGenericFromString::execute;
3288+
if (cast_type == CastType::accurateOrNull)
3289+
return &ConvertImplGenericFromString<false>::execute;
3290+
return &ConvertImplGenericFromString<true>::execute;
32843291
}
32853292

32863293
return createWrapper<ToDataType>(from_type, to_type, requested_result_is_nullable);
@@ -3443,7 +3450,7 @@ class FunctionCast final : public IFunctionBase
34433450
/// Conversion from String through parsing.
34443451
if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
34453452
{
3446-
return &ConvertImplGenericFromString::execute;
3453+
return &ConvertImplGenericFromString<true>::execute;
34473454
}
34483455
else if (const auto * agg_type = checkAndGetDataType<DataTypeAggregateFunction>(from_type_untyped.get()))
34493456
{
@@ -3486,7 +3493,7 @@ class FunctionCast final : public IFunctionBase
34863493
/// Conversion from String through parsing.
34873494
if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
34883495
{
3489-
return &ConvertImplGenericFromString::execute;
3496+
return &ConvertImplGenericFromString<true>::execute;
34903497
}
34913498

34923499
DataTypePtr from_type_holder;
@@ -3577,7 +3584,7 @@ class FunctionCast final : public IFunctionBase
35773584
/// Conversion from String through parsing.
35783585
if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
35793586
{
3580-
return &ConvertImplGenericFromString::execute;
3587+
return &ConvertImplGenericFromString<true>::execute;
35813588
}
35823589

35833590
const auto * from_type = checkAndGetDataType<DataTypeTuple>(from_type_untyped.get());
@@ -3922,7 +3929,7 @@ class FunctionCast final : public IFunctionBase
39223929
{
39233930
return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count)
39243931
{
3925-
auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable();
3932+
auto res = ConvertImplGenericFromString<true>::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable();
39263933
res->finalize();
39273934
return res;
39283935
};
@@ -4077,6 +4084,29 @@ class FunctionCast final : public IFunctionBase
40774084
return ColumnVariant::create(discriminators, variants);
40784085
}
40794086

4087+
/// Builds the wrapper that casts a String source column to a Variant by parsing each string value
/// (as opposed to storing it verbatim as the String variant).
/// The wrapper normalizes its first argument (strips LowCardinality and Nullable) and then delegates
/// to ConvertImplGenericFromString, choosing the throwing or non-throwing instantiation from cast_type.
WrapperType createStringToVariantWrapper() const
4088+
{
4089+
/// NOTE(review): the lambda body reads only the member cast_type, so `[&]` effectively captures `this`.
/// Presumably the returned wrapper never outlives this FunctionCast object; confirm, and consider an
/// explicit `[this]` capture as the custom-serialization wrapper in this class does.
return [&](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
4090+
{
4091+
/// Work on a copy of the arguments so the caller's columns and types are left untouched.
auto column = arguments[0].column->convertToFullColumnIfLowCardinality();
4092+
auto args = arguments;
4093+
args[0].column = column;
4094+
4095+
/// The incoming ColumnNullable parameter is ignored; the nullable wrapper is re-derived here
/// from the materialized column and passed on explicitly.
const ColumnNullable * column_nullable = nullptr;
4096+
if (isColumnNullable(*args[0].column))
4097+
{
4098+
column_nullable = assert_cast<const ColumnNullable *>(args[0].column.get());
4099+
args[0].column = column_nullable->getNestedColumnPtr();
4100+
}
4101+
4102+
/// Keep the declared argument type in sync with the unwrapped column.
args[0].type = removeNullable(removeLowCardinality(args[0].type));
4103+
4104+
/// accurateOrNull must not throw on unparsable input: the <false> instantiation inserts a default
/// value for such rows instead of rethrowing.
if (cast_type == CastType::accurateOrNull)
4105+
return ConvertImplGenericFromString<false>::execute(args, result_type, column_nullable, input_rows_count);
4106+
return ConvertImplGenericFromString<true>::execute(args, result_type, column_nullable, input_rows_count);
4107+
};
4108+
}
4109+
40804110
WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const
40814111
{
40824112
/// We allow converting NULL to Variant(...) as Variant can store NULLs.
@@ -4091,6 +4121,10 @@ class FunctionCast final : public IFunctionBase
40914121
}
40924122

40934123
auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type));
4124+
/// Cast String to Variant through parsing if it's not Variant(String).
4125+
if (isStringOrFixedString(removeNullable(removeLowCardinality(from_type))) && (!variant_discr_opt || to_variant.getVariants().size() > 1))
4126+
return createStringToVariantWrapper();
4127+
40944128
if (!variant_discr_opt)
40954129
throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName());
40964130

@@ -4692,7 +4726,7 @@ class FunctionCast final : public IFunctionBase
46924726

46934727
if (to_type->getCustomSerialization() && to_type->getCustomName())
46944728
{
4695-
ret = [requested_result_is_nullable](
4729+
ret = [this, requested_result_is_nullable](
46964730
ColumnsWithTypeAndName & arguments,
46974731
const DataTypePtr & result_type,
46984732
const ColumnNullable * column_nullable,
@@ -4701,7 +4735,10 @@ class FunctionCast final : public IFunctionBase
47014735
auto wrapped_result_type = result_type;
47024736
if (requested_result_is_nullable)
47034737
wrapped_result_type = makeNullable(result_type);
4704-
return ConvertImplGenericFromString::execute(
4738+
if (this->cast_type == CastType::accurateOrNull)
4739+
return ConvertImplGenericFromString<false>::execute(
4740+
arguments, wrapped_result_type, column_nullable, input_rows_count);
4741+
return ConvertImplGenericFromString<true>::execute(
47054742
arguments, wrapped_result_type, column_nullable, input_rows_count);
47064743
};
47074744
return true;

tests/queries/0_stateless/01601_accurate_cast.reference

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
1970-01-01 00:00:19
1111
2023-05-30
1212
1970-01-20
13-
\N
1413
true
1514
false
1615
true

tests/queries/0_stateless/01601_accurate_cast.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ SELECT accurateCast('1xxx', 'Date'); -- { serverError CANNOT_PARSE_DATE }
3535
SELECT accurateCast('2023-05-30', 'Date');
3636
SELECT accurateCast(19, 'Date');
3737

38-
select accurateCast('test', 'Nullable(Bool)');
38+
select accurateCast('test', 'Nullable(Bool)'); -- { serverError CANNOT_PARSE_BOOL }
3939
select accurateCast('test', 'Bool'); -- { serverError CANNOT_PARSE_BOOL }
4040
select accurateCast('truex', 'Bool'); -- { serverError CANNOT_PARSE_BOOL }
4141
select accurateCast('xfalse', 'Bool'); -- { serverError CANNOT_PARSE_BOOL }

tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,3 @@ fuzzer issue
3939
\N
4040
\N
4141
\N
42-
\N
43-
\N
44-
\N
45-
\N

tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ select toIPv6OrNull(number % 2 ? '' : NULL) from numbers(2);
2424
select IPv6StringToNum(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL) from numbers(2);
2525

2626
select 'fuzzer issue';
27-
SELECT CAST(if(number % 2, 'truetrue', NULL), 'Nullable(Bool)') FROM numbers(2);
28-
SELECT CAST(if(number % 2, 'falsefalse', NULL), 'Nullable(Bool)') FROM numbers(2);
27+
SELECT CAST(if(number % 2, 'truetrue', NULL), 'Nullable(Bool)') FROM numbers(2); -- {serverError CANNOT_PARSE_BOOL}
28+
SELECT CAST(if(number % 2, 'falsefalse', NULL), 'Nullable(Bool)') FROM numbers(2); -- {serverError CANNOT_PARSE_BOOL}
2929
SELECT accurateCastOrNull(if(number % 2, NULL, 'truex'), 'Bool') FROM numbers(4);
3030
SELECT accurateCastOrNull(if(number % 2, 'truex', NULL), 'Bool') FROM numbers(4);

tests/queries/0_stateless/02941_variant_type_1.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ function test1_insert()
1414
echo "test1 insert"
1515
$CH_CLIENT -nmq "insert into test select number, NULL from numbers(3);
1616
insert into test select number + 3, number from numbers(3);
17-
insert into test select number + 6, 'str_' || toString(number) from numbers(3);
17+
insert into test select number + 6, ('str_' || toString(number))::Variant(String) from numbers(3);
1818
insert into test select number + 9, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(3);
1919
insert into test select number + 12, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(3);
2020
insert into test select number + 15, range(number + 1)::Array(UInt64) from numbers(3);"
@@ -40,7 +40,7 @@ function test2_insert()
4040
echo "test2 insert"
4141
$CH_CLIENT -nmq "insert into test select number, NULL from numbers(3);
4242
insert into test select number + 3, number % 2 ? NULL : number from numbers(3);
43-
insert into test select number + 6, number % 2 ? NULL : 'str_' || toString(number) from numbers(3);
43+
insert into test select number + 6, number % 2 ? NULL : ('str_' || toString(number))::Variant(String) from numbers(3);
4444
insert into test select number + 9, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);
4545
insert into test select number + 12, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);
4646
insert into test select number + 15, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);"
@@ -64,7 +64,7 @@ select v.\`Array(UInt64)\`.size0 from test order by id;"
6464
function test3_insert()
6565
{
6666
echo "test3 insert"
67-
$CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number + 1)::Array(UInt64), type)) as res from numbers(18);"
67+
$CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST(('str_' || toString(number))::Variant(String), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number + 1)::Array(UInt64), type)) as res from numbers(18);"
6868
}
6969

7070
function test3_select()

0 commit comments

Comments
 (0)