Skip to content

Commit ed8c0ca

Browse files
authored
Merge pull request ClickHouse#76974 from Avogar/variant-bool-parsing
Don't parse special Bool values in text formats inside Variant type by default
2 parents 2d1604e + 32ed319 commit ed8c0ca

16 files changed

+121
-32
lines changed

docs/en/sql-reference/data-types/variant.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,19 @@ SELECT CAST(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01'), 'Map(String
226226
└─────────────────────────────────────────────┴───────────────────────────────────────────────┘
227227
```
228228

229+
To disable parsing during conversion from `String` to `Variant`, disable the setting `cast_string_to_variant_use_inference`:
230+
231+
```sql
232+
SET cast_string_to_variant_use_inference = 0;
233+
SELECT '[1, 2, 3]'::Variant(String, Array(UInt64)) as variant, variantType(variant) as variant_type
234+
```
235+
236+
```text
237+
┌─variant───┬─variant_type─┐
238+
│ [1, 2, 3] │ String │
239+
└───────────┴──────────────┘
240+
```
241+
229242
### Converting an ordinary column to a Variant column {#converting-an-ordinary-column-to-a-variant-column}
230243

231244
It is possible to convert an ordinary column with type `T` to a `Variant` column containing this type:

src/Core/FormatFactorySettings.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,10 @@ Text to represent true bool value in TSV/CSV/Vertical/Pretty formats.
654654
)", 0) \
655655
DECLARE(String, bool_false_representation, "false", R"(
656656
Text to represent false bool value in TSV/CSV/Vertical/Pretty formats.
657+
)", 0) \
658+
\
659+
DECLARE(Bool, allow_special_bool_values_inside_variant, false, R"(
660+
Allows to parse Bool values inside Variant type from special text bool values like "on", "off", "enable", "disable", etc.
657661
)", 0) \
658662
\
659663
DECLARE(Bool, input_format_values_interpret_expressions, true, R"(

src/Core/Settings.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6074,6 +6074,9 @@ Allow to use the function `getClientHTTPHeader` which lets to obtain a value of
60746074
)", 0) \
60756075
DECLARE(Bool, cast_string_to_dynamic_use_inference, false, R"(
60766076
Use types inference during String to Dynamic conversion
6077+
)", 0) \
6078+
DECLARE(Bool, cast_string_to_variant_use_inference, true, R"(
6079+
Use types inference during String to Variant conversion.
60776080
)", 0) \
60786081
DECLARE(Bool, enable_blob_storage_log, true, R"(
60796082
Write information about blob storage operations to system.blob_storage_log table

src/Core/SettingsChangesHistory.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
7575
{"enable_hdfs_pread", true, true, "New setting."},
7676
{"low_priority_query_wait_time_ms", 1000, 1000, "New setting."},
7777
{"allow_experimental_shared_set_join", 0, 1, "A setting for ClickHouse Cloud to enable SharedSet and SharedJoin"},
78+
{"allow_special_bool_values_inside_variant", true, false, "Don't allow special bool values during Variant type parsing"},
79+
{"cast_string_to_variant_use_inference", true, true, "New setting to enable/disable types inference during CAST from String to Variant"},
7880
{"distributed_cache_read_request_max_tries", 20, 20, "New setting"},
7981
{"min_os_cpu_wait_time_ratio_to_throw", 0, 2, "New setting"},
8082
{"max_os_cpu_wait_time_ratio_to_throw", 0, 6, "New setting"},

src/DataTypes/Serializations/SerializationBool.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ ReturnType deserializeImpl(
189189
}
190190

191191
buf.rollbackToCheckpoint();
192-
if (tryDeserializeAllVariants(col, buf) && check_end_of_value(buf))
192+
if (settings.allow_special_bool_values && tryDeserializeAllVariants(col, buf) && check_end_of_value(buf))
193193
{
194194
buf.dropCheckpoint();
195195
if (buf.hasUnreadData())

src/DataTypes/Serializations/SerializationVariant.cpp

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -920,7 +920,8 @@ bool SerializationVariant::tryDeserializeImpl(
920920
IColumn & column,
921921
const String & field,
922922
std::function<bool(ReadBuffer &)> check_for_null,
923-
std::function<bool(IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer &)> try_deserialize_nested) const
923+
std::function<bool(IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer &, const FormatSettings &)> try_deserialize_nested,
924+
const FormatSettings & settings) const
924925
{
925926
auto & column_variant = assert_cast<ColumnVariant &>(column);
926927
ReadBufferFromString null_buf(field);
@@ -930,12 +931,14 @@ bool SerializationVariant::tryDeserializeImpl(
930931
return true;
931932
}
932933

934+
FormatSettings modified_settings = settings;
935+
modified_settings.allow_special_bool_values = settings.allow_special_bool_values_inside_variant;
933936
for (size_t global_discr : deserialize_text_order)
934937
{
935938
ReadBufferFromString variant_buf(field);
936939
auto & variant_column = column_variant.getVariantByGlobalDiscriminator(global_discr);
937940
size_t prev_size = variant_column.size();
938-
if (try_deserialize_nested(variant_column, variants[global_discr], variant_buf) && variant_buf.eof())
941+
if (try_deserialize_nested(variant_column, variants[global_discr], variant_buf, modified_settings) && variant_buf.eof())
939942
{
940943
column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(global_discr));
941944
column_variant.getOffsets().push_back(prev_size);
@@ -981,12 +984,12 @@ bool SerializationVariant::tryDeserializeTextEscapedImpl(DB::IColumn & column, c
981984
{
982985
return SerializationNullable::tryDeserializeNullEscaped(buf, settings);
983986
};
984-
auto try_deserialize_variant =[&](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
987+
auto try_deserialize_variant = [](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf, const FormatSettings & settings_)
985988
{
986-
return variant_serialization->tryDeserializeTextEscaped(variant_column, buf, settings);
989+
return variant_serialization->tryDeserializeTextEscaped(variant_column, buf, settings_);
987990
};
988991

989-
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant);
992+
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant, settings);
990993
}
991994

992995
void SerializationVariant::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@@ -1020,12 +1023,12 @@ bool SerializationVariant::tryDeserializeTextRawImpl(DB::IColumn & column, const
10201023
{
10211024
return SerializationNullable::tryDeserializeNullRaw(buf, settings);
10221025
};
1023-
auto try_deserialize_variant =[&](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
1026+
auto try_deserialize_variant = [](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf, const FormatSettings & settings_)
10241027
{
1025-
return variant_serialization->tryDeserializeTextRaw(variant_column, buf, settings);
1028+
return variant_serialization->tryDeserializeTextRaw(variant_column, buf, settings_);
10261029
};
10271030

1028-
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant);
1031+
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant, settings);
10291032
}
10301033

10311034
void SerializationVariant::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@@ -1060,12 +1063,12 @@ bool SerializationVariant::tryDeserializeTextQuotedImpl(DB::IColumn & column, co
10601063
{
10611064
return SerializationNullable::tryDeserializeNullQuoted(buf);
10621065
};
1063-
auto try_deserialize_variant =[&](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
1066+
auto try_deserialize_variant = [](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf, const FormatSettings & settings_)
10641067
{
1065-
return variant_serialization->tryDeserializeTextQuoted(variant_column, buf, settings);
1068+
return variant_serialization->tryDeserializeTextQuoted(variant_column, buf, settings_);
10661069
};
10671070

1068-
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant);
1071+
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant, settings);
10691072
}
10701073

10711074
void SerializationVariant::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@@ -1099,12 +1102,12 @@ bool SerializationVariant::tryDeserializeTextCSVImpl(DB::IColumn & column, const
10991102
{
11001103
return SerializationNullable::tryDeserializeNullCSV(buf, settings);
11011104
};
1102-
auto try_deserialize_variant =[&](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
1105+
auto try_deserialize_variant = [](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf, const FormatSettings & settings_)
11031106
{
1104-
return variant_serialization->tryDeserializeTextCSV(variant_column, buf, settings);
1107+
return variant_serialization->tryDeserializeTextCSV(variant_column, buf, settings_);
11051108
};
11061109

1107-
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant);
1110+
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant, settings);
11081111
}
11091112

11101113
void SerializationVariant::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@@ -1138,12 +1141,12 @@ bool SerializationVariant::tryDeserializeWholeTextImpl(DB::IColumn & column, con
11381141
{
11391142
return SerializationNullable::tryDeserializeNullText(buf);
11401143
};
1141-
auto try_deserialize_variant =[&](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
1144+
auto try_deserialize_variant = [](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf, const FormatSettings & settings_)
11421145
{
1143-
return variant_serialization->tryDeserializeWholeText(variant_column, buf, settings);
1146+
return variant_serialization->tryDeserializeWholeText(variant_column, buf, settings_);
11441147
};
11451148

1146-
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant);
1149+
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant, settings);
11471150
}
11481151

11491152
void SerializationVariant::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@@ -1188,12 +1191,12 @@ bool SerializationVariant::tryDeserializeTextJSONImpl(DB::IColumn & column, cons
11881191
{
11891192
return SerializationNullable::tryDeserializeNullJSON(buf);
11901193
};
1191-
auto try_deserialize_variant =[&](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
1194+
auto try_deserialize_variant = [](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf, const FormatSettings & settings_)
11921195
{
1193-
return variant_serialization->tryDeserializeTextJSON(variant_column, buf, settings);
1196+
return variant_serialization->tryDeserializeTextJSON(variant_column, buf, settings_);
11941197
};
11951198

1196-
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant);
1199+
return tryDeserializeImpl(column, field, check_for_null, try_deserialize_variant, settings);
11971200
}
11981201

11991202
void SerializationVariant::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const

src/DataTypes/Serializations/SerializationVariant.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,8 @@ class SerializationVariant : public ISerialization
219219
IColumn & column,
220220
const String & field,
221221
std::function<bool(ReadBuffer &)> check_for_null,
222-
std::function<bool(IColumn & variant_columm, const SerializationPtr & nested, ReadBuffer &)> try_deserialize_nested) const;
222+
std::function<bool(IColumn & variant_columm, const SerializationPtr & nested, ReadBuffer &, const FormatSettings &)> try_deserialize_nested,
223+
const FormatSettings & settings) const;
223224

224225
VariantSerializations variants;
225226
std::vector<String> variant_names;

src/Formats/FormatFactory.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
335335
format_settings.date_time_overflow_behavior = settings[Setting::date_time_overflow_behavior];
336336
format_settings.try_infer_variant = settings[Setting::input_format_try_infer_variants];
337337
format_settings.client_protocol_version = context->getClientProtocolVersion();
338+
format_settings.allow_special_bool_values_inside_variant = settings[Setting::allow_special_bool_values_inside_variant];
338339

339340
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
340341
if (format_settings.schema.is_server)

src/Formats/FormatSettings.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ struct FormatSettings
156156

157157
String bool_true_representation = "true";
158158
String bool_false_representation = "false";
159+
bool allow_special_bool_values = true;
160+
bool allow_special_bool_values_inside_variant = false;
159161

160162
struct CSV
161163
{

src/Functions/FunctionsConversion.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ namespace Setting
8383
{
8484
extern const SettingsBool cast_ipv4_ipv6_default_on_conversion_error;
8585
extern const SettingsBool cast_string_to_dynamic_use_inference;
86+
extern const SettingsBool cast_string_to_variant_use_inference;
8687
extern const SettingsDateTimeOverflowBehavior date_time_overflow_behavior;
8788
extern const SettingsBool input_format_ipv4_default_on_conversion_error;
8889
extern const SettingsBool input_format_ipv6_default_on_conversion_error;
@@ -5170,7 +5171,7 @@ class FunctionCast final : public IFunctionBase
51705171

51715172
auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(removeNullableOrLowCardinalityNullable(from_type)->getName());
51725173
/// Cast String to Variant through parsing if it's not Variant(String).
5173-
if (isStringOrFixedString(removeNullable(removeLowCardinality(from_type))) && (!variant_discr_opt || to_variant.getVariants().size() > 1))
5174+
if (context && context->getSettingsRef()[Setting::cast_string_to_variant_use_inference] && isStringOrFixedString(removeNullable(removeLowCardinality(from_type))) && (!variant_discr_opt || to_variant.getVariants().size() > 1))
51745175
return createStringToVariantWrapper();
51755176

51765177
if (!variant_discr_opt)

0 commit comments

Comments
 (0)