From a0138a24b23d51848d7fec4d78522c0c99f9f0d2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 15 Dec 2022 18:00:31 +0100 Subject: [PATCH 001/342] Update version to 22.12.2.1 --- cmake/autogenerated_versions.txt | 8 ++++---- .../StorageSystemContributors.generated.cpp | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index d06d3918612b..48251d5e40fb 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54469) SET(VERSION_MAJOR 22) SET(VERSION_MINOR 12) -SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77) -SET(VERSION_DESCRIBE v22.12.1.1-testing) -SET(VERSION_STRING 22.12.1.1) +SET(VERSION_PATCH 2) +SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3) +SET(VERSION_DESCRIBE v22.12.2.1-stable) +SET(VERSION_STRING 22.12.2.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f5b6829c7eff..f69f9f8ee7fb 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -94,6 +94,7 @@ const char * auto_contributors[] { "Aliaksandr Shylau", "Alina Terekhova", "Amesaru", + "Amila Welihinda", "Amir Vaza", "Amos Bird", "Amr Alaa", @@ -174,6 +175,7 @@ const char * auto_contributors[] { "Avogar", "Azat Khuzhin", "BSD_Conqueror", + "BSWaterB", "Babacar Diassé", "Bakhtiyor Ruziev", "BanyRule", @@ -186,6 +188,7 @@ const char * auto_contributors[] { "Bharat Nallan", "Bharat Nallan Chakravarthy", "Big Elephant", + "BigRedEye", "Bill", "BiteTheDDDDt", "BlahGeek", @@ -203,6 +206,7 @@ const char * auto_contributors[] { "Brett Hoerner", "Brian Hunter", "Bulat Gaifullin", + "Camden Cheek", "Camilo Sierra", "Carbyn", "Carlos Rodríguez Hernández", @@ -291,6 +295,7 @@ const char * 
auto_contributors[] { "Eldar Zaitov", "Elena", "Elena Baskakova", + "Elena Torró", "Elghazal Ahmed", "Elizaveta Mironyuk", "Elykov Alexandr", @@ -525,6 +530,7 @@ const char * auto_contributors[] { "Maksim Kita", "Mallik Hassan", "Malte", + "Manuel de la Peña", "Marat IDRISOV", "Marcelo Rodriguez", "Marek Vavrusa", @@ -534,6 +540,7 @@ const char * auto_contributors[] { "Mark Andreev", "Mark Frost", "Mark Papadakis", + "Mark Polokhov", "Maroun Maroun", "Marquitos", "Marsel Arduanov", @@ -709,6 +716,7 @@ const char * auto_contributors[] { "Quanfa Fu", "Quid37", "Radistka-75", + "Raevsky Rudolf", "Rafael Acevedo", "Rafael David Tinoco", "Rajkumar", @@ -779,6 +787,7 @@ const char * auto_contributors[] { "Sergey Mirvoda", "Sergey Ryzhkov", "Sergey Shtykov", + "Sergey Skvortsov", "Sergey Tulentsev", "Sergey V. Galtsev", "Sergey Zaikin", @@ -790,6 +799,7 @@ const char * auto_contributors[] { "Sherry Wang", "Shoh Jahon", "SiderZhang", + "Sidorov Pavel", "Silviu Caragea", "Simeon Emanuilov", "Simon Liu", @@ -878,6 +888,7 @@ const char * auto_contributors[] { "Viktor Taranenko", "Vincent Bernat", "Vitalii S", + "Vitaliy", "Vitaliy Fedorchenko", "Vitaliy Karnienko", "Vitaliy Kozlovskiy", @@ -922,6 +933,7 @@ const char * auto_contributors[] { "Weiqing Xu", "William Shallum", "Winter Zhang", + "Xbitz29", "XenoAmess", "Xianda Ke", "Xiang Zhou", @@ -1013,6 +1025,7 @@ const char * auto_contributors[] { "benbiti", "bgranvea", "bharatnc", + "bit-ranger", "bkuschel", "blazerer", "bluebirddm", @@ -1238,6 +1251,7 @@ const char * auto_contributors[] { "luc1ph3r", "lulichao", "luocongkai", + "lzydmxy", "m-ves", "madianjun", "maiha", @@ -1313,6 +1327,7 @@ const char * auto_contributors[] { "peter279k", "philip.han", "pingyu", + "pkubaj", "potya", "presto53", "proller", @@ -1378,6 +1393,7 @@ const char * auto_contributors[] { "taiyang-li", "tangjiangling", "tao jiang", + "taofengliu", "taojiatao", "tavplubix", "tchepavel", @@ -1394,6 +1410,7 @@ const char * auto_contributors[] { "turbo 
jason", "tyrionhuang", "ubuntu", + "unbyte", "unegare", "unknown", "urgordeadbeef", @@ -1481,6 +1498,7 @@ const char * auto_contributors[] { "Дмитрий Канатников", "Иванов Евгений", "Илья Исаев", + "Коренберг ☢️ Марк", "Павел Литвиненко", "Смитюх Вячеслав", "Сундуков Алексей", From 7a338abff0a913d20212919e00356eabd5b6ef4b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 15 Dec 2022 21:03:42 +0000 Subject: [PATCH 002/342] Backport #44273 to 22.12: Get rid of global Git object --- tests/ci/release.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/ci/release.py b/tests/ci/release.py index 502efd791733..57d5c4cdd6e1 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -32,8 +32,6 @@ RELEASE_READY_STATUS = "Ready for release" -git = Git() - class Repo: VALID = ("ssh", "https", "origin") @@ -79,7 +77,7 @@ def __init__( self.release_commit = release_commit assert release_type in self.BIG + self.SMALL self.release_type = release_type - self._git = git + self._git = Git() self._version = get_version_from_repo(git=self._git) self._release_branch = "" self._rollback_stack = [] # type: List[str] From db8652a65194ea959d7b7a510134b0f2b6699330 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 15 Dec 2022 22:03:33 +0000 Subject: [PATCH 003/342] Backport #44272 to 22.12: Fix deadlock in StorageSystemDatabases --- src/Storages/System/StorageSystemDatabases.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2353be9b69f8..432d2c4ac647 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -46,7 +46,7 @@ static String getEngineFull(const DatabasePtr & database) break; /// Database was dropped - if (!locked_database && name == database->getDatabaseName()) + if (name == database->getDatabaseName()) return {}; guard.reset(); From 
1e7001d675976bed0f78265860f7454c37f6c5a2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 17 Dec 2022 02:17:56 +0000 Subject: [PATCH 004/342] Backport #44209 to 22.12: Add some settings under `compatibility` --- src/Core/SettingsChangesHistory.h | 72 ++++++++++++++++--------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index ee378b295fa6..62b3c1b9c987 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -78,41 +78,43 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { - {"22.12", {{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", 
{{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, + {"22.12", 
{{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not 
output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; } From 
dbfb18f9f78b40f9ebc3821a314ead3776d79def Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 19 Dec 2022 14:03:10 +0000 Subject: [PATCH 005/342] Backport #44311 to 22.12: Fix false success rerun on similar prefix for another builds --- tests/ci/build_check.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index c9e8dac2c008..a718bd53418d 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -122,7 +122,8 @@ def check_for_success_run( build_name: str, build_config: BuildConfig, ) -> None: - logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix) + # the final empty argument is necessary for distinguish build and build_suffix + logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "") logging.info("Checking for artifacts in %s", logged_prefix) try: # TODO: theoretically, it would miss performance artifact for pr==0, From 3bb3b74ad0d97ae771d6d238298b4083a40d15a8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 19 Dec 2022 18:04:11 +0000 Subject: [PATCH 006/342] Backport #44386 to 22.12: Add check for submodules sanity --- .gitmodules | 3 --- .../test/style/process_style_check_result.py | 1 + docker/test/style/run.sh | 4 +++- utils/check-style/check-submodules | 20 +++++++++++++++++++ 4 files changed, 24 insertions(+), 4 deletions(-) create mode 100755 utils/check-style/check-submodules diff --git a/.gitmodules b/.gitmodules index 070109eb32d9..0af0a6a25f4a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -269,9 +269,6 @@ [submodule "contrib/vectorscan"] path = contrib/vectorscan url = https://github.com/VectorCamp/vectorscan.git -[submodule "contrib/liburing"] - path = contrib/liburing - url = https://github.com/axboe/liburing.git [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/ClickHouse/c-ares diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index 
6dc3d05d0512..2edf6ba3591d 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -19,6 +19,7 @@ def process_result(result_folder): "typos", "whitespaces", "workflows", + "submodules", "docs spelling", ) diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 80911bf86273..315efb9e6c44 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -10,7 +10,7 @@ echo "Check style" | ts echo "Check python formatting with black" | ts ./check-black -n |& tee /test_output/black_output.txt echo "Check python type hinting with mypy" | ts -./check-mypy -n |& tee /test_output/mypy_output.txt +./check-mypy -n |& tee /test_output/mypy_output.txt echo "Check typos" | ts ./check-typos |& tee /test_output/typos_output.txt echo "Check docs spelling" | ts @@ -19,6 +19,8 @@ echo "Check whitespaces" | ts ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt echo "Check workflows" | ts ./check-workflows |& tee /test_output/workflows_output.txt +echo "Check submodules" | ts +./check-submodules |& tee /test_output/submodules_output.txt echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/utils/check-style/check-submodules b/utils/check-style/check-submodules new file mode 100755 index 000000000000..815e6c13c0f2 --- /dev/null +++ b/utils/check-style/check-submodules @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +# The script checks if all submodules defined in $GIT_ROOT/.gitmodules exist in $GIT_ROOT/contrib + +set -e + +GIT_ROOT=$(git rev-parse --show-cdup) +GIT_ROOT=${GIT_ROOT:-.} + +cd "$GIT_ROOT" + +# Remove keys for submodule.*.path parameters, the values are separated by \0 +# and check if the directory exists +git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \ + xargs -P100 -0 
--no-run-if-empty -I{} bash -c 'if ! test -d {}; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1 + + +# And check that the submodule is fine +git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \ + xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q {} 2>&1 From fe5b9accc645e9e79b6302997c656b0bbec2bcc2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 20 Dec 2022 13:07:18 +0000 Subject: [PATCH 007/342] Backport #44346 to 22.12: Fix possible crash with IN, LowCardinality and constant folding --- src/Interpreters/ExpressionAnalyzer.cpp | 56 ++++++++++++++++++- src/Processors/QueryPlan/AggregatingStep.cpp | 8 +-- src/Processors/QueryPlan/AggregatingStep.h | 2 + .../02503_in_lc_const_args_bug.reference | 1 + .../02503_in_lc_const_args_bug.sql | 2 + 5 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02503_in_lc_const_args_bug.reference create mode 100644 tests/queries/0_stateless/02503_in_lc_const_args_bug.sql diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 22229c0d6c21..bc93abff5345 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include +#include #include #include @@ -1831,7 +1833,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ssize_t where_step_num = -1; ssize_t having_step_num = -1; - auto finalize_chain = [&](ExpressionActionsChain & chain) + auto finalize_chain = [&](ExpressionActionsChain & chain) -> ColumnsWithTypeAndName { if (prewhere_step_num >= 0) { @@ -1852,7 +1854,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); + auto res = chain.getLastStep().getResultColumns(); chain.clear(); + return res; }; { @@ -1970,7 +1974,55 @@ 
ExpressionAnalysisResult::ExpressionAnalysisResult( if (settings.group_by_use_nulls) query_analyzer.appendGroupByModifiers(before_aggregation, chain, only_types); - finalize_chain(chain); + auto columns_before_aggregation = finalize_chain(chain); + + /// Here we want to check that columns after aggregation have the same type as + /// were promised in query_analyzer.aggregated_columns + /// Ideally, they should be equal. In practice, this may be not true. + /// As an example, we don't build sets for IN inside ExpressionAnalysis::analyzeAggregation, + /// so that constant folding for expression (1 in 1) will not work. This may change the return type + /// for functions with LowCardinality argument: function "substr(toLowCardinality('abc'), 1 IN 1)" + /// should usually return LowCardinality(String) when (1 IN 1) is constant, but without built set + /// for (1 IN 1) constant is not propagated and "substr" returns String type. + /// See 02503_in_lc_const_args_bug.sql + /// + /// As a temporary solution, we add converting actions to the next chain. 
+ /// Hopefully, later we can + /// * use a new analyzer where this issue is absent + /// * or remove ExpressionActionsChain completely and re-implement its logic on top of the query plan + { + for (auto & col : columns_before_aggregation) + if (!col.column) + col.column = col.type->createColumn(); + + Block header_before_aggregation(std::move(columns_before_aggregation)); + + auto keys = query_analyzer.aggregationKeys().getNames(); + const auto & aggregates = query_analyzer.aggregates(); + + bool has_grouping = query_analyzer.group_by_kind != GroupByKind::ORDINARY; + auto actual_header = Aggregator::Params::getHeader( + header_before_aggregation, /*only_merge*/ false, keys, aggregates, /*final*/ true); + actual_header = AggregatingStep::appendGroupingColumn( + std::move(actual_header), keys, has_grouping, settings.group_by_use_nulls); + + Block expected_header; + for (const auto & expected : query_analyzer.aggregated_columns) + expected_header.insert(ColumnWithTypeAndName(expected.type, expected.name)); + + if (!blocksHaveEqualStructure(actual_header, expected_header)) + { + auto converting = ActionsDAG::makeConvertingActions( + actual_header.getColumnsWithTypeAndName(), + expected_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name, + true); + + auto & step = chain.lastStep(query_analyzer.aggregated_columns); + auto & actions = step.actions(); + actions = ActionsDAG::merge(std::move(*actions), std::move(*converting)); + } + } if (query_analyzer.appendHaving(chain, only_types || !second_stage)) { diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 86039342c492..4fd6e7c11ddd 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -79,9 +79,9 @@ Block generateOutputHeader(const Block & input_header, const Names & keys, bool } -static Block appendGroupingColumn(Block block, const Names & keys, const GroupingSetsParamsList & params, 
bool use_nulls) +Block AggregatingStep::appendGroupingColumn(Block block, const Names & keys, bool has_grouping, bool use_nulls) { - if (params.empty()) + if (!has_grouping) return block; return generateOutputHeader(block, keys, use_nulls); @@ -104,7 +104,7 @@ AggregatingStep::AggregatingStep( bool memory_bound_merging_of_aggregation_results_enabled_) : ITransformingStep( input_stream_, - appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, grouping_sets_params_, group_by_use_nulls_), + appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, !grouping_sets_params_.empty(), group_by_use_nulls_), getTraits(should_produce_results_in_order_of_bucket_number_), false) , params(std::move(params_)) @@ -469,7 +469,7 @@ void AggregatingStep::updateOutputStream() { output_stream = createOutputStream( input_streams.front(), - appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, grouping_sets_params, group_by_use_nulls), + appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, !grouping_sets_params.empty(), group_by_use_nulls), getDataStreamTraits()); } diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 9cb56432797b..0dc06649d2d6 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -42,6 +42,8 @@ class AggregatingStep : public ITransformingStep bool should_produce_results_in_order_of_bucket_number_, bool memory_bound_merging_of_aggregation_results_enabled_); + static Block appendGroupingColumn(Block block, const Names & keys, bool has_grouping, bool use_nulls); + String getName() const override { return "Aggregating"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; diff --git a/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference 
b/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference new file mode 100644 index 000000000000..8baef1b4abc4 --- /dev/null +++ b/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference @@ -0,0 +1 @@ +abc diff --git a/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql b/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql new file mode 100644 index 000000000000..6756e3815867 --- /dev/null +++ b/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql @@ -0,0 +1,2 @@ +SELECT substr(toLowCardinality('abc'), 1 in 1) AS x GROUP BY x; + From 4704b26d611daf753edf7cea792fdaab4f47d205 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 20 Dec 2022 19:04:08 +0000 Subject: [PATCH 008/342] Backport #44132 to 22.12: Bump libdivide (to gain some new optimizations) --- .gitmodules | 3 + contrib/CMakeLists.txt | 2 +- contrib/libdivide | 1 + contrib/libdivide-cmake/CMakeLists.txt | 7 + contrib/libdivide-cmake/libdivide-config.h | 9 + contrib/libdivide/CMakeLists.txt | 3 - contrib/libdivide/LICENSE.txt | 20 - contrib/libdivide/README.txt | 2 - contrib/libdivide/libdivide.h | 2503 -------------------- docker/test/fasttest/run.sh | 1 + src/Functions/divide/divideImpl.cpp | 1 + src/Functions/modulo.cpp | 11 +- src/Interpreters/createBlockSelector.cpp | 11 +- 13 files changed, 25 insertions(+), 2549 deletions(-) create mode 160000 contrib/libdivide create mode 100644 contrib/libdivide-cmake/CMakeLists.txt create mode 100644 contrib/libdivide-cmake/libdivide-config.h delete mode 100644 contrib/libdivide/CMakeLists.txt delete mode 100644 contrib/libdivide/LICENSE.txt delete mode 100644 contrib/libdivide/README.txt delete mode 100644 contrib/libdivide/libdivide.h diff --git a/.gitmodules b/.gitmodules index 0af0a6a25f4a..0805b6d54926 100644 --- a/.gitmodules +++ b/.gitmodules @@ -291,3 +291,6 @@ [submodule "contrib/google-benchmark"] path = contrib/google-benchmark url = https://github.com/google/benchmark.git +[submodule "contrib/libdivide"] + path = 
contrib/libdivide + url = https://github.com/ridiculousfish/libdivide.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2e05b318b8fa..6f80059498ea 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -65,7 +65,7 @@ add_contrib (dragonbox-cmake dragonbox) add_contrib (vectorscan-cmake vectorscan) add_contrib (jemalloc-cmake jemalloc) add_contrib (libcpuid-cmake libcpuid) -add_contrib (libdivide) +add_contrib (libdivide-cmake) add_contrib (libmetrohash) add_contrib (lz4-cmake lz4) add_contrib (murmurhash) diff --git a/contrib/libdivide b/contrib/libdivide new file mode 160000 index 000000000000..57678d011970 --- /dev/null +++ b/contrib/libdivide @@ -0,0 +1 @@ +Subproject commit 57678d0119707d85e8a6190df53748c758769cdf diff --git a/contrib/libdivide-cmake/CMakeLists.txt b/contrib/libdivide-cmake/CMakeLists.txt new file mode 100644 index 000000000000..3174808bc231 --- /dev/null +++ b/contrib/libdivide-cmake/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LIBDIVIDE_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libdivide") +add_library (_libdivide INTERFACE) +# for libdivide.h +target_include_directories (_libdivide SYSTEM BEFORE INTERFACE ${LIBDIVIDE_SOURCE_DIR}) +# for libdivide-config.h +target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .) 
+add_library (ch_contrib::libdivide ALIAS _libdivide) diff --git a/contrib/libdivide-cmake/libdivide-config.h b/contrib/libdivide-cmake/libdivide-config.h new file mode 100644 index 000000000000..8ef001fb97bc --- /dev/null +++ b/contrib/libdivide-cmake/libdivide-config.h @@ -0,0 +1,9 @@ +#if defined(__SSE2__) +# define LIBDIVIDE_SSE2 +#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__) +# define LIBDIVIDE_AVX512 +#elif defined(__AVX2__) +# define LIBDIVIDE_AVX2 +#elif defined(__aarch64__) && defined(__ARM_NEON) +# define LIBDIVIDE_NEON +#endif diff --git a/contrib/libdivide/CMakeLists.txt b/contrib/libdivide/CMakeLists.txt deleted file mode 100644 index 45cbc0a584b5..000000000000 --- a/contrib/libdivide/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_library (_libdivide INTERFACE) -target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .) -add_library (ch_contrib::libdivide ALIAS _libdivide) diff --git a/contrib/libdivide/LICENSE.txt b/contrib/libdivide/LICENSE.txt deleted file mode 100644 index d056b847bba8..000000000000 --- a/contrib/libdivide/LICENSE.txt +++ /dev/null @@ -1,20 +0,0 @@ - libdivide - Copyright (C) 2010 ridiculous_fish - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. 
This notice may not be removed or altered from any source distribution. - - libdivide@ridiculousfish.com diff --git a/contrib/libdivide/README.txt b/contrib/libdivide/README.txt deleted file mode 100644 index 2d17a68e4c2b..000000000000 --- a/contrib/libdivide/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://github.com/ridiculousfish/libdivide -http://libdivide.com/ diff --git a/contrib/libdivide/libdivide.h b/contrib/libdivide/libdivide.h deleted file mode 100644 index 33d210310a16..000000000000 --- a/contrib/libdivide/libdivide.h +++ /dev/null @@ -1,2503 +0,0 @@ -// libdivide.h - Optimized integer division -// https://libdivide.com -// -// Copyright (C) 2010 - 2019 ridiculous_fish, -// Copyright (C) 2016 - 2019 Kim Walisch, -// -// libdivide is dual-licensed under the Boost or zlib licenses. -// You may use libdivide under the terms of either of these. -// See LICENSE.txt for more details. - -#ifndef LIBDIVIDE_H -#define LIBDIVIDE_H - -#define LIBDIVIDE_VERSION "3.0" -#define LIBDIVIDE_VERSION_MAJOR 3 -#define LIBDIVIDE_VERSION_MINOR 0 - -#include - -#if defined(__cplusplus) -#include -#include -#include -#else -#include -#include -#endif - -#if defined(LIBDIVIDE_SSE2) -#include -#endif -#if defined(LIBDIVIDE_AVX2) || defined(LIBDIVIDE_AVX512) -#include -#endif -#if defined(LIBDIVIDE_NEON) -#include -#endif - -#if defined(_MSC_VER) -#include -// disable warning C4146: unary minus operator applied -// to unsigned type, result still unsigned -#pragma warning(disable : 4146) -#define LIBDIVIDE_VC -#endif - -#if !defined(__has_builtin) -#define __has_builtin(x) 0 -#endif - -#if defined(__SIZEOF_INT128__) -#define HAS_INT128_T -// clang-cl on Windows does not yet support 128-bit division -#if !(defined(__clang__) && defined(LIBDIVIDE_VC)) -#define HAS_INT128_DIV -#endif -#endif - -#if defined(__x86_64__) || defined(_M_X64) -#define LIBDIVIDE_X86_64 -#endif - -#if defined(__i386__) -#define LIBDIVIDE_i386 -#endif - -#if defined(__GNUC__) || defined(__clang__) -#define 
LIBDIVIDE_GCC_STYLE_ASM -#endif - -#if defined(__cplusplus) || defined(LIBDIVIDE_VC) -#define LIBDIVIDE_FUNCTION __FUNCTION__ -#else -#define LIBDIVIDE_FUNCTION __func__ -#endif - -#define LIBDIVIDE_ERROR(msg) \ - do { \ - fprintf(stderr, "libdivide.h:%d: %s(): Error: %s\n", __LINE__, LIBDIVIDE_FUNCTION, msg); \ - abort(); \ - } while (0) - -#if defined(LIBDIVIDE_ASSERTIONS_ON) -#define LIBDIVIDE_ASSERT(x) \ - do { \ - if (!(x)) { \ - fprintf(stderr, "libdivide.h:%d: %s(): Assertion failed: %s\n", __LINE__, \ - LIBDIVIDE_FUNCTION, #x); \ - abort(); \ - } \ - } while (0) -#else -#define LIBDIVIDE_ASSERT(x) -#endif - -#ifdef __cplusplus -namespace libdivide { -#endif - -// pack divider structs to prevent compilers from padding. -// This reduces memory usage by up to 43% when using a large -// array of libdivide dividers and improves performance -// by up to 10% because of reduced memory bandwidth. -#pragma pack(push, 1) - -struct libdivide_u32_t { - uint32_t magic; - uint8_t more; -}; - -struct libdivide_s32_t { - int32_t magic; - uint8_t more; -}; - -struct libdivide_u64_t { - uint64_t magic; - uint8_t more; -}; - -struct libdivide_s64_t { - int64_t magic; - uint8_t more; -}; - -struct libdivide_u32_branchfree_t { - uint32_t magic; - uint8_t more; -}; - -struct libdivide_s32_branchfree_t { - int32_t magic; - uint8_t more; -}; - -struct libdivide_u64_branchfree_t { - uint64_t magic; - uint8_t more; -}; - -struct libdivide_s64_branchfree_t { - int64_t magic; - uint8_t more; -}; - -#pragma pack(pop) - -// Explanation of the "more" field: -// -// * Bits 0-5 is the shift value (for shift path or mult path). -// * Bit 6 is the add indicator for mult path. -// * Bit 7 is set if the divisor is negative. We use bit 7 as the negative -// divisor indicator so that we can efficiently use sign extension to -// create a bitmask with all bits set to 1 (if the divisor is negative) -// or 0 (if the divisor is positive). 
-// -// u32: [0-4] shift value -// [5] ignored -// [6] add indicator -// magic number of 0 indicates shift path -// -// s32: [0-4] shift value -// [5] ignored -// [6] add indicator -// [7] indicates negative divisor -// magic number of 0 indicates shift path -// -// u64: [0-5] shift value -// [6] add indicator -// magic number of 0 indicates shift path -// -// s64: [0-5] shift value -// [6] add indicator -// [7] indicates negative divisor -// magic number of 0 indicates shift path -// -// In s32 and s64 branchfree modes, the magic number is negated according to -// whether the divisor is negated. In branchfree strategy, it is not negated. - -enum { - LIBDIVIDE_32_SHIFT_MASK = 0x1F, - LIBDIVIDE_64_SHIFT_MASK = 0x3F, - LIBDIVIDE_ADD_MARKER = 0x40, - LIBDIVIDE_NEGATIVE_DIVISOR = 0x80 -}; - -static inline struct libdivide_s32_t libdivide_s32_gen(int32_t d); -static inline struct libdivide_u32_t libdivide_u32_gen(uint32_t d); -static inline struct libdivide_s64_t libdivide_s64_gen(int64_t d); -static inline struct libdivide_u64_t libdivide_u64_gen(uint64_t d); - -static inline struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d); -static inline struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d); -static inline struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d); -static inline struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d); - -static inline int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom); -static inline uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom); -static inline int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom); -static inline uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom); - -static inline int32_t libdivide_s32_branchfree_do( - int32_t numer, const struct libdivide_s32_branchfree_t *denom); -static inline uint32_t libdivide_u32_branchfree_do( - uint32_t 
numer, const struct libdivide_u32_branchfree_t *denom); -static inline int64_t libdivide_s64_branchfree_do( - int64_t numer, const struct libdivide_s64_branchfree_t *denom); -static inline uint64_t libdivide_u64_branchfree_do( - uint64_t numer, const struct libdivide_u64_branchfree_t *denom); - -static inline int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom); -static inline uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom); -static inline int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom); -static inline uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom); - -static inline int32_t libdivide_s32_branchfree_recover( - const struct libdivide_s32_branchfree_t *denom); -static inline uint32_t libdivide_u32_branchfree_recover( - const struct libdivide_u32_branchfree_t *denom); -static inline int64_t libdivide_s64_branchfree_recover( - const struct libdivide_s64_branchfree_t *denom); -static inline uint64_t libdivide_u64_branchfree_recover( - const struct libdivide_u64_branchfree_t *denom); - -//////// Internal Utility Functions - -static inline uint32_t libdivide_mullhi_u32(uint32_t x, uint32_t y) { - uint64_t xl = x, yl = y; - uint64_t rl = xl * yl; - return (uint32_t)(rl >> 32); -} - -static inline int32_t libdivide_mullhi_s32(int32_t x, int32_t y) { - int64_t xl = x, yl = y; - int64_t rl = xl * yl; - // needs to be arithmetic shift - return (int32_t)(rl >> 32); -} - -static inline uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) { -#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64) - return __umulh(x, y); -#elif defined(HAS_INT128_T) - __uint128_t xl = x, yl = y; - __uint128_t rl = xl * yl; - return (uint64_t)(rl >> 64); -#else - // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - uint32_t mask = 0xFFFFFFFF; - uint32_t x0 = (uint32_t)(x & mask); - uint32_t x1 = (uint32_t)(x >> 32); - uint32_t y0 = (uint32_t)(y & mask); - uint32_t y1 = (uint32_t)(y >> 32); - 
uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); - uint64_t x0y1 = x0 * (uint64_t)y1; - uint64_t x1y0 = x1 * (uint64_t)y0; - uint64_t x1y1 = x1 * (uint64_t)y1; - uint64_t temp = x1y0 + x0y0_hi; - uint64_t temp_lo = temp & mask; - uint64_t temp_hi = temp >> 32; - - return x1y1 + temp_hi + ((temp_lo + x0y1) >> 32); -#endif -} - -static inline int64_t libdivide_mullhi_s64(int64_t x, int64_t y) { -#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64) - return __mulh(x, y); -#elif defined(HAS_INT128_T) - __int128_t xl = x, yl = y; - __int128_t rl = xl * yl; - return (int64_t)(rl >> 64); -#else - // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - uint32_t mask = 0xFFFFFFFF; - uint32_t x0 = (uint32_t)(x & mask); - uint32_t y0 = (uint32_t)(y & mask); - int32_t x1 = (int32_t)(x >> 32); - int32_t y1 = (int32_t)(y >> 32); - uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); - int64_t t = x1 * (int64_t)y0 + x0y0_hi; - int64_t w1 = x0 * (int64_t)y1 + (t & mask); - - return x1 * (int64_t)y1 + (t >> 32) + (w1 >> 32); -#endif -} - -static inline int32_t libdivide_count_leading_zeros32(uint32_t val) { -#if defined(__GNUC__) || __has_builtin(__builtin_clz) - // Fast way to count leading zeros - return __builtin_clz(val); -#elif defined(LIBDIVIDE_VC) - unsigned long result; - if (_BitScanReverse(&result, val)) { - return 31 - result; - } - return 0; -#else - if (val == 0) return 32; - int32_t result = 8; - uint32_t hi = 0xFFU << 24; - while ((val & hi) == 0) { - hi >>= 8; - result += 8; - } - while (val & hi) { - result -= 1; - hi <<= 1; - } - return result; -#endif -} - -static inline int32_t libdivide_count_leading_zeros64(uint64_t val) { -#if defined(__GNUC__) || __has_builtin(__builtin_clzll) - // Fast way to count leading zeros - return __builtin_clzll(val); -#elif defined(LIBDIVIDE_VC) && defined(_WIN64) - unsigned long result; - if (_BitScanReverse64(&result, val)) { - return 63 - result; - } - return 0; -#else - uint32_t hi = val >> 32; - 
uint32_t lo = val & 0xFFFFFFFF; - if (hi != 0) return libdivide_count_leading_zeros32(hi); - return 32 + libdivide_count_leading_zeros32(lo); -#endif -} - -// libdivide_64_div_32_to_32: divides a 64-bit uint {u1, u0} by a 32-bit -// uint {v}. The result must fit in 32 bits. -// Returns the quotient directly and the remainder in *r -static inline uint32_t libdivide_64_div_32_to_32( - uint32_t u1, uint32_t u0, uint32_t v, uint32_t *r) { -#if (defined(LIBDIVIDE_i386) || defined(LIBDIVIDE_X86_64)) && defined(LIBDIVIDE_GCC_STYLE_ASM) - uint32_t result; - __asm__("divl %[v]" : "=a"(result), "=d"(*r) : [v] "r"(v), "a"(u0), "d"(u1)); - return result; -#else - uint64_t n = ((uint64_t)u1 << 32) | u0; - uint32_t result = (uint32_t)(n / v); - *r = (uint32_t)(n - result * (uint64_t)v); - return result; -#endif -} - -// libdivide_128_div_64_to_64: divides a 128-bit uint {u1, u0} by a 64-bit -// uint {v}. The result must fit in 64 bits. -// Returns the quotient directly and the remainder in *r -static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) { - // N.B. resist the temptation to use __uint128_t here. - // In LLVM compiler-rt, it performs a 128/128 -> 128 division which is many times slower than - // necessary. In gcc it's better but still slower than the divlu implementation, perhaps because - // it's not inlined. -#if defined(LIBDIVIDE_X86_64) && defined(LIBDIVIDE_GCC_STYLE_ASM) - uint64_t result; - __asm__("divq %[v]" : "=a"(result), "=d"(*r) : [v] "r"(v), "a"(u0), "d"(u1)); - return result; -#else - // Code taken from Hacker's Delight: - // http://www.hackersdelight.org/HDcode/divlu.c. - // License permits inclusion here per: - // http://www.hackersdelight.org/permissions.htm - - const uint64_t b = (1ULL << 32); // Number base (32 bits) - uint64_t un1, un0; // Norm. dividend LSD's - uint64_t vn1, vn0; // Norm. 
divisor digits - uint64_t q1, q0; // Quotient digits - uint64_t un64, un21, un10; // Dividend digit pairs - uint64_t rhat; // A remainder - int32_t s; // Shift amount for norm - - // If overflow, set rem. to an impossible value, - // and return the largest possible quotient - if (u1 >= v) { - *r = (uint64_t)-1; - return (uint64_t)-1; - } - - // count leading zeros - s = libdivide_count_leading_zeros64(v); - if (s > 0) { - // Normalize divisor - v = v << s; - un64 = (u1 << s) | (u0 >> (64 - s)); - un10 = u0 << s; // Shift dividend left - } else { - // Avoid undefined behavior of (u0 >> 64). - // The behavior is undefined if the right operand is - // negative, or greater than or equal to the length - // in bits of the promoted left operand. - un64 = u1; - un10 = u0; - } - - // Break divisor up into two 32-bit digits - vn1 = v >> 32; - vn0 = v & 0xFFFFFFFF; - - // Break right half of dividend into two digits - un1 = un10 >> 32; - un0 = un10 & 0xFFFFFFFF; - - // Compute the first quotient digit, q1 - q1 = un64 / vn1; - rhat = un64 - q1 * vn1; - - while (q1 >= b || q1 * vn0 > b * rhat + un1) { - q1 = q1 - 1; - rhat = rhat + vn1; - if (rhat >= b) break; - } - - // Multiply and subtract - un21 = un64 * b + un1 - q1 * v; - - // Compute the second quotient digit - q0 = un21 / vn1; - rhat = un21 - q0 * vn1; - - while (q0 >= b || q0 * vn0 > b * rhat + un0) { - q0 = q0 - 1; - rhat = rhat + vn1; - if (rhat >= b) break; - } - - *r = (un21 * b + un0 - q0 * v) >> s; - return q1 * b + q0; -#endif -} - -// Bitshift a u128 in place, left (signed_shift > 0) or right (signed_shift < 0) -static inline void libdivide_u128_shift(uint64_t *u1, uint64_t *u0, int32_t signed_shift) { - if (signed_shift > 0) { - uint32_t shift = signed_shift; - *u1 <<= shift; - *u1 |= *u0 >> (64 - shift); - *u0 <<= shift; - } else if (signed_shift < 0) { - uint32_t shift = -signed_shift; - *u0 >>= shift; - *u0 |= *u1 << (64 - shift); - *u1 >>= shift; - } -} - -// Computes a 128 / 128 -> 64 bit division, with a 
128 bit remainder. -static uint64_t libdivide_128_div_128_to_64( - uint64_t u_hi, uint64_t u_lo, uint64_t v_hi, uint64_t v_lo, uint64_t *r_hi, uint64_t *r_lo) { -#if defined(HAS_INT128_T) && defined(HAS_INT128_DIV) - __uint128_t ufull = u_hi; - __uint128_t vfull = v_hi; - ufull = (ufull << 64) | u_lo; - vfull = (vfull << 64) | v_lo; - uint64_t res = (uint64_t)(ufull / vfull); - __uint128_t remainder = ufull - (vfull * res); - *r_lo = (uint64_t)remainder; - *r_hi = (uint64_t)(remainder >> 64); - return res; -#else - // Adapted from "Unsigned Doubleword Division" in Hacker's Delight - // We want to compute u / v - typedef struct { - uint64_t hi; - uint64_t lo; - } u128_t; - u128_t u = {u_hi, u_lo}; - u128_t v = {v_hi, v_lo}; - - if (v.hi == 0) { - // divisor v is a 64 bit value, so we just need one 128/64 division - // Note that we are simpler than Hacker's Delight here, because we know - // the quotient fits in 64 bits whereas Hacker's Delight demands a full - // 128 bit quotient - *r_hi = 0; - return libdivide_128_div_64_to_64(u.hi, u.lo, v.lo, r_lo); - } - // Here v >= 2**64 - // We know that v.hi != 0, so count leading zeros is OK - // We have 0 <= n <= 63 - uint32_t n = libdivide_count_leading_zeros64(v.hi); - - // Normalize the divisor so its MSB is 1 - u128_t v1t = v; - libdivide_u128_shift(&v1t.hi, &v1t.lo, n); - uint64_t v1 = v1t.hi; // i.e. v1 = v1t >> 64 - - // To ensure no overflow - u128_t u1 = u; - libdivide_u128_shift(&u1.hi, &u1.lo, -1); - - // Get quotient from divide unsigned insn. - uint64_t rem_ignored; - uint64_t q1 = libdivide_128_div_64_to_64(u1.hi, u1.lo, v1, &rem_ignored); - - // Undo normalization and division of u by 2. - u128_t q0 = {0, q1}; - libdivide_u128_shift(&q0.hi, &q0.lo, n); - libdivide_u128_shift(&q0.hi, &q0.lo, -63); - - // Make q0 correct or too small by 1 - // Equivalent to `if (q0 != 0) q0 = q0 - 1;` - if (q0.hi != 0 || q0.lo != 0) { - q0.hi -= (q0.lo == 0); // borrow - q0.lo -= 1; - } - - // Now q0 is correct. 
- // Compute q0 * v as q0v - // = (q0.hi << 64 + q0.lo) * (v.hi << 64 + v.lo) - // = (q0.hi * v.hi << 128) + (q0.hi * v.lo << 64) + - // (q0.lo * v.hi << 64) + q0.lo * v.lo) - // Each term is 128 bit - // High half of full product (upper 128 bits!) are dropped - u128_t q0v = {0, 0}; - q0v.hi = q0.hi * v.lo + q0.lo * v.hi + libdivide_mullhi_u64(q0.lo, v.lo); - q0v.lo = q0.lo * v.lo; - - // Compute u - q0v as u_q0v - // This is the remainder - u128_t u_q0v = u; - u_q0v.hi -= q0v.hi + (u.lo < q0v.lo); // second term is borrow - u_q0v.lo -= q0v.lo; - - // Check if u_q0v >= v - // This checks if our remainder is larger than the divisor - if ((u_q0v.hi > v.hi) || (u_q0v.hi == v.hi && u_q0v.lo >= v.lo)) { - // Increment q0 - q0.lo += 1; - q0.hi += (q0.lo == 0); // carry - - // Subtract v from remainder - u_q0v.hi -= v.hi + (u_q0v.lo < v.lo); - u_q0v.lo -= v.lo; - } - - *r_hi = u_q0v.hi; - *r_lo = u_q0v.lo; - - LIBDIVIDE_ASSERT(q0.hi == 0); - return q0.lo; -#endif -} - -////////// UINT32 - -static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_u32_t result; - uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(d); - - // Power of 2 - if ((d & (d - 1)) == 0) { - // We need to subtract 1 from the shift value in case of an unsigned - // branchfree divider because there is a hardcoded right shift by 1 - // in its division algorithm. Because of this we also need to add back - // 1 in its recovery algorithm. - result.magic = 0; - result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); - } else { - uint8_t more; - uint32_t rem, proposed_m; - proposed_m = libdivide_64_div_32_to_32(1U << floor_log_2_d, 0, d, &rem); - - LIBDIVIDE_ASSERT(rem > 0 && rem < d); - const uint32_t e = d - rem; - - // This power works if e < 2**floor_log_2_d. 
- if (!branchfree && (e < (1U << floor_log_2_d))) { - // This power works - more = floor_log_2_d; - } else { - // We have to use the general 33-bit algorithm. We need to compute - // (2**power) / d. However, we already have (2**(power-1))/d and - // its remainder. By doubling both, and then correcting the - // remainder, we can compute the larger division. - // don't care about overflow here - in fact, we expect it - proposed_m += proposed_m; - const uint32_t twice_rem = rem + rem; - if (twice_rem >= d || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - result.magic = 1 + proposed_m; - result.more = more; - // result.more's shift should in general be ceil_log_2_d. But if we - // used the smaller power, we subtract one from the shift because we're - // using the smaller power. If we're using the larger power, we - // subtract one from the shift because it's taken care of by the add - // indicator. So floor_log_2_d happens to be correct in both cases. - } - return result; -} - -struct libdivide_u32_t libdivide_u32_gen(uint32_t d) { - return libdivide_internal_u32_gen(d, 0); -} - -struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) { - if (d == 1) { - LIBDIVIDE_ERROR("branchfree divider must be != 1"); - } - struct libdivide_u32_t tmp = libdivide_internal_u32_gen(d, 1); - struct libdivide_u32_branchfree_t ret = { - tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_32_SHIFT_MASK)}; - return ret; -} - -uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return numer >> more; - } else { - uint32_t q = libdivide_mullhi_u32(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - uint32_t t = ((numer - q) >> 1) + q; - return t >> (more & LIBDIVIDE_32_SHIFT_MASK); - } else { - // All upper bits are 0, - // don't need to mask them off. 
- return q >> more; - } - } -} - -uint32_t libdivide_u32_branchfree_do( - uint32_t numer, const struct libdivide_u32_branchfree_t *denom) { - uint32_t q = libdivide_mullhi_u32(denom->magic, numer); - uint32_t t = ((numer - q) >> 1) + q; - return t >> denom->more; -} - -uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - return 1U << shift; - } else if (!(more & LIBDIVIDE_ADD_MARKER)) { - // We compute q = n/d = n*m / 2^(32 + shift) - // Therefore we have d = 2^(32 + shift) / m - // We need to ceil it. - // We know d is not a power of 2, so m is not a power of 2, - // so we can just add 1 to the floor - uint32_t hi_dividend = 1U << shift; - uint32_t rem_ignored; - return 1 + libdivide_64_div_32_to_32(hi_dividend, 0, denom->magic, &rem_ignored); - } else { - // Here we wish to compute d = 2^(32+shift+1)/(m+2^32). - // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now - // Also note that shift may be as high as 31, so shift + 1 will - // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and - // then double the quotient and remainder. - uint64_t half_n = 1ULL << (32 + shift); - uint64_t d = (1ULL << 32) | denom->magic; - // Note that the quotient is guaranteed <= 32 bits, but the remainder - // may need 33! - uint32_t half_q = (uint32_t)(half_n / d); - uint64_t rem = half_n % d; - // We computed 2^(32+shift)/(m+2^32) - // Need to double it, and then add 1 to the quotient if doubling th - // remainder would increase the quotient. 
- // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits - uint32_t full_q = half_q + half_q + ((rem << 1) >= d); - - // We rounded down in gen (hence +1) - return full_q + 1; - } -} - -uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - return 1U << (shift + 1); - } else { - // Here we wish to compute d = 2^(32+shift+1)/(m+2^32). - // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now - // Also note that shift may be as high as 31, so shift + 1 will - // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and - // then double the quotient and remainder. - uint64_t half_n = 1ULL << (32 + shift); - uint64_t d = (1ULL << 32) | denom->magic; - // Note that the quotient is guaranteed <= 32 bits, but the remainder - // may need 33! - uint32_t half_q = (uint32_t)(half_n / d); - uint64_t rem = half_n % d; - // We computed 2^(32+shift)/(m+2^32) - // Need to double it, and then add 1 to the quotient if doubling th - // remainder would increase the quotient. - // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits - uint32_t full_q = half_q + half_q + ((rem << 1) >= d); - - // We rounded down in gen (hence +1) - return full_q + 1; - } -} - -/////////// UINT64 - -static inline struct libdivide_u64_t libdivide_internal_u64_gen(uint64_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_u64_t result; - uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(d); - - // Power of 2 - if ((d & (d - 1)) == 0) { - // We need to subtract 1 from the shift value in case of an unsigned - // branchfree divider because there is a hardcoded right shift by 1 - // in its division algorithm. Because of this we also need to add back - // 1 in its recovery algorithm. 
- result.magic = 0; - result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); - } else { - uint64_t proposed_m, rem; - uint8_t more; - // (1 << (64 + floor_log_2_d)) / d - proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem); - - LIBDIVIDE_ASSERT(rem > 0 && rem < d); - const uint64_t e = d - rem; - - // This power works if e < 2**floor_log_2_d. - if (!branchfree && e < (1ULL << floor_log_2_d)) { - // This power works - more = floor_log_2_d; - } else { - // We have to use the general 65-bit algorithm. We need to compute - // (2**power) / d. However, we already have (2**(power-1))/d and - // its remainder. By doubling both, and then correcting the - // remainder, we can compute the larger division. - // don't care about overflow here - in fact, we expect it - proposed_m += proposed_m; - const uint64_t twice_rem = rem + rem; - if (twice_rem >= d || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - result.magic = 1 + proposed_m; - result.more = more; - // result.more's shift should in general be ceil_log_2_d. But if we - // used the smaller power, we subtract one from the shift because we're - // using the smaller power. If we're using the larger power, we - // subtract one from the shift because it's taken care of by the add - // indicator. So floor_log_2_d happens to be correct in both cases, - // which is why we do it outside of the if statement. 
- } - return result; -} - -struct libdivide_u64_t libdivide_u64_gen(uint64_t d) { - return libdivide_internal_u64_gen(d, 0); -} - -struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) { - if (d == 1) { - LIBDIVIDE_ERROR("branchfree divider must be != 1"); - } - struct libdivide_u64_t tmp = libdivide_internal_u64_gen(d, 1); - struct libdivide_u64_branchfree_t ret = { - tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_64_SHIFT_MASK)}; - return ret; -} - -uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return numer >> more; - } else { - uint64_t q = libdivide_mullhi_u64(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - uint64_t t = ((numer - q) >> 1) + q; - return t >> (more & LIBDIVIDE_64_SHIFT_MASK); - } else { - // All upper bits are 0, - // don't need to mask them off. - return q >> more; - } - } -} - -uint64_t libdivide_u64_branchfree_do( - uint64_t numer, const struct libdivide_u64_branchfree_t *denom) { - uint64_t q = libdivide_mullhi_u64(denom->magic, numer); - uint64_t t = ((numer - q) >> 1) + q; - return t >> denom->more; -} - -uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { - return 1ULL << shift; - } else if (!(more & LIBDIVIDE_ADD_MARKER)) { - // We compute q = n/d = n*m / 2^(64 + shift) - // Therefore we have d = 2^(64 + shift) / m - // We need to ceil it. - // We know d is not a power of 2, so m is not a power of 2, - // so we can just add 1 to the floor - uint64_t hi_dividend = 1ULL << shift; - uint64_t rem_ignored; - return 1 + libdivide_128_div_64_to_64(hi_dividend, 0, denom->magic, &rem_ignored); - } else { - // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). - // Notice (m + 2^64) is a 65 bit number. This gets hairy. See - // libdivide_u32_recover for more on what we do here. 
- // TODO: do something better than 128 bit math - - // Full n is a (potentially) 129 bit value - // half_n is a 128 bit value - // Compute the hi half of half_n. Low half is 0. - uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; - // d is a 65 bit value. The high bit is always set to 1. - const uint64_t d_hi = 1, d_lo = denom->magic; - // Note that the quotient is guaranteed <= 64 bits, - // but the remainder may need 65! - uint64_t r_hi, r_lo; - uint64_t half_q = - libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); - // We computed 2^(64+shift)/(m+2^64) - // Double the remainder ('dr') and check if that is larger than d - // Note that d is a 65 bit value, so r1 is small and so r1 + r1 - // cannot overflow - uint64_t dr_lo = r_lo + r_lo; - uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry - int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); - uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); - return full_q + 1; - } -} - -uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { - return 1ULL << (shift + 1); - } else { - // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). - // Notice (m + 2^64) is a 65 bit number. This gets hairy. See - // libdivide_u32_recover for more on what we do here. - // TODO: do something better than 128 bit math - - // Full n is a (potentially) 129 bit value - // half_n is a 128 bit value - // Compute the hi half of half_n. Low half is 0. - uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; - // d is a 65 bit value. The high bit is always set to 1. - const uint64_t d_hi = 1, d_lo = denom->magic; - // Note that the quotient is guaranteed <= 64 bits, - // but the remainder may need 65! 
- uint64_t r_hi, r_lo; - uint64_t half_q = - libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); - // We computed 2^(64+shift)/(m+2^64) - // Double the remainder ('dr') and check if that is larger than d - // Note that d is a 65 bit value, so r1 is small and so r1 + r1 - // cannot overflow - uint64_t dr_lo = r_lo + r_lo; - uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry - int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); - uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); - return full_q + 1; - } -} - -/////////// SINT32 - -static inline struct libdivide_s32_t libdivide_internal_s32_gen(int32_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_s32_t result; - - // If d is a power of 2, or negative a power of 2, we have to use a shift. - // This is especially important because the magic algorithm fails for -1. - // To check if d is a power of 2 or its inverse, it suffices to check - // whether its absolute value has exactly one bit set. This works even for - // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set - // and is a power of 2. - uint32_t ud = (uint32_t)d; - uint32_t absD = (d < 0) ? -ud : ud; - uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(absD); - // check if exactly one bit is set, - // don't care if absD is 0 since that's divide by zero - if ((absD & (absD - 1)) == 0) { - // Branchfree and normal paths are exactly the same - result.magic = 0; - result.more = floor_log_2_d | (d < 0 ? 
LIBDIVIDE_NEGATIVE_DIVISOR : 0); - } else { - LIBDIVIDE_ASSERT(floor_log_2_d >= 1); - - uint8_t more; - // the dividend here is 2**(floor_log_2_d + 31), so the low 32 bit word - // is 0 and the high word is floor_log_2_d - 1 - uint32_t rem, proposed_m; - proposed_m = libdivide_64_div_32_to_32(1U << (floor_log_2_d - 1), 0, absD, &rem); - const uint32_t e = absD - rem; - - // We are going to start with a power of floor_log_2_d - 1. - // This works if works if e < 2**floor_log_2_d. - if (!branchfree && e < (1U << floor_log_2_d)) { - // This power works - more = floor_log_2_d - 1; - } else { - // We need to go one higher. This should not make proposed_m - // overflow, but it will make it negative when interpreted as an - // int32_t. - proposed_m += proposed_m; - const uint32_t twice_rem = rem + rem; - if (twice_rem >= absD || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - - proposed_m += 1; - int32_t magic = (int32_t)proposed_m; - - // Mark if we are negative. Note we only negate the magic number in the - // branchfull case. 
- if (d < 0) { - more |= LIBDIVIDE_NEGATIVE_DIVISOR; - if (!branchfree) { - magic = -magic; - } - } - - result.more = more; - result.magic = magic; - } - return result; -} - -struct libdivide_s32_t libdivide_s32_gen(int32_t d) { - return libdivide_internal_s32_gen(d, 0); -} - -struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) { - struct libdivide_s32_t tmp = libdivide_internal_s32_gen(d, 1); - struct libdivide_s32_branchfree_t result = {tmp.magic, tmp.more}; - return result; -} - -int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - uint32_t sign = (int8_t)more >> 7; - uint32_t mask = (1U << shift) - 1; - uint32_t uq = numer + ((numer >> 31) & mask); - int32_t q = (int32_t)uq; - q >>= shift; - q = (q ^ sign) - sign; - return q; - } else { - uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift and then sign extend - int32_t sign = (int8_t)more >> 7; - // q += (more < 0 ? 
-numer : numer) - // cast required to avoid UB - uq += ((uint32_t)numer ^ sign) - sign; - } - int32_t q = (int32_t)uq; - q >>= shift; - q += (q < 0); - return q; - } -} - -int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift and then sign extend - int32_t sign = (int8_t)more >> 7; - int32_t magic = denom->magic; - int32_t q = libdivide_mullhi_s32(magic, numer); - q += numer; - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is a power of - // 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - uint32_t q_sign = (uint32_t)(q >> 31); - q += q_sign & ((1U << shift) - is_power_of_2); - - // Now arithmetic right shift - q >>= shift; - // Negate if needed - q = (q ^ sign) - sign; - - return q; -} - -int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - if (!denom->magic) { - uint32_t absD = 1U << shift; - if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { - absD = -absD; - } - return (int32_t)absD; - } else { - // Unsigned math is much easier - // We negate the magic number only in the branchfull case, and we don't - // know which case we're in. However we have enough information to - // determine the correct sign of the magic number. The divisor was - // negative if LIBDIVIDE_NEGATIVE_DIVISOR is set. If ADD_MARKER is set, - // the magic number's sign is opposite that of the divisor. - // We want to compute the positive magic number. - int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); - int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) ? denom->magic > 0 : denom->magic < 0; - - // Handle the power of 2 case (including branchfree) - if (denom->magic == 0) { - int32_t result = 1U << shift; - return negative_divisor ? 
-result : result; - } - - uint32_t d = (uint32_t)(magic_was_negated ? -denom->magic : denom->magic); - uint64_t n = 1ULL << (32 + shift); // this shift cannot exceed 30 - uint32_t q = (uint32_t)(n / d); - int32_t result = (int32_t)q; - result += 1; - return negative_divisor ? -result : result; - } -} - -int32_t libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom) { - return libdivide_s32_recover((const struct libdivide_s32_t *)denom); -} - -///////////// SINT64 - -static inline struct libdivide_s64_t libdivide_internal_s64_gen(int64_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_s64_t result; - - // If d is a power of 2, or negative a power of 2, we have to use a shift. - // This is especially important because the magic algorithm fails for -1. - // To check if d is a power of 2 or its inverse, it suffices to check - // whether its absolute value has exactly one bit set. This works even for - // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set - // and is a power of 2. - uint64_t ud = (uint64_t)d; - uint64_t absD = (d < 0) ? -ud : ud; - uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(absD); - // check if exactly one bit is set, - // don't care if absD is 0 since that's divide by zero - if ((absD & (absD - 1)) == 0) { - // Branchfree and non-branchfree cases are the same - result.magic = 0; - result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0); - } else { - // the dividend here is 2**(floor_log_2_d + 63), so the low 64 bit word - // is 0 and the high word is floor_log_2_d - 1 - uint8_t more; - uint64_t rem, proposed_m; - proposed_m = libdivide_128_div_64_to_64(1ULL << (floor_log_2_d - 1), 0, absD, &rem); - const uint64_t e = absD - rem; - - // We are going to start with a power of floor_log_2_d - 1. - // This works if works if e < 2**floor_log_2_d. 
- if (!branchfree && e < (1ULL << floor_log_2_d)) { - // This power works - more = floor_log_2_d - 1; - } else { - // We need to go one higher. This should not make proposed_m - // overflow, but it will make it negative when interpreted as an - // int32_t. - proposed_m += proposed_m; - const uint64_t twice_rem = rem + rem; - if (twice_rem >= absD || twice_rem < rem) proposed_m += 1; - // note that we only set the LIBDIVIDE_NEGATIVE_DIVISOR bit if we - // also set ADD_MARKER this is an annoying optimization that - // enables algorithm #4 to avoid the mask. However we always set it - // in the branchfree case - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - proposed_m += 1; - int64_t magic = (int64_t)proposed_m; - - // Mark if we are negative - if (d < 0) { - more |= LIBDIVIDE_NEGATIVE_DIVISOR; - if (!branchfree) { - magic = -magic; - } - } - - result.more = more; - result.magic = magic; - } - return result; -} - -struct libdivide_s64_t libdivide_s64_gen(int64_t d) { - return libdivide_internal_s64_gen(d, 0); -} - -struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) { - struct libdivide_s64_t tmp = libdivide_internal_s64_gen(d, 1); - struct libdivide_s64_branchfree_t ret = {tmp.magic, tmp.more}; - return ret; -} - -int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { // shift path - uint64_t mask = (1ULL << shift) - 1; - uint64_t uq = numer + ((numer >> 63) & mask); - int64_t q = (int64_t)uq; - q >>= shift; - // must be arithmetic shift and then sign-extend - int64_t sign = (int8_t)more >> 7; - q = (q ^ sign) - sign; - return q; - } else { - uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift and then sign extend - int64_t sign = (int8_t)more >> 7; - // q += (more < 0 ? 
-numer : numer) - // cast required to avoid UB - uq += ((uint64_t)numer ^ sign) - sign; - } - int64_t q = (int64_t)uq; - q >>= shift; - q += (q < 0); - return q; - } -} - -int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift and then sign extend - int64_t sign = (int8_t)more >> 7; - int64_t magic = denom->magic; - int64_t q = libdivide_mullhi_s64(magic, numer); - q += numer; - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is a power of - // 2, or (2**shift) if it is not a power of 2. - uint64_t is_power_of_2 = (magic == 0); - uint64_t q_sign = (uint64_t)(q >> 63); - q += q_sign & ((1ULL << shift) - is_power_of_2); - - // Arithmetic right shift - q >>= shift; - // Negate if needed - q = (q ^ sign) - sign; - - return q; -} - -int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - if (denom->magic == 0) { // shift path - uint64_t absD = 1ULL << shift; - if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { - absD = -absD; - } - return (int64_t)absD; - } else { - // Unsigned math is much easier - int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); - int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) ? denom->magic > 0 : denom->magic < 0; - - uint64_t d = (uint64_t)(magic_was_negated ? 
-denom->magic : denom->magic); - uint64_t n_hi = 1ULL << shift, n_lo = 0; - uint64_t rem_ignored; - uint64_t q = libdivide_128_div_64_to_64(n_hi, n_lo, d, &rem_ignored); - int64_t result = (int64_t)(q + 1); - if (negative_divisor) { - result = -result; - } - return result; - } -} - -int64_t libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t *denom) { - return libdivide_s64_recover((const struct libdivide_s64_t *)denom); -} - -#if defined(LIBDIVIDE_NEON) - -static inline uint32x4_t libdivide_u32_do_vec128( - uint32x4_t numers, const struct libdivide_u32_t *denom); -static inline int32x4_t libdivide_s32_do_vec128( - int32x4_t numers, const struct libdivide_s32_t *denom); -static inline uint64x2_t libdivide_u64_do_vec128( - uint64x2_t numers, const struct libdivide_u64_t *denom); -static inline int64x2_t libdivide_s64_do_vec128( - int64x2_t numers, const struct libdivide_s64_t *denom); - -static inline uint32x4_t libdivide_u32_branchfree_do_vec128( - uint32x4_t numers, const struct libdivide_u32_branchfree_t *denom); -static inline int32x4_t libdivide_s32_branchfree_do_vec128( - int32x4_t numers, const struct libdivide_s32_branchfree_t *denom); -static inline uint64x2_t libdivide_u64_branchfree_do_vec128( - uint64x2_t numers, const struct libdivide_u64_branchfree_t *denom); -static inline int64x2_t libdivide_s64_branchfree_do_vec128( - int64x2_t numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -// Logical right shift by runtime value. -// NEON implements right shift as left shits by negative values. -static inline uint32x4_t libdivide_u32_neon_srl(uint32x4_t v, uint8_t amt) { - int32_t wamt = static_cast(amt); - return vshlq_u32(v, vdupq_n_s32(-wamt)); -} - -static inline uint64x2_t libdivide_u64_neon_srl(uint64x2_t v, uint8_t amt) { - int64_t wamt = static_cast(amt); - return vshlq_u64(v, vdupq_n_s64(-wamt)); -} - -// Arithmetic right shift by runtime value. 
-static inline int32x4_t libdivide_s32_neon_sra(int32x4_t v, uint8_t amt) { - int32_t wamt = static_cast(amt); - return vshlq_s32(v, vdupq_n_s32(-wamt)); -} - -static inline int64x2_t libdivide_s64_neon_sra(int64x2_t v, uint8_t amt) { - int64_t wamt = static_cast(amt); - return vshlq_s64(v, vdupq_n_s64(-wamt)); -} - -static inline int64x2_t libdivide_s64_signbits(int64x2_t v) { return vshrq_n_s64(v, 63); } - -static inline uint32x4_t libdivide_mullhi_u32_vec128(uint32x4_t a, uint32_t b) { - // Desire is [x0, x1, x2, x3] - uint32x4_t w1 = vreinterpretq_u32_u64(vmull_n_u32(vget_low_u32(a), b)); // [_, x0, _, x1] - uint32x4_t w2 = vreinterpretq_u32_u64(vmull_high_n_u32(a, b)); //[_, x2, _, x3] - return vuzp2q_u32(w1, w2); // [x0, x1, x2, x3] -} - -static inline int32x4_t libdivide_mullhi_s32_vec128(int32x4_t a, int32_t b) { - int32x4_t w1 = vreinterpretq_s32_s64(vmull_n_s32(vget_low_s32(a), b)); // [_, x0, _, x1] - int32x4_t w2 = vreinterpretq_s32_s64(vmull_high_n_s32(a, b)); //[_, x2, _, x3] - return vuzp2q_s32(w1, w2); // [x0, x1, x2, x3] -} - -static inline uint64x2_t libdivide_mullhi_u64_vec128(uint64x2_t x, uint64_t sy) { - // full 128 bits product is: - // x0*y0 + (x0*y1 << 32) + (x1*y0 << 32) + (x1*y1 << 64) - // Note x0,y0,x1,y1 are all conceptually uint32, products are 32x32->64. - - // Get low and high words. x0 contains low 32 bits, x1 is high 32 bits. - uint64x2_t y = vdupq_n_u64(sy); - uint32x2_t x0 = vmovn_u64(x); - uint32x2_t y0 = vmovn_u64(y); - uint32x2_t x1 = vshrn_n_u64(x, 32); - uint32x2_t y1 = vshrn_n_u64(y, 32); - - // Compute x0*y0. - uint64x2_t x0y0 = vmull_u32(x0, y0); - uint64x2_t x0y0_hi = vshrq_n_u64(x0y0, 32); - - // Compute other intermediate products. - uint64x2_t temp = vmlal_u32(x0y0_hi, x1, y0); // temp = x0y0_hi + x1*y0; - // We want to split temp into its low 32 bits and high 32 bits, both - // in the low half of 64 bit registers. - // Use shifts to avoid needing a reg for the mask. 
- uint64x2_t temp_lo = vshrq_n_u64(vshlq_n_u64(temp, 32), 32); // temp_lo = temp & 0xFFFFFFFF; - uint64x2_t temp_hi = vshrq_n_u64(temp, 32); // temp_hi = temp >> 32; - - temp_lo = vmlal_u32(temp_lo, x0, y1); // temp_lo += x0*y0 - temp_lo = vshrq_n_u64(temp_lo, 32); // temp_lo >>= 32 - temp_hi = vmlal_u32(temp_hi, x1, y1); // temp_hi += x1*y1 - uint64x2_t result = vaddq_u64(temp_hi, temp_lo); - return result; -} - -static inline int64x2_t libdivide_mullhi_s64_vec128(int64x2_t x, int64_t sy) { - int64x2_t p = vreinterpretq_s64_u64( - libdivide_mullhi_u64_vec128(vreinterpretq_u64_s64(x), static_cast(sy))); - int64x2_t y = vdupq_n_s64(sy); - int64x2_t t1 = vandq_s64(libdivide_s64_signbits(x), y); - int64x2_t t2 = vandq_s64(libdivide_s64_signbits(y), x); - p = vsubq_s64(p, t1); - p = vsubq_s64(p, t2); - return p; -} - -////////// UINT32 - -uint32x4_t libdivide_u32_do_vec128(uint32x4_t numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return libdivide_u32_neon_srl(numers, more); - } else { - uint32x4_t q = libdivide_mullhi_u32_vec128(numers, denom->magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - // Note we can use halving-subtract to avoid the shift. 
- uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32x4_t t = vaddq_u32(vhsubq_u32(numers, q), q); - return libdivide_u32_neon_srl(t, shift); - } else { - return libdivide_u32_neon_srl(q, more); - } - } -} - -uint32x4_t libdivide_u32_branchfree_do_vec128( - uint32x4_t numers, const struct libdivide_u32_branchfree_t *denom) { - uint32x4_t q = libdivide_mullhi_u32_vec128(numers, denom->magic); - uint32x4_t t = vaddq_u32(vhsubq_u32(numers, q), q); - return libdivide_u32_neon_srl(t, denom->more); -} - -////////// UINT64 - -uint64x2_t libdivide_u64_do_vec128(uint64x2_t numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return libdivide_u64_neon_srl(numers, more); - } else { - uint64x2_t q = libdivide_mullhi_u64_vec128(numers, denom->magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - // No 64-bit halving subtracts in NEON :( - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64x2_t t = vaddq_u64(vshrq_n_u64(vsubq_u64(numers, q), 1), q); - return libdivide_u64_neon_srl(t, shift); - } else { - return libdivide_u64_neon_srl(q, more); - } - } -} - -uint64x2_t libdivide_u64_branchfree_do_vec128( - uint64x2_t numers, const struct libdivide_u64_branchfree_t *denom) { - uint64x2_t q = libdivide_mullhi_u64_vec128(numers, denom->magic); - uint64x2_t t = vaddq_u64(vshrq_n_u64(vsubq_u64(numers, q), 1), q); - return libdivide_u64_neon_srl(t, denom->more); -} - -////////// SINT32 - -int32x4_t libdivide_s32_do_vec128(int32x4_t numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - int32x4_t roundToZeroTweak = vdupq_n_s32((int)mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - int32x4_t q = vaddq_s32(numers, vandq_s32(vshrq_n_s32(numers, 31), roundToZeroTweak)); - q = libdivide_s32_neon_sra(q, shift); - int32x4_t 
sign = vdupq_n_s32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = vsubq_s32(veorq_s32(q, sign), sign); - return q; - } else { - int32x4_t q = libdivide_mullhi_s32_vec128(numers, denom->magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - int32x4_t sign = vdupq_n_s32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = vaddq_s32(q, vsubq_s32(veorq_s32(numers, sign), sign)); - } - // q >>= shift - q = libdivide_s32_neon_sra(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = vaddq_s32( - q, vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(q), 31))); // q += (q < 0) - return q; - } -} - -int32x4_t libdivide_s32_branchfree_do_vec128( - int32x4_t numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - int32x4_t sign = vdupq_n_s32((int8_t)more >> 7); - int32x4_t q = libdivide_mullhi_s32_vec128(numers, magic); - q = vaddq_s32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - int32x4_t q_sign = vshrq_n_s32(q, 31); // q_sign = q >> 31 - int32x4_t mask = vdupq_n_s32((1U << shift) - is_power_of_2); - q = vaddq_s32(q, vandq_s32(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s32_neon_sra(q, shift); // q >>= shift - q = vsubq_s32(veorq_s32(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -int64x2_t libdivide_s64_do_vec128(int64x2_t numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - int64x2_t roundToZeroTweak = vdupq_n_s64(mask); // TODO: no need to sign extend - // q = numer + ((numer 
>> 63) & roundToZeroTweak); - int64x2_t q = - vaddq_s64(numers, vandq_s64(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_neon_sra(q, shift); - // q = (q ^ sign) - sign; - int64x2_t sign = vreinterpretq_s64_s8(vdupq_n_s8((int8_t)more >> 7)); - q = vsubq_s64(veorq_s64(q, sign), sign); - return q; - } else { - int64x2_t q = libdivide_mullhi_s64_vec128(numers, magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - int64x2_t sign = vdupq_n_s64((int8_t)more >> 7); // TODO: no need to widen - // q += ((numer ^ sign) - sign); - q = vaddq_s64(q, vsubq_s64(veorq_s64(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_neon_sra(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = vaddq_s64( - q, vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_s64(q), 63))); // q += (q < 0) - return q; - } -} - -int64x2_t libdivide_s64_branchfree_do_vec128( - int64x2_t numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - int64x2_t sign = vdupq_n_s64((int8_t)more >> 7); // TODO: avoid sign extend - - // libdivide_mullhi_s64(numers, magic); - int64x2_t q = libdivide_mullhi_s64_vec128(numers, magic); - q = vaddq_s64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. 
- uint32_t is_power_of_2 = (magic == 0); - int64x2_t q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - int64x2_t mask = vdupq_n_s64((1ULL << shift) - is_power_of_2); - q = vaddq_s64(q, vandq_s64(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_neon_sra(q, shift); // q >>= shift - q = vsubq_s64(veorq_s64(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_AVX512) - -static inline __m512i libdivide_u32_do_vec512(__m512i numers, const struct libdivide_u32_t *denom); -static inline __m512i libdivide_s32_do_vec512(__m512i numers, const struct libdivide_s32_t *denom); -static inline __m512i libdivide_u64_do_vec512(__m512i numers, const struct libdivide_u64_t *denom); -static inline __m512i libdivide_s64_do_vec512(__m512i numers, const struct libdivide_s64_t *denom); - -static inline __m512i libdivide_u32_branchfree_do_vec512( - __m512i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m512i libdivide_s32_branchfree_do_vec512( - __m512i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m512i libdivide_u64_branchfree_do_vec512( - __m512i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m512i libdivide_s64_branchfree_do_vec512( - __m512i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -static inline __m512i libdivide_s64_signbits(__m512i v) { - ; - return _mm512_srai_epi64(v, 63); -} - -static inline __m512i libdivide_s64_shift_right_vec512(__m512i v, int amt) { - return _mm512_srai_epi64(v, amt); -} - -// Here, b is assumed to contain one 32-bit value repeated. 
-static inline __m512i libdivide_mullhi_u32_vec512(__m512i a, __m512i b) { - __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epu32(a, b), 32); - __m512i a1X3X = _mm512_srli_epi64(a, 32); - __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); - __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epu32(a1X3X, b), mask); - return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// b is one 32-bit value repeated. -static inline __m512i libdivide_mullhi_s32_vec512(__m512i a, __m512i b) { - __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epi32(a, b), 32); - __m512i a1X3X = _mm512_srli_epi64(a, 32); - __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); - __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epi32(a1X3X, b), mask); - return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m512i libdivide_mullhi_u64_vec512(__m512i x, __m512i y) { - // see m128i variant for comments. - __m512i x0y0 = _mm512_mul_epu32(x, y); - __m512i x0y0_hi = _mm512_srli_epi64(x0y0, 32); - - __m512i x1 = _mm512_shuffle_epi32(x, (_MM_PERM_ENUM)_MM_SHUFFLE(3, 3, 1, 1)); - __m512i y1 = _mm512_shuffle_epi32(y, (_MM_PERM_ENUM)_MM_SHUFFLE(3, 3, 1, 1)); - - __m512i x0y1 = _mm512_mul_epu32(x, y1); - __m512i x1y0 = _mm512_mul_epu32(x1, y); - __m512i x1y1 = _mm512_mul_epu32(x1, y1); - - __m512i mask = _mm512_set1_epi64(0xFFFFFFFF); - __m512i temp = _mm512_add_epi64(x1y0, x0y0_hi); - __m512i temp_lo = _mm512_and_si512(temp, mask); - __m512i temp_hi = _mm512_srli_epi64(temp, 32); - - temp_lo = _mm512_srli_epi64(_mm512_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm512_add_epi64(x1y1, temp_hi); - return _mm512_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. 
-static inline __m512i libdivide_mullhi_s64_vec512(__m512i x, __m512i y) { - __m512i p = libdivide_mullhi_u64_vec512(x, y); - __m512i t1 = _mm512_and_si512(libdivide_s64_signbits(x), y); - __m512i t2 = _mm512_and_si512(libdivide_s64_signbits(y), x); - p = _mm512_sub_epi64(p, t1); - p = _mm512_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m512i libdivide_u32_do_vec512(__m512i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm512_srli_epi32(numers, more); - } else { - __m512i q = libdivide_mullhi_u32_vec512(numers, _mm512_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); - return _mm512_srli_epi32(t, shift); - } else { - return _mm512_srli_epi32(q, more); - } - } -} - -__m512i libdivide_u32_branchfree_do_vec512( - __m512i numers, const struct libdivide_u32_branchfree_t *denom) { - __m512i q = libdivide_mullhi_u32_vec512(numers, _mm512_set1_epi32(denom->magic)); - __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); - return _mm512_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m512i libdivide_u64_do_vec512(__m512i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm512_srli_epi64(numers, more); - } else { - __m512i q = libdivide_mullhi_u64_vec512(numers, _mm512_set1_epi64(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); - return _mm512_srli_epi64(t, shift); - } else { - return _mm512_srli_epi64(q, more); - } - } -} - -__m512i libdivide_u64_branchfree_do_vec512( - 
__m512i numers, const struct libdivide_u64_branchfree_t *denom) { - __m512i q = libdivide_mullhi_u64_vec512(numers, _mm512_set1_epi64(denom->magic)); - __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); - return _mm512_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m512i libdivide_s32_do_vec512(__m512i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m512i roundToZeroTweak = _mm512_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m512i q = _mm512_add_epi32( - numers, _mm512_and_si512(_mm512_srai_epi32(numers, 31), roundToZeroTweak)); - q = _mm512_srai_epi32(q, shift); - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); - return q; - } else { - __m512i q = libdivide_mullhi_s32_vec512(numers, _mm512_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm512_add_epi32(q, _mm512_sub_epi32(_mm512_xor_si512(numers, sign), sign)); - } - // q >>= shift - q = _mm512_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm512_add_epi32(q, _mm512_srli_epi32(q, 31)); // q += (q < 0) - return q; - } -} - -__m512i libdivide_s32_branchfree_do_vec512( - __m512i numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - __m512i q = libdivide_mullhi_s32_vec512(numers, _mm512_set1_epi32(magic)); - q = _mm512_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is 
- // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m512i q_sign = _mm512_srai_epi32(q, 31); // q_sign = q >> 31 - __m512i mask = _mm512_set1_epi32((1U << shift) - is_power_of_2); - q = _mm512_add_epi32(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm512_srai_epi32(q, shift); // q >>= shift - q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m512i libdivide_s64_do_vec512(__m512i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m512i roundToZeroTweak = _mm512_set1_epi64(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m512i q = _mm512_add_epi64( - numers, _mm512_and_si512(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec512(q, shift); - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); - return q; - } else { - __m512i q = libdivide_mullhi_s64_vec512(numers, _mm512_set1_epi64(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm512_add_epi64(q, _mm512_sub_epi64(_mm512_xor_si512(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec512(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm512_add_epi64(q, _mm512_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m512i libdivide_s64_branchfree_do_vec512( - __m512i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m512i sign = 
_mm512_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m512i q = libdivide_mullhi_s64_vec512(numers, _mm512_set1_epi64(magic)); - q = _mm512_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. - uint32_t is_power_of_2 = (magic == 0); - __m512i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - __m512i mask = _mm512_set1_epi64((1ULL << shift) - is_power_of_2); - q = _mm512_add_epi64(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_shift_right_vec512(q, shift); // q >>= shift - q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_AVX2) - -static inline __m256i libdivide_u32_do_vec256(__m256i numers, const struct libdivide_u32_t *denom); -static inline __m256i libdivide_s32_do_vec256(__m256i numers, const struct libdivide_s32_t *denom); -static inline __m256i libdivide_u64_do_vec256(__m256i numers, const struct libdivide_u64_t *denom); -static inline __m256i libdivide_s64_do_vec256(__m256i numers, const struct libdivide_s64_t *denom); - -static inline __m256i libdivide_u32_branchfree_do_vec256( - __m256i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m256i libdivide_s32_branchfree_do_vec256( - __m256i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m256i libdivide_u64_branchfree_do_vec256( - __m256i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m256i libdivide_s64_branchfree_do_vec256( - __m256i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -// Implementation of _mm256_srai_epi64(v, 63) (from AVX512). 
-static inline __m256i libdivide_s64_signbits(__m256i v) { - __m256i hiBitsDuped = _mm256_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); - __m256i signBits = _mm256_srai_epi32(hiBitsDuped, 31); - return signBits; -} - -// Implementation of _mm256_srai_epi64 (from AVX512). -static inline __m256i libdivide_s64_shift_right_vec256(__m256i v, int amt) { - const int b = 64 - amt; - __m256i m = _mm256_set1_epi64x(1ULL << (b - 1)); - __m256i x = _mm256_srli_epi64(v, amt); - __m256i result = _mm256_sub_epi64(_mm256_xor_si256(x, m), m); - return result; -} - -// Here, b is assumed to contain one 32-bit value repeated. -static inline __m256i libdivide_mullhi_u32_vec256(__m256i a, __m256i b) { - __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epu32(a, b), 32); - __m256i a1X3X = _mm256_srli_epi64(a, 32); - __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); - __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epu32(a1X3X, b), mask); - return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// b is one 32-bit value repeated. -static inline __m256i libdivide_mullhi_s32_vec256(__m256i a, __m256i b) { - __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epi32(a, b), 32); - __m256i a1X3X = _mm256_srli_epi64(a, 32); - __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); - __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epi32(a1X3X, b), mask); - return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m256i libdivide_mullhi_u64_vec256(__m256i x, __m256i y) { - // see m128i variant for comments. 
- __m256i x0y0 = _mm256_mul_epu32(x, y); - __m256i x0y0_hi = _mm256_srli_epi64(x0y0, 32); - - __m256i x1 = _mm256_shuffle_epi32(x, _MM_SHUFFLE(3, 3, 1, 1)); - __m256i y1 = _mm256_shuffle_epi32(y, _MM_SHUFFLE(3, 3, 1, 1)); - - __m256i x0y1 = _mm256_mul_epu32(x, y1); - __m256i x1y0 = _mm256_mul_epu32(x1, y); - __m256i x1y1 = _mm256_mul_epu32(x1, y1); - - __m256i mask = _mm256_set1_epi64x(0xFFFFFFFF); - __m256i temp = _mm256_add_epi64(x1y0, x0y0_hi); - __m256i temp_lo = _mm256_and_si256(temp, mask); - __m256i temp_hi = _mm256_srli_epi64(temp, 32); - - temp_lo = _mm256_srli_epi64(_mm256_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm256_add_epi64(x1y1, temp_hi); - return _mm256_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. -static inline __m256i libdivide_mullhi_s64_vec256(__m256i x, __m256i y) { - __m256i p = libdivide_mullhi_u64_vec256(x, y); - __m256i t1 = _mm256_and_si256(libdivide_s64_signbits(x), y); - __m256i t2 = _mm256_and_si256(libdivide_s64_signbits(y), x); - p = _mm256_sub_epi64(p, t1); - p = _mm256_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m256i libdivide_u32_do_vec256(__m256i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm256_srli_epi32(numers, more); - } else { - __m256i q = libdivide_mullhi_u32_vec256(numers, _mm256_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q); - return _mm256_srli_epi32(t, shift); - } else { - return _mm256_srli_epi32(q, more); - } - } -} - -__m256i libdivide_u32_branchfree_do_vec256( - __m256i numers, const struct libdivide_u32_branchfree_t *denom) { - __m256i q = libdivide_mullhi_u32_vec256(numers, _mm256_set1_epi32(denom->magic)); - __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, 
q), 1), q); - return _mm256_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m256i libdivide_u64_do_vec256(__m256i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm256_srli_epi64(numers, more); - } else { - __m256i q = libdivide_mullhi_u64_vec256(numers, _mm256_set1_epi64x(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q); - return _mm256_srli_epi64(t, shift); - } else { - return _mm256_srli_epi64(q, more); - } - } -} - -__m256i libdivide_u64_branchfree_do_vec256( - __m256i numers, const struct libdivide_u64_branchfree_t *denom) { - __m256i q = libdivide_mullhi_u64_vec256(numers, _mm256_set1_epi64x(denom->magic)); - __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q); - return _mm256_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m256i libdivide_s32_do_vec256(__m256i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m256i roundToZeroTweak = _mm256_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m256i q = _mm256_add_epi32( - numers, _mm256_and_si256(_mm256_srai_epi32(numers, 31), roundToZeroTweak)); - q = _mm256_srai_epi32(q, shift); - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); - return q; - } else { - __m256i q = libdivide_mullhi_s32_vec256(numers, _mm256_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm256_add_epi32(q, 
_mm256_sub_epi32(_mm256_xor_si256(numers, sign), sign)); - } - // q >>= shift - q = _mm256_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm256_add_epi32(q, _mm256_srli_epi32(q, 31)); // q += (q < 0) - return q; - } -} - -__m256i libdivide_s32_branchfree_do_vec256( - __m256i numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - __m256i q = libdivide_mullhi_s32_vec256(numers, _mm256_set1_epi32(magic)); - q = _mm256_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m256i q_sign = _mm256_srai_epi32(q, 31); // q_sign = q >> 31 - __m256i mask = _mm256_set1_epi32((1U << shift) - is_power_of_2); - q = _mm256_add_epi32(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm256_srai_epi32(q, shift); // q >>= shift - q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m256i libdivide_s64_do_vec256(__m256i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m256i roundToZeroTweak = _mm256_set1_epi64x(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m256i q = _mm256_add_epi64( - numers, _mm256_and_si256(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec256(q, shift); - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); - return q; - } else { - __m256i q = 
libdivide_mullhi_s64_vec256(numers, _mm256_set1_epi64x(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm256_add_epi64(q, _mm256_sub_epi64(_mm256_xor_si256(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec256(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm256_add_epi64(q, _mm256_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m256i libdivide_s64_branchfree_do_vec256( - __m256i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m256i q = libdivide_mullhi_s64_vec256(numers, _mm256_set1_epi64x(magic)); - q = _mm256_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. 
- uint32_t is_power_of_2 = (magic == 0); - __m256i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - __m256i mask = _mm256_set1_epi64x((1ULL << shift) - is_power_of_2); - q = _mm256_add_epi64(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_shift_right_vec256(q, shift); // q >>= shift - q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_SSE2) - -static inline __m128i libdivide_u32_do_vec128(__m128i numers, const struct libdivide_u32_t *denom); -static inline __m128i libdivide_s32_do_vec128(__m128i numers, const struct libdivide_s32_t *denom); -static inline __m128i libdivide_u64_do_vec128(__m128i numers, const struct libdivide_u64_t *denom); -static inline __m128i libdivide_s64_do_vec128(__m128i numers, const struct libdivide_s64_t *denom); - -static inline __m128i libdivide_u32_branchfree_do_vec128( - __m128i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m128i libdivide_s32_branchfree_do_vec128( - __m128i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m128i libdivide_u64_branchfree_do_vec128( - __m128i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m128i libdivide_s64_branchfree_do_vec128( - __m128i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -// Implementation of _mm_srai_epi64(v, 63) (from AVX512). -static inline __m128i libdivide_s64_signbits(__m128i v) { - __m128i hiBitsDuped = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); - __m128i signBits = _mm_srai_epi32(hiBitsDuped, 31); - return signBits; -} - -// Implementation of _mm_srai_epi64 (from AVX512). 
-static inline __m128i libdivide_s64_shift_right_vec128(__m128i v, int amt) { - const int b = 64 - amt; - __m128i m = _mm_set1_epi64x(1ULL << (b - 1)); - __m128i x = _mm_srli_epi64(v, amt); - __m128i result = _mm_sub_epi64(_mm_xor_si128(x, m), m); - return result; -} - -// Here, b is assumed to contain one 32-bit value repeated. -static inline __m128i libdivide_mullhi_u32_vec128(__m128i a, __m128i b) { - __m128i hi_product_0Z2Z = _mm_srli_epi64(_mm_mul_epu32(a, b), 32); - __m128i a1X3X = _mm_srli_epi64(a, 32); - __m128i mask = _mm_set_epi32(-1, 0, -1, 0); - __m128i hi_product_Z1Z3 = _mm_and_si128(_mm_mul_epu32(a1X3X, b), mask); - return _mm_or_si128(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// SSE2 does not have a signed multiplication instruction, but we can convert -// unsigned to signed pretty efficiently. Again, b is just a 32 bit value -// repeated four times. -static inline __m128i libdivide_mullhi_s32_vec128(__m128i a, __m128i b) { - __m128i p = libdivide_mullhi_u32_vec128(a, b); - // t1 = (a >> 31) & y, arithmetic shift - __m128i t1 = _mm_and_si128(_mm_srai_epi32(a, 31), b); - __m128i t2 = _mm_and_si128(_mm_srai_epi32(b, 31), a); - p = _mm_sub_epi32(p, t1); - p = _mm_sub_epi32(p, t2); - return p; -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m128i libdivide_mullhi_u64_vec128(__m128i x, __m128i y) { - // full 128 bits product is: - // x0*y0 + (x0*y1 << 32) + (x1*y0 << 32) + (x1*y1 << 64) - // Note x0,y0,x1,y1 are all conceptually uint32, products are 32x32->64. - - // Compute x0*y0. - // Note x1, y1 are ignored by mul_epu32. - __m128i x0y0 = _mm_mul_epu32(x, y); - __m128i x0y0_hi = _mm_srli_epi64(x0y0, 32); - - // Get x1, y1 in the low bits. - // We could shuffle or right shift. Shuffles are preferred as they preserve - // the source register for the next computation. 
- __m128i x1 = _mm_shuffle_epi32(x, _MM_SHUFFLE(3, 3, 1, 1)); - __m128i y1 = _mm_shuffle_epi32(y, _MM_SHUFFLE(3, 3, 1, 1)); - - // No need to mask off top 32 bits for mul_epu32. - __m128i x0y1 = _mm_mul_epu32(x, y1); - __m128i x1y0 = _mm_mul_epu32(x1, y); - __m128i x1y1 = _mm_mul_epu32(x1, y1); - - // Mask here selects low bits only. - __m128i mask = _mm_set1_epi64x(0xFFFFFFFF); - __m128i temp = _mm_add_epi64(x1y0, x0y0_hi); - __m128i temp_lo = _mm_and_si128(temp, mask); - __m128i temp_hi = _mm_srli_epi64(temp, 32); - - temp_lo = _mm_srli_epi64(_mm_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm_add_epi64(x1y1, temp_hi); - return _mm_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. -static inline __m128i libdivide_mullhi_s64_vec128(__m128i x, __m128i y) { - __m128i p = libdivide_mullhi_u64_vec128(x, y); - __m128i t1 = _mm_and_si128(libdivide_s64_signbits(x), y); - __m128i t2 = _mm_and_si128(libdivide_s64_signbits(y), x); - p = _mm_sub_epi64(p, t1); - p = _mm_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m128i libdivide_u32_do_vec128(__m128i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm_srli_epi32(numers, more); - } else { - __m128i q = libdivide_mullhi_u32_vec128(numers, _mm_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); - return _mm_srli_epi32(t, shift); - } else { - return _mm_srli_epi32(q, more); - } - } -} - -__m128i libdivide_u32_branchfree_do_vec128( - __m128i numers, const struct libdivide_u32_branchfree_t *denom) { - __m128i q = libdivide_mullhi_u32_vec128(numers, _mm_set1_epi32(denom->magic)); - __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); - return _mm_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m128i 
libdivide_u64_do_vec128(__m128i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm_srli_epi64(numers, more); - } else { - __m128i q = libdivide_mullhi_u64_vec128(numers, _mm_set1_epi64x(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); - return _mm_srli_epi64(t, shift); - } else { - return _mm_srli_epi64(q, more); - } - } -} - -__m128i libdivide_u64_branchfree_do_vec128( - __m128i numers, const struct libdivide_u64_branchfree_t *denom) { - __m128i q = libdivide_mullhi_u64_vec128(numers, _mm_set1_epi64x(denom->magic)); - __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); - return _mm_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m128i libdivide_s32_do_vec128(__m128i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m128i roundToZeroTweak = _mm_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m128i q = - _mm_add_epi32(numers, _mm_and_si128(_mm_srai_epi32(numers, 31), roundToZeroTweak)); - q = _mm_srai_epi32(q, shift); - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); - return q; - } else { - __m128i q = libdivide_mullhi_s32_vec128(numers, _mm_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm_add_epi32(q, _mm_sub_epi32(_mm_xor_si128(numers, sign), sign)); - } - // q >>= shift - q = _mm_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); // q += (q < 
0) - return q; - } -} - -__m128i libdivide_s32_branchfree_do_vec128( - __m128i numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - __m128i q = libdivide_mullhi_s32_vec128(numers, _mm_set1_epi32(magic)); - q = _mm_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m128i q_sign = _mm_srai_epi32(q, 31); // q_sign = q >> 31 - __m128i mask = _mm_set1_epi32((1U << shift) - is_power_of_2); - q = _mm_add_epi32(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm_srai_epi32(q, shift); // q >>= shift - q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m128i libdivide_s64_do_vec128(__m128i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m128i roundToZeroTweak = _mm_set1_epi64x(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m128i q = - _mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec128(q, shift); - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); - return q; - } else { - __m128i q = libdivide_mullhi_s64_vec128(numers, _mm_set1_epi64x(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm_add_epi64(q, 
_mm_sub_epi64(_mm_xor_si128(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec128(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm_add_epi64(q, _mm_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m128i libdivide_s64_branchfree_do_vec128( - __m128i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m128i q = libdivide_mullhi_s64_vec128(numers, _mm_set1_epi64x(magic)); - q = _mm_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. - uint32_t is_power_of_2 = (magic == 0); - __m128i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - __m128i mask = _mm_set1_epi64x((1ULL << shift) - is_power_of_2); - q = _mm_add_epi64(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_shift_right_vec128(q, shift); // q >>= shift - q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -/////////// C++ stuff - -#ifdef __cplusplus - -enum Branching { - BRANCHFULL, // use branching algorithms - BRANCHFREE // use branchfree algorithms -}; - -#if defined(LIBDIVIDE_NEON) -// Helper to deduce NEON vector type for integral type. -template -struct NeonVecFor {}; - -template <> -struct NeonVecFor { - typedef uint32x4_t type; -}; - -template <> -struct NeonVecFor { - typedef int32x4_t type; -}; - -template <> -struct NeonVecFor { - typedef uint64x2_t type; -}; - -template <> -struct NeonVecFor { - typedef int64x2_t type; -}; -#endif - -// Versions of our algorithms for SIMD. 
-#if defined(LIBDIVIDE_NEON) -#define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE) \ - typename NeonVecFor::type divide(typename NeonVecFor::type n) const { \ - return libdivide_##ALGO##_do_vec128(n, &denom); \ - } -#else -#define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE) -#endif -#if defined(LIBDIVIDE_SSE2) -#define LIBDIVIDE_DIVIDE_SSE2(ALGO) \ - __m128i divide(__m128i n) const { return libdivide_##ALGO##_do_vec128(n, &denom); } -#else -#define LIBDIVIDE_DIVIDE_SSE2(ALGO) -#endif - -#if defined(LIBDIVIDE_AVX2) -#define LIBDIVIDE_DIVIDE_AVX2(ALGO) \ - __m256i divide(__m256i n) const { return libdivide_##ALGO##_do_vec256(n, &denom); } -#else -#define LIBDIVIDE_DIVIDE_AVX2(ALGO) -#endif - -#if defined(LIBDIVIDE_AVX512) -#define LIBDIVIDE_DIVIDE_AVX512(ALGO) \ - __m512i divide(__m512i n) const { return libdivide_##ALGO##_do_vec512(n, &denom); } -#else -#define LIBDIVIDE_DIVIDE_AVX512(ALGO) -#endif - -// The DISPATCHER_GEN() macro generates C++ methods (for the given integer -// and algorithm types) that redirect to libdivide's C API. -#define DISPATCHER_GEN(T, ALGO) \ - libdivide_##ALGO##_t denom; \ - dispatcher() {} \ - dispatcher(T d) : denom(libdivide_##ALGO##_gen(d)) {} \ - T divide(T n) const { return libdivide_##ALGO##_do(n, &denom); } \ - T recover() const { return libdivide_##ALGO##_recover(&denom); } \ - LIBDIVIDE_DIVIDE_NEON(ALGO, T) \ - LIBDIVIDE_DIVIDE_SSE2(ALGO) \ - LIBDIVIDE_DIVIDE_AVX2(ALGO) \ - LIBDIVIDE_DIVIDE_AVX512(ALGO) - -// The dispatcher selects a specific division algorithm for a given -// type and ALGO using partial template specialization. 
-template -struct dispatcher {}; - -template <> -struct dispatcher { - DISPATCHER_GEN(int32_t, s32) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(int32_t, s32_branchfree) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(uint32_t, u32) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(uint32_t, u32_branchfree) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(int64_t, s64) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(int64_t, s64_branchfree) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(uint64_t, u64) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(uint64_t, u64_branchfree) -}; - -// This is the main divider class for use by the user (C++ API). -// The actual division algorithm is selected using the dispatcher struct -// based on the integer and algorithm template parameters. -template -class divider { - public: - // We leave the default constructor empty so that creating - // an array of dividers and then initializing them - // later doesn't slow us down. - divider() {} - - // Constructor that takes the divisor as a parameter - divider(T d) : div(d) {} - - // Divides n by the divisor - T divide(T n) const { return div.divide(n); } - - // Recovers the divisor, returns the value that was - // used to initialize this divider object. - T recover() const { return div.recover(); } - - bool operator==(const divider &other) const { - return div.denom.magic == other.denom.magic && div.denom.more == other.denom.more; - } - - bool operator!=(const divider &other) const { return !(*this == other); } - - // Vector variants treat the input as packed integer values with the same type as the divider - // (e.g. s32, u32, s64, u64) and divides each of them by the divider, returning the packed - // quotients. 
-#if defined(LIBDIVIDE_SSE2) - __m128i divide(__m128i n) const { return div.divide(n); } -#endif -#if defined(LIBDIVIDE_AVX2) - __m256i divide(__m256i n) const { return div.divide(n); } -#endif -#if defined(LIBDIVIDE_AVX512) - __m512i divide(__m512i n) const { return div.divide(n); } -#endif -#if defined(LIBDIVIDE_NEON) - typename NeonVecFor::type divide(typename NeonVecFor::type n) const { - return div.divide(n); - } -#endif - - private: - // Storage for the actual divisor - dispatcher::value, std::is_signed::value, sizeof(T), ALGO> div; -}; - -// Overload of operator / for scalar division -template -T operator/(T n, const divider &div) { - return div.divide(n); -} - -// Overload of operator /= for scalar division -template -T &operator/=(T &n, const divider &div) { - n = div.divide(n); - return n; -} - -// Overloads for vector types. -#if defined(LIBDIVIDE_SSE2) -template -__m128i operator/(__m128i n, const divider &div) { - return div.divide(n); -} - -template -__m128i operator/=(__m128i &n, const divider &div) { - n = div.divide(n); - return n; -} -#endif -#if defined(LIBDIVIDE_AVX2) -template -__m256i operator/(__m256i n, const divider &div) { - return div.divide(n); -} - -template -__m256i operator/=(__m256i &n, const divider &div) { - n = div.divide(n); - return n; -} -#endif -#if defined(LIBDIVIDE_AVX512) -template -__m512i operator/(__m512i n, const divider &div) { - return div.divide(n); -} - -template -__m512i operator/=(__m512i &n, const divider &div) { - n = div.divide(n); - return n; -} -#endif - -#if defined(LIBDIVIDE_NEON) -template -uint32x4_t operator/(uint32x4_t n, const divider &div) { - return div.divide(n); -} - -template -int32x4_t operator/(int32x4_t n, const divider &div) { - return div.divide(n); -} - -template -uint64x2_t operator/(uint64x2_t n, const divider &div) { - return div.divide(n); -} - -template -int64x2_t operator/(int64x2_t n, const divider &div) { - return div.divide(n); -} - -template -uint32x4_t operator/=(uint32x4_t &n, 
const divider &div) { - n = div.divide(n); - return n; -} - -template -int32x4_t operator/=(int32x4_t &n, const divider &div) { - n = div.divide(n); - return n; -} - -template -uint64x2_t operator/=(uint64x2_t &n, const divider &div) { - n = div.divide(n); - return n; -} - -template -int64x2_t operator/=(int64x2_t &n, const divider &div) { - n = div.divide(n); - return n; -} -#endif - -#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900) -// libdivide::branchfree_divider -template -using branchfree_divider = divider; -#endif - -} // namespace libdivide - -#endif // __cplusplus - -#endif // LIBDIVIDE_H diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 7359e0a9402e..bf6f9bc9e499 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -116,6 +116,7 @@ function clone_submodules contrib/base64 contrib/cctz contrib/libcpuid + contrib/libdivide contrib/double-conversion contrib/llvm-project contrib/lz4 diff --git a/src/Functions/divide/divideImpl.cpp b/src/Functions/divide/divideImpl.cpp index 940f4b35df9e..966d5777c1df 100644 --- a/src/Functions/divide/divideImpl.cpp +++ b/src/Functions/divide/divideImpl.cpp @@ -1,6 +1,7 @@ /// This translation unit should be compiled multiple times /// with different values of NAMESPACE and machine flags (sse2, avx2). 
+/// See also #if defined(__AVX2__) #define REG_SIZE 32 #define LIBDIVIDE_AVX2 diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index be052b25af4f..be6cb989d10d 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -1,16 +1,7 @@ #include #include -#if defined(__SSE2__) -# define LIBDIVIDE_SSE2 -#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__) -# define LIBDIVIDE_AVX512 -#elif defined(__AVX2__) -# define LIBDIVIDE_AVX2 -#elif defined(__aarch64__) && defined(__ARM_NEON) -# define LIBDIVIDE_NEON -#endif - +#include #include diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index fce9833ddfbe..659fc483373b 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -5,16 +5,7 @@ #include -#if defined(__SSE2__) -# define LIBDIVIDE_SSE2 -#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__) -# define LIBDIVIDE_AVX512 -#elif defined(__AVX2__) -# define LIBDIVIDE_AVX2 -#elif defined(__aarch64__) && defined(__ARM_NEON) -# define LIBDIVIDE_NEON -#endif - +#include #include From 8cbb953666b6646e9ff4b7164f085675042bc0c0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 24 Dec 2022 03:03:55 +0000 Subject: [PATCH 009/342] Backport #44529 to 22.12: Ignore exit code 1 for tar in integration tests --- tests/integration/ci-runner.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 551466cf5837..487cf9b98695 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -358,10 +358,15 @@ def _compress_logs(self, dir, relpaths, result_path): subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL "sync", shell=True ) - subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL + retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL "tar czf {} -C {} 
{}".format(result_path, dir, " ".join(relpaths)), shell=True, ) + # tar return 1 when the files are changed on compressing, we ignore it + if retcode in (0, 1): + return + # but even on the fatal errors it's better to retry + logging.error("Fatal error on compressing %s: %s", result_path, retcode) def _get_runner_opts(self): result = [] From bd8dfcccb0eeaec67d11cfecb0ba411b8f1d394c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 27 Dec 2022 15:03:02 +0000 Subject: [PATCH 010/342] Backport #44399 to 22.12: Implement a custom central checkout action --- .github/workflows/backport_branches.yml | 208 ++-- .github/workflows/cherry_pick.yml | 3 +- .github/workflows/docs_check.yml | 63 +- .github/workflows/docs_release.yml | 40 +- .github/workflows/jepsen.yml | 12 +- .github/workflows/master.yml | 894 +++++++---------- .github/workflows/nightly.yml | 48 +- .github/workflows/pull_request.yml | 1188 ++++++++++------------- .github/workflows/release.yml | 8 +- .github/workflows/release_branches.yml | 514 +++++----- .github/workflows/tags_stable.yml | 2 +- .github/workflows/woboq.yml | 6 +- tests/ci/docker_images_check.py | 1 - tests/ci/docker_manifests_merge.py | 1 - tests/ci/docker_server.py | 1 - 15 files changed, 1286 insertions(+), 1703 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index c90df6e57b71..cbd3bd7bec42 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -12,11 +12,10 @@ jobs: PythonUnitTests: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -24,34 +23,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear 
repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -59,18 +56,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -79,7 +75,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts 
- uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -94,13 +90,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -132,28 +127,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -177,28 +169,25 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr 
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -222,26 +211,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -265,26 +252,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ 
env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -308,26 +293,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -351,28 +334,25 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ 
env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -396,28 +376,25 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -436,12 +413,10 @@ jobs: - BuilderDebAarch64 runs-on: 
[self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -477,14 +452,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -516,14 +490,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -556,14 +529,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -594,14 +566,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: 
actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -635,14 +606,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -672,14 +642,13 @@ jobs: REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -706,11 +675,10 @@ jobs: - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/cherry_pick.yml b/.github/workflows/cherry_pick.yml index 3e6f9e76c565..065e584182b7 100644 --- a/.github/workflows/cherry_pick.yml +++ b/.github/workflows/cherry_pick.yml @@ -28,8 +28,9 @@ jobs: REPO_TEAM=core EOF - name: 
Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} fetch-depth: 0 - name: Cherry pick diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 7a15e77becbb..64d8ff160356 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -20,11 +20,10 @@ jobs: CheckLabels: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -33,17 +32,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json @@ -51,17 +49,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ 
runner.temp }}/docker_images_check/changed_images_amd64.json @@ -69,18 +66,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -89,7 +85,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -109,15 +105,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. 
on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -139,15 +134,14 @@ jobs: REPO_COPY=${{runner.temp}}/docs_check/ClickHouse EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Docs Check run: | cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -166,11 +160,10 @@ jobs: - DocsCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index da67edd4aa12..c665b7284095 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -22,34 +22,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 
docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -57,18 +55,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -77,7 +74,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -96,13 +93,12 @@ jobs: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} RCSK EOF - - name: Clear repository - run: | - sudo rm -fr 
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 5afc066065e4..e67df15c4d36 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -19,12 +19,10 @@ jobs: TEMP_PATH=${{runner.temp}}/keeper_jepsen REPO_COPY=${{runner.temp}}/keeper_jepsen/ClickHouse EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 - name: Jepsen Test run: | @@ -50,12 +48,10 @@ jobs: # TEMP_PATH=${{runner.temp}}/server_jepsen # REPO_COPY=${{runner.temp}}/server_jepsen/ClickHouse # EOF - # - name: Clear repository - # run: | - # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" # - name: Check out repository code - # uses: actions/checkout@v2 + # uses: ClickHouse/checkout@v1 # with: + # clear-repository: true # fetch-depth: 0 # - name: Jepsen Test # run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f3d672136ef9..0efdb3caaad4 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -12,11 +12,10 @@ jobs: PythonUnitTests: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -24,34 +23,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear 
repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -59,18 +56,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -79,7 +75,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images 
files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -96,15 +92,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -126,13 +121,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -158,13 +152,12 @@ jobs: REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: Shared build check @@ -196,28 +189,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ 
env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -241,24 +231,24 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json @@ -282,28 +272,25 @@ jobs: BUILD_NAME=binary_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images 
path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -327,26 +314,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -370,26 +355,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + 
uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -413,26 +396,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -456,26 +437,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + 
uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -499,26 +478,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -545,26 +522,24 @@ jobs: BUILD_NAME=binary_shared EOF - name: Download changed images - uses: actions/download-artifact@v2 + 
uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -588,26 +563,24 @@ jobs: BUILD_NAME=binary_tidy EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -631,28 +604,25 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + 
uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -676,28 +646,25 @@ jobs: BUILD_NAME=binary_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS 
}}.json @@ -721,28 +688,25 @@ jobs: BUILD_NAME=binary_freebsd EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -766,28 +730,25 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: 
actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -811,28 +772,25 @@ jobs: BUILD_NAME=binary_ppc64le EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -856,28 +814,25 @@ jobs: BUILD_NAME=binary_amd64sse2 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd 
"$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -901,28 +856,25 @@ jobs: BUILD_NAME=binary_aarch64_v80compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -941,12 +893,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -986,14 +936,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: 
actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1015,7 +964,6 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinAmd64SSE2 - BuilderBinAarch64V80Compat @@ -1033,14 +981,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1064,11 +1011,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Mark Commit Release Ready run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -1090,14 +1036,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1125,14 +1070,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: 
actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1162,14 +1106,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1199,14 +1142,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1234,14 +1176,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1269,14 +1210,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} 
- - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1306,14 +1246,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1343,14 +1282,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1380,14 +1318,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1417,14 +1354,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" 
- name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1454,14 +1390,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1489,14 +1424,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1526,14 +1460,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1563,14 +1496,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: 
+ clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1600,14 +1532,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1637,14 +1568,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1674,14 +1604,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1711,14 +1640,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1749,14 
+1677,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1784,14 +1711,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1819,14 +1745,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1854,14 +1779,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1889,14 +1813,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 
+ uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1924,14 +1847,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1959,14 +1881,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1996,14 +1917,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2034,14 +1954,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: 
path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2068,14 +1987,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2102,14 +2020,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2136,14 +2053,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2175,14 +2091,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ 
env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2211,14 +2126,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2247,14 +2161,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2283,14 +2196,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2319,14 +2231,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && 
mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2355,14 +2266,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2391,14 +2301,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2427,14 +2336,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2463,14 +2371,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2500,14 +2407,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2534,14 +2440,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2568,14 +2473,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2602,14 +2506,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: 
actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2636,14 +2539,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2673,14 +2575,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2707,14 +2608,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2728,40 +2628,6 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - # UnitTestsReleaseGCC: - # needs: [BuilderBinGCC] - # runs-on: [self-hosted, fuzzer-unit-tester] - # steps: - # - name: Set envs - # run: | - # cat >> "$GITHUB_ENV" << 'EOF' - # 
TEMP_PATH=${{runner.temp}}/unit_tests_asan - # REPORTS_PATH=${{runner.temp}}/reports_dir - # CHECK_NAME=Unit tests (release-gcc) - # REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse - # EOF - # - name: Download json reports - # uses: actions/download-artifact@v2 - # with: - # path: ${{ env.REPORTS_PATH }} - # - name: Clear repository - # run: | - # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - # - name: Check out repository code - # uses: actions/checkout@v2 - # - name: Unit test - # run: | - # sudo rm -fr "$TEMP_PATH" - # mkdir -p "$TEMP_PATH" - # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - # cd "$REPO_COPY/tests/ci" - # python3 unit_tests_check.py "$CHECK_NAME" - # - name: Cleanup - # if: always() - # run: | - # docker ps --quiet | xargs --no-run-if-empty docker kill ||: - # docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - # sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2775,14 +2641,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2809,14 +2674,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ 
-2843,14 +2707,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2882,14 +2745,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2918,14 +2780,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2954,14 +2815,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2990,14 +2850,13 @@ jobs: RUN_BY_HASH_TOTAL=4 
EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3026,14 +2885,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3062,14 +2920,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3098,14 +2955,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3134,14 +2990,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 
+ uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3171,14 +3026,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -3205,14 +3059,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -3291,11 +3144,10 @@ jobs: - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 9ebbe4e090d0..415d1b8fdc40 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -16,34 +16,32 @@ jobs: DockerHubPushAarch64: runs-on: 
[self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 --all - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 --all - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -51,18 +49,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -71,7 +68,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py 
--suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -90,22 +87,17 @@ jobs: EOF echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - id: coverity-checkout - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: - fetch-depth: 0 # otherwise we will have no info about contributors + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -134,8 +126,10 @@ jobs: CC: clang-15 CXX: clang++-15 steps: - - uses: actions/checkout@v2 + - name: Check out repository code + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis submodules: true - name: Set up JDK 11 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 857e2c7f6041..3564f95ed436 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -25,11 +25,10 @@ jobs: # Run the first check always, even if the CI is cancelled if: ${{ always() }} steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -38,11 +37,10 @@ jobs: 
needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -51,17 +49,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json @@ -69,17 +66,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -87,18 +83,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: 
true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -107,7 +102,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -128,15 +123,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -158,16 +152,12 @@ jobs: REPO_COPY=${{runner.temp}}/fasttest/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" - mkdir "$GITHUB_WORKSPACE" - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} @@ -192,13 +182,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && 
mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -224,13 +213,12 @@ jobs: REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: Shared build check @@ -262,28 +250,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # for performance artifact + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -307,26 +292,24 @@ jobs: BUILD_NAME=binary_release EOF - name: Download changed images - uses: 
actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -350,28 +333,25 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # for performance artifact - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -395,26 +375,24 @@ jobs: 
BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -438,26 +416,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -481,26 +457,24 @@ jobs: 
BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -524,26 +498,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -567,26 +539,24 @@ jobs: 
BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -613,26 +583,24 @@ jobs: BUILD_NAME=binary_shared EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -656,26 +624,24 @@ jobs: 
BUILD_NAME=binary_tidy EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -699,26 +665,24 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -742,26 +706,24 @@ jobs: 
BUILD_NAME=binary_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -785,26 +747,24 @@ jobs: BUILD_NAME=binary_freebsd EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -828,26 +788,24 @@ jobs: 
BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -871,26 +829,24 @@ jobs: BUILD_NAME=binary_ppc64le EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -914,26 +870,24 
@@ jobs: BUILD_NAME=binary_amd64sse2 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -957,26 +911,24 @@ jobs: BUILD_NAME=binary_aarch64_v80compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ 
-995,12 +947,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -1039,14 +989,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1086,14 +1035,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1126,14 +1074,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1163,14 +1110,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download 
json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1200,14 +1146,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1237,14 +1182,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1274,14 +1218,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1309,14 +1252,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: 
${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1346,14 +1288,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1383,14 +1324,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1420,14 +1360,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1457,14 +1396,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && 
mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1494,14 +1432,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1531,14 +1468,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1568,14 +1504,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1605,14 +1540,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1642,14 +1576,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1679,14 +1612,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1716,14 +1648,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1753,14 +1684,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr 
"$TEMP_PATH" @@ -1790,14 +1720,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1825,14 +1754,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1862,14 +1790,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1899,14 +1826,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1936,14 +1862,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - 
uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1973,14 +1898,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2010,14 +1934,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2047,14 +1970,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2084,14 +2006,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ 
env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2121,14 +2042,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2158,14 +2078,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2195,14 +2114,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2232,14 +2150,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir 
"$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2269,14 +2186,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2306,14 +2222,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2343,14 +2258,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2380,14 +2294,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2417,14 +2330,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2454,14 +2366,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2491,14 +2402,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2528,14 +2438,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr 
"$TEMP_PATH" @@ -2565,14 +2474,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2602,14 +2510,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2639,14 +2546,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2674,14 +2580,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2709,14 +2614,13 @@ jobs: REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse 
EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Bugfix test run: | sudo rm -fr "$TEMP_PATH" @@ -2758,14 +2662,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2793,14 +2696,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2828,14 +2730,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2863,14 +2764,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 
with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2898,14 +2798,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2933,14 +2832,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2968,14 +2866,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -3005,14 +2902,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - 
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3043,14 +2939,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3077,14 +2972,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3111,14 +3005,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3145,14 +3038,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear 
repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3182,14 +3074,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3216,14 +3107,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3250,14 +3140,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3284,14 +3173,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: 
Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3318,14 +3206,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3357,14 +3244,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3393,14 +3279,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3429,14 +3314,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir 
"$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3465,14 +3349,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3501,14 +3384,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3537,14 +3419,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3573,14 +3454,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3609,14 +3489,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3645,14 +3524,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3681,14 +3559,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3717,14 +3594,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm 
-fr "$TEMP_PATH" @@ -3753,14 +3629,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3789,14 +3664,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3825,14 +3699,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3861,14 +3734,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3897,14 +3769,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json 
reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3931,14 +3802,13 @@ jobs: REPO_COPY=${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3968,14 +3838,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4002,14 +3871,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4036,14 +3904,13 @@ jobs: 
REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4070,14 +3937,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4104,14 +3970,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4143,14 +4008,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4179,14 +4043,13 @@ jobs: 
RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4215,14 +4078,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4251,14 +4113,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4287,14 +4148,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4323,14 +4183,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: 
actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4359,14 +4218,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4395,14 +4253,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4432,14 +4289,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -4466,14 +4322,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse EOF - name: Download json reports - uses: 
actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -4599,11 +4454,10 @@ jobs: - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0b0f125d641b..9200e5e87b8c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,7 +20,7 @@ jobs: REPO_COPY=${{runner.temp}}/release_packages/ClickHouse EOF - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: # Always use the most recent script version ref: master @@ -50,12 +50,10 @@ jobs: DockerServerImages: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # otherwise we will have no version info - name: Check docker clickhouse/clickhouse-server building run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index bf35ca76fc6c..251087f33a55 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -15,34 +15,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr 
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -50,18 +48,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -70,7 +67,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: 
actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -85,13 +82,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -123,28 +119,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -168,24 +161,24 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - name: Check out repository code - uses: 
actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: - fetch-depth: 0 # otherwise we will have no info about contributors + clear-repository: true + submodules: true + fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json @@ -209,26 +202,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -252,26 +243,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: 
| - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -295,26 +284,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -338,26 +325,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - 
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -381,26 +366,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -424,28 +407,25 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - 
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -469,28 +449,25 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -509,12 +486,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository 
- run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -553,14 +528,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -592,14 +566,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -623,11 +596,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Mark Commit Release Ready run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -649,14 +621,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir 
"$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -684,14 +655,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -721,14 +691,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -758,14 +727,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -795,14 +763,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -832,14 +799,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -869,14 +835,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -904,14 +869,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -941,14 +905,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr 
"$TEMP_PATH" @@ -978,14 +941,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1015,14 +977,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1052,14 +1013,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1089,14 +1049,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1126,14 +1085,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - 
uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1164,14 +1122,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1199,14 +1156,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1234,14 +1190,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1269,14 +1224,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} 
- - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1304,14 +1258,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1339,14 +1292,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1374,14 +1326,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1411,14 +1362,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && 
mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1449,14 +1399,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1483,14 +1432,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1517,14 +1465,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1551,14 +1498,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr 
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1590,14 +1536,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1626,14 +1571,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1662,14 +1606,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1698,14 +1641,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: 
actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1734,14 +1676,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1770,14 +1711,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1806,14 +1746,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1842,14 +1781,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: 
Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1878,14 +1816,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1944,11 +1881,10 @@ jobs: - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index f8cfa1137cc4..f5b42e9c882a 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -34,7 +34,7 @@ jobs: run: | echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: ref: master fetch-depth: 0 diff --git a/.github/workflows/woboq.yml b/.github/workflows/woboq.yml index b928a4a8d3d7..363652c9f332 100644 --- a/.github/workflows/woboq.yml +++ b/.github/workflows/woboq.yml @@ -21,12 +21,10 @@ jobs: REPO_COPY=${{runner.temp}}/codebrowser/ClickHouse IMAGES_PATH=${{runner.temp}}/images_path EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true submodules: 'true' - name: Codebrowser run: | diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 
0618969f94cd..034e0110e2f1 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -476,7 +476,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 2ba5a99de0af..14585159d479 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -208,7 +208,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index e0053f096643..fd28e5a18908 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -340,7 +340,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return From 574923352316af69b0c46b6fa442897044faeca6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 30 Dec 2022 12:02:46 +0000 Subject: [PATCH 011/342] Backport #42126 to 22.12: Fixing "Invalid number of rows in Chunk" with lightweight deletes --- .../MergeTreeBaseSelectProcessor.cpp | 3 +- .../MergeTree/MergeTreeRangeReader.cpp | 652 ++++++++++-------- src/Storages/MergeTree/MergeTreeRangeReader.h | 115 ++- .../00609_prewhere_and_default.sql | 14 + ...more_range_reader_test_wide_part.reference | 3 + ...7_one_more_range_reader_test_wide_part.sql | 17 + .../01674_filter_by_uint8.reference | 13 +- .../0_stateless/01674_filter_by_uint8.sql | 1 + ...5_add_part_offset_virtual_column.reference | 18 + .../02235_add_part_offset_virtual_column.sql | 8 + .../02460_prewhere_row_level_policy.reference | 0 
.../02460_prewhere_row_level_policy.sql | 9 + ...vel_policy_lightweight_delete.reference.j2 | 29 + ...row_level_policy_lightweight_delete.sql.j2 | 59 ++ .../02473_multistep_prewhere.python | 148 ++++ .../02473_multistep_prewhere.reference | 0 .../0_stateless/02473_multistep_prewhere.sh | 11 + ...ewhere_filtered_rows_div_by_zero.reference | 76 ++ ...481_prewhere_filtered_rows_div_by_zero.sql | 28 + ...13_prewhere_combine_step_filters.reference | 110 +++ .../02513_prewhere_combine_step_filters.sql | 26 + 21 files changed, 1025 insertions(+), 315 deletions(-) create mode 100644 tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference create mode 100644 tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql create mode 100644 tests/queries/0_stateless/02460_prewhere_row_level_policy.reference create mode 100644 tests/queries/0_stateless/02460_prewhere_row_level_policy.sql create mode 100644 tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 create mode 100644 tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 create mode 100644 tests/queries/0_stateless/02473_multistep_prewhere.python create mode 100644 tests/queries/0_stateless/02473_multistep_prewhere.reference create mode 100755 tests/queries/0_stateless/02473_multistep_prewhere.sh create mode 100644 tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference create mode 100644 tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql create mode 100644 tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference create mode 100644 tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index bd7e3a64749b..5b6b0f09bc39 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ 
b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -68,12 +68,13 @@ IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm( size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); - /// Reverse order is to minimize reallocations when removing columns from the block for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); result_header = header_without_const_virtual_columns; injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); + + LOG_TEST(log, "PREWHERE actions: {}", (prewhere_actions ? prewhere_actions->dump() : std::string(""))); } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 1a5a4d918067..ac5c3b1db2d6 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -30,13 +30,17 @@ namespace ErrorCodes } -static void filterColumns(Columns & columns, const IColumn::Filter & filter) +static void filterColumns(Columns & columns, const IColumn::Filter & filter, size_t filter_bytes) { for (auto & column : columns) { if (column) { - column = column->filter(filter, -1); + if (column->size() != filter.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of column {} doesn't match size of filter {}", + column->size(), filter.size()); + + column = column->filter(filter, filter_bytes); if (column->empty()) { @@ -47,13 +51,12 @@ static void filterColumns(Columns & columns, const IColumn::Filter & filter) } } -static void filterColumns(Columns & columns, const ColumnPtr & filter) +static void filterColumns(Columns & columns, const FilterWithCachedCount & filter) { - ConstantFilterDescription 
const_descr(*filter); - if (const_descr.always_true) + if (filter.alwaysTrue()) return; - if (const_descr.always_false) + if (filter.alwaysFalse()) { for (auto & col : columns) if (col) @@ -62,8 +65,7 @@ static void filterColumns(Columns & columns, const ColumnPtr & filter) return; } - FilterDescription descr(*filter); - filterColumns(columns, *descr.data); + filterColumns(columns, filter.getData(), filter.countBytesInFilter()); } @@ -320,11 +322,13 @@ void MergeTreeRangeReader::ReadResult::clear() num_rows_to_skip_in_last_granule += rows_per_granule.back(); rows_per_granule.assign(rows_per_granule.size(), 0); total_rows_per_granule = 0; - filter_holder = nullptr; - filter = nullptr; + final_filter = FilterWithCachedCount(); + num_rows = 0; + columns.clear(); + additional_columns.clear(); } -void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) +void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRows & rows_per_granule_previous) const { for (auto & column : old_columns) { @@ -337,9 +341,12 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) continue; } + LOG_TEST(log, "ReadResult::shrink() column size: {} total_rows_per_granule: {}", + column->size(), total_rows_per_granule); + auto new_column = column->cloneEmpty(); new_column->reserve(total_rows_per_granule); - for (size_t j = 0, pos = 0; j < rows_per_granule_original.size(); pos += rows_per_granule_original[j++]) + for (size_t j = 0, pos = 0; j < rows_per_granule_previous.size(); pos += rows_per_granule_previous[j++]) { if (rows_per_granule[j]) new_column->insertRangeFrom(*column, pos, rows_per_granule[j]); @@ -348,74 +355,265 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) } } +/// The main invariant of the data in the read result is that he number of rows is +/// either equal to total_rows_per_granule (if filter has not been applied) or to the number of +/// 1s in the filter (if filter has been applied). 
+void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const +{ + /// Check that filter size matches number of rows that will be read. + if (final_filter.present() && final_filter.size() != total_rows_per_granule) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Final filter size {} doesn't match total_rows_per_granule {}", + final_filter.size(), total_rows_per_granule); + + /// Check that num_rows is consistent with final_filter and rows_per_granule. + if (final_filter.present() && final_filter.countBytesInFilter() != num_rows && total_rows_per_granule != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Number of rows {} doesn't match neither filter 1s count {} nor total_rows_per_granule {}", + num_rows, final_filter.countBytesInFilter(), total_rows_per_granule); + + /// Check that additional columns have the same number of rows as the main columns. + if (additional_columns && additional_columns.rows() != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Number of rows in additional columns {} is not equal to number of rows in result columns {}", + additional_columns.rows(), num_rows); + + for (const auto & column : columns) + { + if (column) + chassert(column->size() == num_rows); + } +} + +std::string MergeTreeRangeReader::ReadResult::dumpInfo() const +{ + WriteBufferFromOwnString out; + out << "num_rows: " << num_rows + << ", columns: " << columns.size() + << ", total_rows_per_granule: " << total_rows_per_granule; + if (final_filter.present()) + { + out << ", filter size: " << final_filter.size() + << ", filter 1s: " << final_filter.countBytesInFilter(); + } + else + { + out << ", no filter"; + } + for (size_t ci = 0; ci < columns.size(); ++ci) + { + out << ", column[" << ci << "]: "; + if (!columns[ci]) + out << " nullptr"; + else + { + out << " " << columns[ci]->dumpStructure(); + } + } + if (additional_columns) + { + out << ", additional_columns: " << additional_columns.dumpStructure(); + } + return out.str(); +} + +static 
std::string dumpNames(const NamesAndTypesList & columns) +{ + WriteBufferFromOwnString out; + for (auto it = columns.begin(); it != columns.end(); ++it) + { + if (it != columns.begin()) + out << ", "; + out << it->name; + } + return out.str(); +} + void MergeTreeRangeReader::ReadResult::setFilterConstTrue() { - clearFilter(); - filter_holder = DataTypeUInt8().createColumnConst(num_rows, 1u); + /// Remove the filter, so newly read columns will not be filtered. + final_filter = FilterWithCachedCount(); } -void MergeTreeRangeReader::ReadResult::setFilterConstFalse() +static ColumnPtr andFilters(ColumnPtr c1, ColumnPtr c2) { - clearFilter(); - columns.clear(); - num_rows = 0; + if (c1->size() != c2->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of filters don't match: {} and {}", + c1->size(), c2->size()); + + // TODO: use proper vectorized implementation of AND? + auto res = ColumnUInt8::create(c1->size()); + auto & res_data = res->getData(); + const auto & c1_data = typeid_cast(*c1).getData(); + const auto & c2_data = typeid_cast(*c2).getData(); + const size_t size = c1->size(); + const size_t step = 16; + size_t i = 0; + /// NOTE: '&&' must be used instead of '&' for 'AND' operation because UInt8 columns might contain any non-zero + /// value for true and we cannot bitwise AND them to get the correct result. 
+ for (; i + step < size; i += step) + for (size_t j = 0; j < step; ++j) + res_data[i+j] = (c1_data[i+j] && c2_data[i+j]); + for (; i < size; ++i) + res_data[i] = (c1_data[i] && c2_data[i]); + return res; +} + +static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second); + +void MergeTreeRangeReader::ReadResult::applyFilter(const FilterWithCachedCount & filter) +{ + if (filter.size() != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter size {} doesn't match number of rows {}", + filter.size(), num_rows); + + LOG_TEST(log, "ReadResult::applyFilter() num_rows before: {}", num_rows); + + filterColumns(columns, filter); + + { + auto tmp_columns = additional_columns.getColumns(); + filterColumns(tmp_columns, filter); + if (!tmp_columns.empty()) + additional_columns.setColumns(tmp_columns); + else + additional_columns.clear(); + } + + num_rows = filter.countBytesInFilter(); + + LOG_TEST(log, "ReadResult::applyFilter() num_rows after: {}", num_rows); } -void MergeTreeRangeReader::ReadResult::optimize(bool can_read_incomplete_granules, bool allow_filter_columns) +void MergeTreeRangeReader::ReadResult::optimize(const FilterWithCachedCount & current_filter, bool can_read_incomplete_granules) { - if (total_rows_per_granule == 0 || filter == nullptr) + checkInternalConsistency(); + + /// Combine new filter with the previous one if it is present. + /// This filter has the size of total_rows_per granule. It is applied after reading contiguous chunks from + /// the start of each granule. + FilterWithCachedCount filter = current_filter; + if (final_filter.present()) + { + /// If current filter has the same size as the final filter, it means that the final filter has not been applied. + /// In this case we AND current filter with the existing final filter. + /// In other case, when the final filter has been applied, the size of current step filter will be equal to number of ones + /// in the final filter. 
In this case we combine current filter with the final filter. + ColumnPtr combined_filter; + if (current_filter.size() == final_filter.size()) + combined_filter = andFilters(final_filter.getColumn(), current_filter.getColumn()); + else + combined_filter = combineFilters(final_filter.getColumn(), current_filter.getColumn()); + + filter = FilterWithCachedCount(combined_filter); + } + + if (total_rows_per_granule == 0 || !filter.present()) return; NumRows zero_tails; - auto total_zero_rows_in_tails = countZeroTails(filter->getData(), zero_tails, can_read_incomplete_granules); + auto total_zero_rows_in_tails = countZeroTails(filter.getData(), zero_tails, can_read_incomplete_granules); - if (total_zero_rows_in_tails == filter->size()) + LOG_TEST(log, "ReadResult::optimize() before: {}", dumpInfo()); + + SCOPE_EXIT(checkInternalConsistency()); + + SCOPE_EXIT({ + LOG_TEST(log, "ReadResult::optimize() after: {}", dumpInfo()); + }); + + if (total_zero_rows_in_tails == filter.size()) { + LOG_TEST(log, "ReadResult::optimize() combined filter is const False"); clear(); return; } - else if (total_zero_rows_in_tails == 0 && countBytesInResultFilter(filter->getData()) == filter->size()) + else if (total_zero_rows_in_tails == 0 && filter.countBytesInFilter() == filter.size()) { + LOG_TEST(log, "ReadResult::optimize() combined filter is const True"); setFilterConstTrue(); return; } /// Just a guess. If only a few rows may be skipped, it's better not to skip at all. 
- else if (2 * total_zero_rows_in_tails > filter->size()) + else if (2 * total_zero_rows_in_tails > filter.size()) { + const NumRows rows_per_granule_previous = rows_per_granule; + const size_t total_rows_per_granule_previous = total_rows_per_granule; + for (auto i : collections::range(0, rows_per_granule.size())) { - rows_per_granule_original.push_back(rows_per_granule[i]); rows_per_granule[i] -= zero_tails[i]; } - num_rows_to_skip_in_last_granule += rows_per_granule_original.back() - rows_per_granule.back(); + num_rows_to_skip_in_last_granule += rows_per_granule_previous.back() - rows_per_granule.back(); + total_rows_per_granule = total_rows_per_granule_previous - total_zero_rows_in_tails; + + /// Check if const 1 after shrink. + /// We can apply shrink only if after the previous step the number of rows in the result + /// matches the rows_per_granule info. Otherwise we will not be able to match newly added zeros in granule tails. + if (num_rows == total_rows_per_granule_previous && + filter.countBytesInFilter() + total_zero_rows_in_tails == total_rows_per_granule_previous) /// All zeros are in tails? + { + setFilterConstTrue(); - filter_original = filter; - filter_holder_original = std::move(filter_holder); + /// If all zeros are in granule tails, we can use shrink to filter out rows. 
+ shrink(columns, rows_per_granule_previous); /// shrink acts as filtering in such case + auto c = additional_columns.getColumns(); + shrink(c, rows_per_granule_previous); + additional_columns.setColumns(c); - /// Check if const 1 after shrink - if (allow_filter_columns && countBytesInResultFilter(filter->getData()) + total_zero_rows_in_tails == total_rows_per_granule) - { - total_rows_per_granule = total_rows_per_granule - total_zero_rows_in_tails; num_rows = total_rows_per_granule; - setFilterConstTrue(); - shrink(columns); /// shrink acts as filtering in such case + + LOG_TEST(log, "ReadResult::optimize() after shrink {}", dumpInfo()); } else { - auto new_filter = ColumnUInt8::create(filter->size() - total_zero_rows_in_tails); + auto new_filter = ColumnUInt8::create(filter.size() - total_zero_rows_in_tails); IColumn::Filter & new_data = new_filter->getData(); - collapseZeroTails(filter->getData(), new_data); - total_rows_per_granule = new_filter->size(); - num_rows = total_rows_per_granule; - filter = new_filter.get(); - filter_holder = std::move(new_filter); + /// Shorten the filter by removing zeros from granule tails + collapseZeroTails(filter.getData(), rows_per_granule_previous, new_data); + if (total_rows_per_granule != new_filter->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "New filter size {} doesn't match number of rows to be read {}", + new_filter->size(), total_rows_per_granule); + + /// Need to apply combined filter here before replacing it with shortened one because otherwise + /// the filter size will not match the number of rows in the result columns. + if (num_rows == total_rows_per_granule_previous) + { + /// Filter from the previous steps has not been applied yet, do it now. + applyFilter(filter); + } + else + { + /// Filter was applied before, so apply only new filter from the current step. 
+ applyFilter(current_filter); + } + + final_filter = FilterWithCachedCount(new_filter->getPtr()); + if (num_rows != final_filter.countBytesInFilter()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Count of 1s in final filter {} doesn't match number of rows {}", + final_filter.countBytesInFilter(), num_rows); + + LOG_TEST(log, "ReadResult::optimize() after colapseZeroTails {}", dumpInfo()); } - need_filter = true; } - /// Another guess, if it's worth filtering at PREWHERE - else if (countBytesInResultFilter(filter->getData()) < 0.6 * filter->size()) - need_filter = true; + else + { + /// Check if we have rows already filtered at the previous step. In such case we must apply the filter because + /// otherwise num_rows doesn't match total_rows_per_granule and the next read step will not know how to filter + /// newly read columns to match the num_rows. + if (num_rows != total_rows_per_granule) + { + applyFilter(current_filter); + } + /// Another guess, if it's worth filtering at PREWHERE + else if (filter.countBytesInFilter() < 0.6 * filter.size()) + { + applyFilter(filter); + } + + final_filter = std::move(filter); + } } size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & filter_vec, NumRows & zero_tails, bool can_read_incomplete_granules) const @@ -441,7 +639,7 @@ size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & return total_zero_rows_in_tails; } -void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, IColumn::Filter & new_filter_vec) +void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, const NumRows & rows_per_granule_previous, IColumn::Filter & new_filter_vec) const { const auto * filter_data = filter_vec.data(); auto * new_filter_data = new_filter_vec.data(); @@ -449,7 +647,7 @@ void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & for (auto i : collections::range(0, rows_per_granule.size())) { 
memcpySmallAllowReadWriteOverflow15(new_filter_data, filter_data, rows_per_granule[i]); - filter_data += rows_per_granule_original[i]; + filter_data += rows_per_granule_previous[i]; new_filter_data += rows_per_granule[i]; } @@ -597,54 +795,6 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con return count; } -/// Filter size must match total_rows_per_granule -void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter) -{ - if (!new_filter && filter) - throw Exception("Can't replace existing filter with empty.", ErrorCodes::LOGICAL_ERROR); - - if (filter) - { - size_t new_size = new_filter->size(); - - if (new_size != total_rows_per_granule) - throw Exception("Can't set filter because it's size is " + toString(new_size) + " but " - + toString(total_rows_per_granule) + " rows was read.", ErrorCodes::LOGICAL_ERROR); - } - - ConstantFilterDescription const_description(*new_filter); - if (const_description.always_true) - { - setFilterConstTrue(); - } - else if (const_description.always_false) - { - clear(); - } - else - { - FilterDescription filter_description(*new_filter); - filter_holder = filter_description.data_holder ? 
filter_description.data_holder : new_filter; - filter = typeid_cast(filter_holder.get()); - if (!filter) - throw Exception("setFilter function expected ColumnUInt8.", ErrorCodes::LOGICAL_ERROR); - } -} - - -size_t MergeTreeRangeReader::ReadResult::countBytesInResultFilter(const IColumn::Filter & filter_) -{ - auto it = filter_bytes_map.find(&filter_); - if (it == filter_bytes_map.end()) - { - auto bytes = countBytesInFilter(filter_); - filter_bytes_map[&filter_] = bytes; - return bytes; - } - else - return it->second; -} - MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, @@ -659,30 +809,37 @@ MergeTreeRangeReader::MergeTreeRangeReader( , is_initialized(true) { if (prev_reader) - sample_block = prev_reader->getSampleBlock(); + result_sample_block = prev_reader->getSampleBlock(); for (const auto & name_and_type : merge_tree_reader->getColumns()) - sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + { + read_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + } for (const auto & column_name : non_const_virtual_column_names_) { - if (sample_block.has(column_name)) + if (result_sample_block.has(column_name)) continue; non_const_virtual_column_names.push_back(column_name); - if (column_name == "_part_offset") - sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + if (column_name == "_part_offset" && !prev_reader) + { + /// _part_offset column is filled by the first reader. 
+ read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + } } if (prewhere_info) { const auto & step = *prewhere_info; if (step.actions) - step.actions->execute(sample_block, true); + step.actions->execute(result_sample_block, true); if (step.remove_column) - sample_block.erase(step.column_name); + result_sample_block.erase(step.column_name); } } @@ -765,7 +922,12 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (max_rows == 0) throw Exception("Expected at least 1 row to read, got 0.", ErrorCodes::LOGICAL_ERROR); - ReadResult read_result; + ReadResult read_result(log); + + SCOPE_EXIT({ + LOG_TEST(log, "read() returned {}, sample block {}", + read_result.dumpInfo(), this->result_sample_block.dumpNames()); + }); if (prev_reader) { @@ -778,69 +940,52 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (read_result.num_rows == 0) return read_result; - bool has_columns = false; + /// Calculate and update read bytes size_t total_bytes = 0; for (auto & column : columns) { if (column) { total_bytes += column->byteSize(); - has_columns = true; } } - read_result.addNumBytesRead(total_bytes); - bool should_evaluate_missing_defaults = false; - - if (has_columns) + if (!columns.empty()) { - /// num_read_rows >= read_result.num_rows - /// We must filter block before adding columns to read_result.block - - /// Fill missing columns before filtering because some arrays from Nested may have empty data. - merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_read_rows); + /// If all requested columns are absent in part num_read_rows will be 0. + /// In this case we need to use number of rows in the result to fill the default values and don't filter block. 
+ if (num_read_rows == 0) + num_read_rows = read_result.num_rows; + + /// fillMissingColumns() must be called after reading but befoe any filterings because + /// some columns (e.g. arrays) might be only partially filled and thus not be valid and + /// fillMissingColumns() fixes this. + bool should_evaluate_missing_defaults; + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, + num_read_rows); - if (read_result.getFilter()) - filterColumns(columns, read_result.getFilter()->getData()); - } - else - { - size_t num_rows = read_result.num_rows; + if (read_result.total_rows_per_granule == num_read_rows && read_result.num_rows != num_read_rows) + { + /// We have filter applied from the previous step + /// So we need to apply it to the newly read rows + if (!read_result.final_filter.present() || read_result.final_filter.countBytesInFilter() != read_result.num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Final filter is missing or has mistaching size, read_result: {}", + read_result.dumpInfo()); - /// If block is empty, we still may need to add missing columns. - /// In that case use number of rows in result block and don't filter block. 
- if (num_rows) - merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows); - } + filterColumns(columns, read_result.final_filter); + } - if (!columns.empty()) - { /// If some columns absent in part, then evaluate default values if (should_evaluate_missing_defaults) { - auto block = prev_reader->sample_block.cloneWithColumns(read_result.columns); - auto block_before_prewhere = read_result.block_before_prewhere; - for (const auto & column : block) - { - if (block_before_prewhere.has(column.name)) - block_before_prewhere.erase(column.name); - } + Block additional_columns = prev_reader->getSampleBlock().cloneWithColumns(read_result.columns); + for (const auto & col : read_result.additional_columns) + additional_columns.insert(col); - if (block_before_prewhere) - { - if (read_result.need_filter) - { - auto old_columns = block_before_prewhere.getColumns(); - filterColumns(old_columns, read_result.getFilterOriginal()->getData()); - block_before_prewhere.setColumns(old_columns); - } - - for (auto & column : block_before_prewhere) - block.insert(std::move(column)); - } - merge_tree_reader->evaluateMissingDefaults(block, columns); + merge_tree_reader->evaluateMissingDefaults(additional_columns, columns); } + /// If columns not empty, then apply on-fly alter conversions if any required merge_tree_reader->performRequiredConversions(columns); } @@ -854,11 +999,15 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result = startReadingChain(max_rows, ranges); read_result.num_rows = read_result.numReadRows(); - if (read_result.num_rows) + LOG_TEST(log, "First reader returned: {}, requested columns: {}", + read_result.dumpInfo(), dumpNames(merge_tree_reader->getColumns())); + + if (read_result.num_rows == 0) + return read_result; + { /// Physical columns go first and then some virtual columns follow - /// TODO: is there a better way to account for virtual columns that were filled by previous readers? 
- size_t physical_columns_count = read_result.columns.size() - read_result.extra_columns_filled.size(); + size_t physical_columns_count = merge_tree_reader->getColumns().size(); Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count); bool should_evaluate_missing_defaults; @@ -875,8 +1024,6 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar for (size_t i = 0; i < physical_columns.size(); ++i) read_result.columns[i] = std::move(physical_columns[i]); } - else - read_result.columns.clear(); size_t total_bytes = 0; for (auto & column : read_result.columns) @@ -885,18 +1032,35 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result.addNumBytesRead(total_bytes); } - if (read_result.num_rows == 0) - return read_result; - executePrewhereActionsAndFilterColumns(read_result); + read_result.checkInternalConsistency(); + + if (!read_result.can_return_prewhere_column_without_filtering) + { + if (!read_result.filterWasApplied()) + { + /// TODO: another solution might be to set all 0s from final filter into the prewhere column and not filter all the columns here + /// but rely on filtering in WHERE. 
+ read_result.applyFilter(read_result.final_filter); + read_result.checkInternalConsistency(); + } + + read_result.can_return_prewhere_column_without_filtering = true; + } + + if (read_result.num_rows != 0 && read_result.columns.size() != getSampleBlock().columns()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Number of columns in result doesn't match number of columns in sample block, read_result: {}, sample block: {}", + read_result.dumpInfo(), getSampleBlock().dumpStructure()); + return read_result; } - MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t max_rows, MarkRanges & ranges) { - ReadResult result; + ReadResult result(log); result.columns.resize(merge_tree_reader->getColumns().size()); size_t current_task_last_mark = getLastMark(ranges); @@ -946,14 +1110,11 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t result.addRows(stream.finalize(result.columns)); /// Last granule may be incomplete. - if (!result.rowsPerGranule().empty()) + if (!result.rows_per_granule.empty()) result.adjustLastGranule(); - for (const auto & column_name : non_const_virtual_column_names) - { - if (column_name == "_part_offset") - fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); - } + if (read_sample_block.has("_part_offset")) + fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); return result; } @@ -968,11 +1129,13 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead UInt64 * pos = vec.data(); UInt64 * end = &vec[num_rows]; + /// Fill the reamining part of the previous range (it was started in the previous read request). while (pos < end && leading_begin_part_offset < leading_end_part_offset) *pos++ = leading_begin_part_offset++; - const auto start_ranges = result.startedRanges(); + const auto & start_ranges = result.started_ranges; + /// Fill the ranges which were started in the current read request. 
for (const auto & start_range : start_ranges) { UInt64 start_part_offset = index_granularity->getMarkStartingRow(start_range.range.begin); @@ -983,7 +1146,6 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead } result.columns.emplace_back(std::move(column)); - result.extra_columns_filled.push_back("_part_offset"); } Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows) @@ -995,7 +1157,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si if (merge_tree_reader->getColumns().empty()) return columns; - if (result.rowsPerGranule().empty()) + if (result.rows_per_granule.empty()) { /// If zero rows were read on prev step, than there is no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually. @@ -1005,8 +1167,8 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si columns.resize(merge_tree_reader->numColumnsInResult()); - const auto & rows_per_granule = result.rowsPerGranule(); - const auto & started_ranges = result.startedRanges(); + const auto & rows_per_granule = result.rows_per_granule; + const auto & started_ranges = result.started_ranges; size_t current_task_last_mark = ReadResult::getLastMark(started_ranges); size_t next_range_to_start = 0; @@ -1027,13 +1189,13 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si num_rows += stream.read(columns, rows_per_granule[i], !last); } - stream.skip(result.numRowsToSkipInLastGranule()); + stream.skip(result.num_rows_to_skip_in_last_granule); num_rows += stream.finalize(columns); /// added_rows may be zero if all columns were read in prewhere and it's ok. 
- if (num_rows && num_rows != result.totalRowsPerGranule()) + if (num_rows && num_rows != result.total_rows_per_granule) throw Exception("RangeReader read " + toString(num_rows) + " rows, but " - + toString(result.totalRowsPerGranule()) + " expected.", ErrorCodes::LOGICAL_ERROR); + + toString(result.total_rows_per_granule) + " expected.", ErrorCodes::LOGICAL_ERROR); return columns; } @@ -1047,7 +1209,7 @@ static void checkCombinedFiltersSize(size_t bytes_in_first_filter, size_t second } /// Second filter size must be equal to number of 1s in the first filter. -/// The result size is equal to first filter size. +/// The result has size equal to first filter size and contains 1s only where both filters contain 1s. static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) { ConstantFilterDescription first_const_descr(*first); @@ -1100,23 +1262,22 @@ static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) return mut_first; } -void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) +void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) const { + result.checkInternalConsistency(); + if (!prewhere_info) return; - const auto & header = merge_tree_reader->getColumns(); - size_t num_columns = header.size(); + const auto & header = read_sample_block; + size_t num_columns = header.columns(); /// Check that we have columns from previous steps and newly read required columns - if (result.columns.size() < num_columns + result.extra_columns_filled.size()) + if (result.columns.size() < num_columns) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", num_columns, result.columns.size()); - /// This filter has the size of total_rows_per granule. It is applied after reading contiguous chunks from - /// the start of each granule. - ColumnPtr combined_filter; /// Filter computed at the current step. 
Its size is equal to num_rows which is <= total_rows_per_granule ColumnPtr current_step_filter; size_t prewhere_column_pos; @@ -1138,35 +1299,28 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto name_and_type = header.begin(); name_and_type != header.end() && pos < result.columns.size(); ++pos, ++name_and_type) block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); - for (const auto & column_name : non_const_virtual_column_names) { - if (block.has(column_name)) - continue; + /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. + Block additional_columns = block; - if (column_name == "_part_offset") - { - if (pos >= result.columns.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", - num_columns, result.columns.size()); + if (prewhere_info->actions) + prewhere_info->actions->execute(block); - block.insert({result.columns[pos], std::make_shared(), column_name}); - } - else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + result.additional_columns.clear(); + /// Additional columns might only be needed if there are more steps in the chain. + if (!last_reader_in_chain) { - /// Do nothing, it will be added later + for (auto & col : additional_columns) + { + /// Exclude columns that are present in the result block to avoid storing them and filtering twice. + /// TODO: also need to exclude the columns that are not needed for the next steps. + if (block.has(col.name)) + continue; + result.additional_columns.insert(col); + } } - else - throw Exception("Unexpected non-const virtual column: " + column_name, ErrorCodes::LOGICAL_ERROR); - ++pos; } - /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. 
- result.block_before_prewhere = block; - - if (prewhere_info->actions) - prewhere_info->actions->execute(block); - prewhere_column_pos = block.getPositionByName(prewhere_info->column_name); result.columns.clear(); @@ -1174,90 +1328,38 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto & col : block) result.columns.emplace_back(std::move(col.column)); - current_step_filter.swap(result.columns[prewhere_column_pos]); - combined_filter = current_step_filter; + current_step_filter = result.columns[prewhere_column_pos]; } - if (result.getFilter()) + if (prewhere_info->remove_column) + result.columns.erase(result.columns.begin() + prewhere_column_pos); + else { - ColumnPtr prev_filter = result.getFilterHolder(); - combined_filter = combineFilters(prev_filter, std::move(combined_filter)); + /// In case when we are not removing prewhere column the caller expects it to serve as a final filter: + /// it must contain 0s not only from the current step but also from all the previous steps. + /// One way to achieve this is to apply the final_filter if we know that the final _filter was not applied at + /// several previous steps but was accumulated instead. 
+ result.can_return_prewhere_column_without_filtering = + (!result.final_filter.present() || result.final_filter.countBytesInFilter() == result.num_rows); } - result.setFilter(combined_filter); + FilterWithCachedCount current_filter(current_step_filter); - /// If there is a WHERE, we filter in there, and only optimize IO and shrink columns here - if (!last_reader_in_chain) - result.optimize(merge_tree_reader->canReadIncompleteGranules(), true); + result.optimize(current_filter, merge_tree_reader->canReadIncompleteGranules()); - /// If we read nothing or filter gets optimized to nothing - if (result.totalRowsPerGranule() == 0) - result.setFilterConstFalse(); - /// If we need to filter in PREWHERE - else if (prewhere_info->need_filter || result.need_filter) + if (prewhere_info->need_filter && !result.filterWasApplied()) { - /// If there is a filter and without optimized - if (result.getFilter() && last_reader_in_chain) - { - const auto * result_filter = result.getFilter(); - /// optimize is not called, need to check const 1 and const 0 - size_t bytes_in_filter = result.countBytesInResultFilter(result_filter->getData()); - if (bytes_in_filter == 0) - result.setFilterConstFalse(); - else if (bytes_in_filter == result.num_rows) - result.setFilterConstTrue(); - } - - /// If there is still a filter, do the filtering now - if (result.getFilter()) - { - /// filter might be shrunk while columns not - const auto * result_filter = result.getFilterOriginal(); - - filterColumns(result.columns, current_step_filter); - - result.need_filter = true; - - bool has_column = false; - for (auto & column : result.columns) - { - if (column) - { - has_column = true; - result.num_rows = column->size(); - break; - } - } - - /// There is only one filter column. 
Record the actual number - if (!has_column) - result.num_rows = result.countBytesInResultFilter(result_filter->getData()); - } - - /// Check if the PREWHERE column is needed - if (!result.columns.empty()) - { - if (prewhere_info->remove_column) - result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - result.columns[prewhere_column_pos] = - getSampleBlock().getByName(prewhere_info->column_name).type-> - createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); - } - } - /// Filter in WHERE instead - else - { - if (prewhere_info->remove_column) - result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - { - auto type = getSampleBlock().getByName(prewhere_info->column_name).type; - ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared(), ""); - result.columns[prewhere_column_pos] = castColumn(col, type); - result.clearFilter(); // Acting as a flag to not filter in PREWHERE - } + /// Depending on whether the final filter was applied at the previous step or not we need to apply either + /// just the current step filter or the accumulated filter. + FilterWithCachedCount filter_to_apply = + current_filter.size() == result.total_rows_per_granule ? 
+ result.final_filter : + current_filter; + + result.applyFilter(filter_to_apply); } + + LOG_TEST(log, "After execute prewhere {}", result.dumpInfo()); } std::string PrewhereExprInfo::dump() const diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 06f3f5760fb5..039a499e9c18 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -1,6 +1,9 @@ #pragma once #include #include +#include +#include +#include #include namespace DB @@ -34,6 +37,45 @@ struct PrewhereExprInfo std::string dump() const; }; +class FilterWithCachedCount +{ + ConstantFilterDescription const_description; /// TODO: ConstantFilterDescription only checks always true/false for const columns + /// think how to handle when the column in not const but has all 0s or all 1s + ColumnPtr column = nullptr; + const IColumn::Filter * data = nullptr; + mutable size_t cached_count_bytes = -1; + +public: + explicit FilterWithCachedCount() = default; + + explicit FilterWithCachedCount(const ColumnPtr & column_) + : const_description(*column_) + { + ColumnPtr col = column_->convertToFullIfNeeded(); + FilterDescription desc(*col); + column = desc.data_holder ? desc.data_holder : col; + data = desc.data; + } + + bool present() const { return !!column; } + + bool alwaysTrue() const { return const_description.always_true; } + bool alwaysFalse() const { return const_description.always_false; } + + ColumnPtr getColumn() const { return column; } + + const IColumn::Filter & getData() const { return *data; } + + size_t size() const { return column->size(); } + + size_t countBytesInFilter() const + { + if (cached_count_bytes == size_t(-1)) + cached_count_bytes = DB::countBytesInFilter(*data); + return cached_count_bytes; + } +}; + /// MergeTreeReader iterator which allows sequential reading for arbitrary number of rows between pairs of marks in the same part. 
/// Stores reading state, which can be inside granule. Can skip rows in current granule and start reading from next mark. /// Used generally for reading number of rows less than index granularity to decrease cache misses for fat blocks. @@ -174,53 +216,46 @@ class MergeTreeRangeReader using RangesInfo = std::vector; - const RangesInfo & startedRanges() const { return started_ranges; } - const NumRows & rowsPerGranule() const { return rows_per_granule; } + explicit ReadResult(Poco::Logger * log_) : log(log_) {} static size_t getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges); - /// The number of rows were read at LAST iteration in chain. <= num_added_rows + num_filtered_rows. - size_t totalRowsPerGranule() const { return total_rows_per_granule; } - size_t numRowsToSkipInLastGranule() const { return num_rows_to_skip_in_last_granule; } - /// Filter you need to apply to newly-read columns in order to add them to block. - const ColumnUInt8 * getFilterOriginal() const { return filter_original ? filter_original : filter; } - const ColumnUInt8 * getFilter() const { return filter; } - ColumnPtr & getFilterHolder() { return filter_holder; } - void addGranule(size_t num_rows_); void adjustLastGranule(); void addRows(size_t rows) { num_read_rows += rows; } void addRange(const MarkRange & range) { started_ranges.push_back({rows_per_granule.size(), range}); } - /// Set filter or replace old one. Filter must have more zeroes than previous. - void setFilter(const ColumnPtr & new_filter); - /// For each granule calculate the number of filtered rows at the end. Remove them and update filter. - void optimize(bool can_read_incomplete_granules, bool allow_filter_columns); + /// Add current step filter to the result and then for each granule calculate the number of filtered rows at the end. + /// Remove them and update filter. 
+ /// Apply the filter to the columns and update num_rows if required + void optimize(const FilterWithCachedCount & current_filter, bool can_read_incomplete_granules); /// Remove all rows from granules. void clear(); - void clearFilter() { filter = nullptr; } void setFilterConstTrue(); - void setFilterConstFalse(); void addNumBytesRead(size_t count) { num_bytes_read += count; } - void shrink(Columns & old_columns); + /// Shrinks columns according to the diff between current and previous rows_per_granule. + void shrink(Columns & old_columns, const NumRows & rows_per_granule_previous) const; + + /// Applies the filter to the columns and updates num_rows. + void applyFilter(const FilterWithCachedCount & filter); - size_t countBytesInResultFilter(const IColumn::Filter & filter); + /// Verifies that columns and filter sizes match. + /// The checks might be non-trivial so it make sense to have the only in debug builds. + void checkInternalConsistency() const; - /// If this flag is false than filtering form PREWHERE can be delayed and done in WHERE - /// to reduce memory copies and applying heavy filters multiple times - bool need_filter = false; + std::string dumpInfo() const; - Block block_before_prewhere; + /// Contains columns that are not included into result but might be needed for default values calculation. + Block additional_columns; RangesInfo started_ranges; /// The number of rows read from each granule. /// Granule here is not number of rows between two marks /// It's amount of rows per single reading act NumRows rows_per_granule; - NumRows rows_per_granule_original; /// Sum(rows_per_granule) size_t total_rows_per_granule = 0; /// The number of rows was read at first step. May be zero if no read columns present in part. @@ -229,29 +264,36 @@ class MergeTreeRangeReader size_t num_rows_to_skip_in_last_granule = 0; /// Without any filtration. size_t num_bytes_read = 0; - /// nullptr if prev reader hasn't prewhere_actions. 
Otherwise filter.size() >= total_rows_per_granule. - ColumnPtr filter_holder; - ColumnPtr filter_holder_original; - const ColumnUInt8 * filter = nullptr; - const ColumnUInt8 * filter_original = nullptr; - void collapseZeroTails(const IColumn::Filter & filter, IColumn::Filter & new_filter); + /// This filter has the size of total_rows_per_granule. This means that it can be applied to newly read columns. + /// The result of applying this filter is that only rows that pass all previous filtering steps will remain. + FilterWithCachedCount final_filter; + + /// This flag is true when prewhere column can be returned without filtering. + /// It's true when it contains 0s from all filtering steps (not just the step when it was calculated). + /// NOTE: If we accumulated the final_filter for several steps without applying it then prewhere column calculated at the last step + /// will not contain 0s from all previous steps. + bool can_return_prewhere_column_without_filtering = true; + + /// Checks if result columns have current final_filter applied. 
+ bool filterWasApplied() const { return !final_filter.present() || final_filter.countBytesInFilter() == num_rows; } + + /// Builds updated filter by cutting zeros in granules tails + void collapseZeroTails(const IColumn::Filter & filter, const NumRows & rows_per_granule_previous, IColumn::Filter & new_filter) const; size_t countZeroTails(const IColumn::Filter & filter, NumRows & zero_tails, bool can_read_incomplete_granules) const; static size_t numZerosInTail(const UInt8 * begin, const UInt8 * end); - std::map filter_bytes_map; - - Names extra_columns_filled; + Poco::Logger * log; }; ReadResult read(size_t max_rows, MarkRanges & ranges); - const Block & getSampleBlock() const { return sample_block; } + const Block & getSampleBlock() const { return result_sample_block; } private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); - void executePrewhereActionsAndFilterColumns(ReadResult & result); + void executePrewhereActionsAndFilterColumns(ReadResult & result) const; void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; @@ -261,11 +303,14 @@ class MergeTreeRangeReader Stream stream; - Block sample_block; + Block read_sample_block; /// Block with columns that are actually read from disk + non-const virtual columns that are filled at this step. + Block result_sample_block; /// Block with columns that are returned by this step. 
bool last_reader_in_chain = false; bool is_initialized = false; Names non_const_virtual_column_names; + + Poco::Logger * log = &Poco::Logger::get("MergeTreeRangeReader"); }; } diff --git a/tests/queries/0_stateless/00609_prewhere_and_default.sql b/tests/queries/0_stateless/00609_prewhere_and_default.sql index 7da809cd140a..f1aa69c13201 100644 --- a/tests/queries/0_stateless/00609_prewhere_and_default.sql +++ b/tests/queries/0_stateless/00609_prewhere_and_default.sql @@ -3,11 +3,25 @@ create table `table_00609` (key UInt64, val UInt64) engine = MergeTree order by insert into `table_00609` select number, number / 8192 from system.numbers limit 100000; alter table `table_00609` add column def UInt64 default val + 1; select * from `table_00609` prewhere val > 2 format Null; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=100; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=1000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=10000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=20000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=30000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=40000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=80000; drop table if exists `table_00609`; create table `table_00609` (key UInt64, val UInt64) engine = MergeTree order by key settings index_granularity=8192; insert into `table_00609` select number, number / 8192 from system.numbers limit 100000; alter table `table_00609` add column def UInt64; select * from `table_00609` prewhere val > 2 format Null; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=100; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=1000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS 
max_block_size=10000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=20000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=30000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=40000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=80000; drop table if exists `table_00609`; diff --git a/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference new file mode 100644 index 000000000000..b4dfe343bbe6 --- /dev/null +++ b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference @@ -0,0 +1,3 @@ +foo +foo +foo diff --git a/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql new file mode 100644 index 000000000000..244f58b6717d --- /dev/null +++ b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql @@ -0,0 +1,17 @@ +drop table if exists t; + +create table t (id UInt32, a Int) engine = MergeTree order by id settings min_bytes_for_wide_part=0; + +insert into t values (1, 0) (2, 1) (3, 0) (4, 0) (5, 0); +alter table t add column s String default 'foo'; +select s from t prewhere a = 1; + +drop table t; + +create table t (id UInt32, a Int) engine = MergeTree order by id settings min_bytes_for_wide_part=0; + +insert into t values (1, 1) (2, 1) (3, 0) (4, 0) (5, 0); +alter table t add column s String default 'foo'; +select s from t prewhere a = 1; + +drop table t; diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.reference b/tests/queries/0_stateless/01674_filter_by_uint8.reference index 6b522898280d..435423ba4552 100644 --- a/tests/queries/0_stateless/01674_filter_by_uint8.reference +++ b/tests/queries/0_stateless/01674_filter_by_uint8.reference @@ -2,7 +2,12 @@ 0 255 1 ['foo','bar'] 1 1 -2 
['foo','bar'] 2 1 -3 ['foo','bar'] 3 1 -4 ['foo','bar'] 4 1 -5 ['foo','bar'] 5 1 +2 ['foo','bar'] 2 2 +3 ['foo','bar'] 3 3 +4 ['foo','bar'] 4 4 +5 ['foo','bar'] 5 5 +1 ['foo','bar'] 1 1 +2 ['foo','bar'] 2 2 +3 ['foo','bar'] 3 3 +4 ['foo','bar'] 4 4 +5 ['foo','bar'] 5 5 diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.sql b/tests/queries/0_stateless/01674_filter_by_uint8.sql index 960153d9c5af..0bf11cea59be 100644 --- a/tests/queries/0_stateless/01674_filter_by_uint8.sql +++ b/tests/queries/0_stateless/01674_filter_by_uint8.sql @@ -10,5 +10,6 @@ ENGINE = MergeTree ORDER BY u; INSERT INTO t_filter SELECT toString(number), ['foo', 'bar'], number, toUInt8(number) FROM numbers(1000); SELECT * FROM t_filter WHERE f LIMIT 5; +SELECT * FROM t_filter WHERE f != 0 LIMIT 5; DROP TABLE IF EXISTS t_filter; diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference index 0edbea640652..2455f50b7f2a 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference @@ -32,9 +32,27 @@ 0 0 198401_1_1_1 1 1 198401_1_1_1 999998 999998 198401_1_1_1 +0 +1 +2 +0 foo +1 foo +2 foo SOME GRANULES FILTERED OUT 335872 166463369216 166463369216 34464 1510321840 1510321840 301408 164953047376 164953047376 +100000 +100001 +100002 +100000 foo +100001 foo +100002 foo PREWHERE 301408 164953047376 164953047376 +42 +10042 +20042 +42 foo +10042 foo +20042 foo diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index eb1f01e65f7b..1de6447172d7 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -24,6 +24,8 @@ INSERT INTO t_1 select rowNumberInAllBlocks(), *, '1984-01-01' from t_random_1 l OPTIMIZE 
TABLE t_1 FINAL; +ALTER TABLE t_1 ADD COLUMN foo String DEFAULT 'foo'; + SELECT COUNT(DISTINCT(_part)) FROM t_1; SELECT min(_part_offset), max(_part_offset) FROM t_1; @@ -37,13 +39,19 @@ SELECT order_0, _part_offset, _part FROM t_1 WHERE order_0 <= 1 OR (order_0 BETW SELECT order_0, _part_offset, computed FROM t_1 ORDER BY order_0, _part_offset, computed LIMIT 3; SELECT order_0, _part_offset, computed FROM t_1 ORDER BY order_0 DESC, _part_offset DESC, computed DESC LIMIT 3; SELECT order_0, _part_offset, _part FROM t_1 WHERE order_0 <= 1 OR order_0 >= 999998 ORDER BY order_0 LIMIT 3; +SELECT _part_offset FROM t_1 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 ORDER BY order_0 LIMIT 3; SELECT 'SOME GRANULES FILTERED OUT'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0 AND _part_offset < 100000; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0 AND _part_offset >= 100000; +SELECT _part_offset FROM t_1 where granule == 0 AND _part_offset >= 100000 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000 ORDER BY order_0 LIMIT 3; SELECT 'PREWHERE'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 } SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10 } +SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; diff --git a/tests/queries/0_stateless/02460_prewhere_row_level_policy.reference b/tests/queries/0_stateless/02460_prewhere_row_level_policy.reference new file mode 100644 index 
000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql b/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql new file mode 100644 index 000000000000..fc98fa773b4e --- /dev/null +++ b/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/40956#issuecomment-1262096612 +DROP TABLE IF EXISTS row_level_policy_prewhere; +DROP ROW POLICY IF EXISTS row_level_policy_prewhere_policy0 ON row_level_policy_prewhere; + +CREATE TABLE row_level_policy_prewhere (x Int16, y String) ENGINE = MergeTree ORDER BY x; +INSERT INTO row_level_policy_prewhere(y, x) VALUES ('A',1), ('B',2), ('C',3); +CREATE ROW POLICY row_level_policy_prewhere_policy0 ON row_level_policy_prewhere FOR SELECT USING x >= 0 TO default; +SELECT * FROM row_level_policy_prewhere PREWHERE y = 'foo'; +DROP TABLE row_level_policy_prewhere; diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 new file mode 100644 index 000000000000..ca7b300e00e5 --- /dev/null +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 @@ -0,0 +1,29 @@ +{% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] -%} +-- { echoOn } + +SELECT count() FROM url_na_log; +130000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +110000 +-- Delete more than a half rows (60K) from the range 2022-08-10 .. 
2022-08-20 +-- There should be 50K rows remaining in this range +DELETE FROM url_na_log WHERE SiteId = 209 AND DateVisit >= '2022-08-13' AND DateVisit <= '2022-08-18'; +SELECT count() FROM url_na_log; +70000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +50000 +-- Hide more than a half of remaining rows (30K) from the range 2022-08-10 .. 2022-08-20 using row policy +-- Now the this range should have 20K rows left +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING DateVisit < '2022-08-11' or DateVisit > '2022-08-19' TO default; +SELECT count() FROM url_na_log; +40000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +20000 +DROP ROW POLICY url_na_log_policy0 ON url_na_log; +{% endfor -%} diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 new file mode 100644 index 000000000000..e1ec348e6ac2 --- /dev/null +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 @@ -0,0 +1,59 @@ +{% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] %} + +DROP TABLE IF EXISTS url_na_log; + +CREATE TABLE url_na_log(SiteId UInt32, DateVisit Date, PRIMARY KEY (SiteId)) +ENGINE = MergeTree() +ORDER BY (SiteId, DateVisit) +SETTINGS index_granularity = {{ index_granularity }}, min_bytes_for_wide_part = 0; + +-- Insert some data to have 110K rows in the range 2022-08-10 .. 
2022-08-20 and some more rows before and after that range +insert into url_na_log select 209, '2022-08-09' from numbers(10000); +insert into url_na_log select 209, '2022-08-10' from numbers(10000); +insert into url_na_log select 209, '2022-08-11' from numbers(10000); +insert into url_na_log select 209, '2022-08-12' from numbers(10000); +insert into url_na_log select 209, '2022-08-13' from numbers(10000); +insert into url_na_log select 209, '2022-08-14' from numbers(10000); +insert into url_na_log select 209, '2022-08-15' from numbers(10000); +insert into url_na_log select 209, '2022-08-16' from numbers(10000); +insert into url_na_log select 209, '2022-08-17' from numbers(10000); +insert into url_na_log select 209, '2022-08-18' from numbers(10000); +insert into url_na_log select 209, '2022-08-19' from numbers(10000); +insert into url_na_log select 209, '2022-08-20' from numbers(10000); +insert into url_na_log select 209, '2022-08-21' from numbers(10000); + + +SET mutations_sync=2; +SET allow_experimental_lightweight_delete=1; + +OPTIMIZE TABLE url_na_log FINAL; + +-- { echoOn } + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + + +-- Delete more than a half rows (60K) from the range 2022-08-10 .. 
2022-08-20 +-- There should be 50K rows remaining in this range +DELETE FROM url_na_log WHERE SiteId = 209 AND DateVisit >= '2022-08-13' AND DateVisit <= '2022-08-18'; + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + + +-- Hide more than a half of remaining rows (30K) from the range 2022-08-10 .. 2022-08-20 using row policy +-- Now the this range should have 20K rows left +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING DateVisit < '2022-08-11' or DateVisit > '2022-08-19' TO default; + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + +DROP ROW POLICY url_na_log_policy0 ON url_na_log; + +-- { echoOff } + +{% endfor %} diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python new file mode 100644 index 000000000000..a12656f636b4 --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +import requests +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + + +class Tester: + ''' + - Creates test table + - Deletes the specified range of rows + - Masks another range using row-level policy + - Runs some read queries and checks that the results + ''' + def __init__(self, session, url, index_granularity, total_rows): + self.session = session + self.url = url + self.index_granularity = 
index_granularity + self.total_rows = total_rows + self.reported_errors = set() + self.repro_queries = [] + + def report_error(self): + print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + exit(1) + + def query(self, query_text, include_in_repro_steps = True, expected_data = None): + self.repro_queries.append(query_text) + resp = self.session.post(self.url, data=query_text) + if resp.status_code != 200: + # Group similar errors + error = resp.text[0:40] + if error not in self.reported_errors: + self.reported_errors.add(error) + print('Code:', resp.status_code) + print('Result:', resp.text) + self.report_error() + + result = resp.text + # Check that the result is as expected + if ((not expected_data is None) and (int(result) != len(expected_data))): + print('Expected {} rows, got {}'.format(len(expected_data), result)) + print('Expected data:' + str(expected_data)) + self.report_error() + + if not include_in_repro_steps: + self.repro_queries.pop() + + + def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + all_data_after_delete = all_data[ + ~((all_data.a == 0) & + (all_data.b > delete_range_start) & + (all_data.b <= delete_range_end))] + all_data_after_row_policy = all_data_after_delete[ + (all_data_after_delete.b <= row_level_policy_range_start) | + (all_data_after_delete.b > row_level_policy_range_end)] + + for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value + self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy) + + delta = 10 + for query_range_start in [0, delta]: + for query_range_end in [self.total_rows - delta]: #, self.total_rows]: + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + (all_data_after_row_policy.b > query_range_start) & + (all_data_after_row_policy.b <= query_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 
0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + (all_data_after_row_policy.c > query_range_start) & + (all_data_after_row_policy.c <= query_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + ((all_data_after_row_policy.c <= query_range_start) | + (all_data_after_row_policy.c > query_range_end))] + self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + + def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + self.repro_queries = [] + + self.query(''' + CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a)) + ENGINE = MergeTree() ORDER BY (a, b) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + + self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + + client = ClickHouseClient() + all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;") + + self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + + # After all data has been written add a column with default value + self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;') + + self.check_data(all_data, -100, -100, -100, -100) + + self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format( + delete_range_start, delete_range_end)) + + self.check_data(all_data, delete_range_start, delete_range_end, -100, -100) + + self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format( + 
row_level_policy_range_start, row_level_policy_range_end)) + + self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + + self.query('DROP POLICY policy_tab_02473 ON tab_02473;') + + self.query('DROP TABLE tab_02473;') + + + +def main(): + # Set mutations to synchronous mode and enable lightweight DELETE's + url = os.environ['CLICKHOUSE_URL'] + '&mutations_sync=2&allow_experimental_lightweight_delete=1&max_threads=1' + + default_index_granularity = 10; + total_rows = 8 * default_index_granularity + step = default_index_granularity + session = requests.Session() + for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: + tester = Tester(session, url, index_granularity, total_rows) + # Test combinations of ranges of various size masked by lightweight DELETES + # along with ranges of various size masked by row-level policies + for delete_range_start in range(0, total_rows, 3 * step): + for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step): + for row_level_policy_range_start in range(0, total_rows, 3 * step): + for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step): + tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + + +if __name__ == "__main__": + main() + diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.reference b/tests/queries/0_stateless/02473_multistep_prewhere.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.sh b/tests/queries/0_stateless/02473_multistep_prewhere.sh new file mode 100755 index 000000000000..bbb411b0a32a --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_prewhere.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: long + 
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02473_multistep_prewhere.python + diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference new file mode 100644 index 000000000000..bb8ce4a83962 --- /dev/null +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference @@ -0,0 +1,76 @@ +-- { echoOn } +CREATE TABLE test_filter(a Int32, b Int32, c Int32) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 3; +INSERT INTO test_filter SELECT number, number+1, (number/2 + 1) % 2 FROM numbers(15); +SELECT _part_offset, intDiv(_part_offset, 3) as granule, * FROM test_filter ORDER BY _part_offset; +0 0 0 1 1 +1 0 1 2 1 +2 0 2 3 0 +3 1 3 4 0 +4 1 4 5 1 +5 1 5 6 1 +6 2 6 7 0 +7 2 7 8 0 +8 2 8 9 1 +9 3 9 10 1 +10 3 10 11 0 +11 3 11 12 0 +12 4 12 13 1 +13 4 13 14 1 +14 4 14 15 0 +-- Check that division by zero occurs on some rows +SELECT intDiv(b, c) FROM test_filter; -- { serverError ILLEGAL_DIVISION } +-- Filter out those rows using WHERE or PREWHERE +SELECT intDiv(b, c) FROM test_filter WHERE c != 0; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; +1 +5 +9 +13 +SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; +-- Delete all rows where division by zero could occur +DELETE FROM test_filter WHERE c = 0; +-- Test that now division by zero doesn't occur without explicit condition +SELECT intDiv(b, c) FROM test_filter; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT * FROM test_filter PREWHERE intDiv(b, c) > 0; +0 1 1 +1 2 1 +4 5 1 +5 6 1 +8 9 1 +9 10 1 +12 13 1 +13 14 1 +SELECT * FROM test_filter PREWHERE b != 0 WHERE intDiv(b, c) > 0; 
+0 1 1 +1 2 1 +4 5 1 +5 6 1 +8 9 1 +9 10 1 +12 13 1 +13 14 1 diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql new file mode 100644 index 000000000000..94ffb1b87302 --- /dev/null +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS test_filter; + +-- { echoOn } +CREATE TABLE test_filter(a Int32, b Int32, c Int32) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 3; + +INSERT INTO test_filter SELECT number, number+1, (number/2 + 1) % 2 FROM numbers(15); + +SELECT _part_offset, intDiv(_part_offset, 3) as granule, * FROM test_filter ORDER BY _part_offset; + +-- Check that division by zero occurs on some rows +SELECT intDiv(b, c) FROM test_filter; -- { serverError ILLEGAL_DIVISION } +-- Filter out those rows using WHERE or PREWHERE +SELECT intDiv(b, c) FROM test_filter WHERE c != 0; +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; + + +SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; + +-- Delete all rows where division by zero could occur +DELETE FROM test_filter WHERE c = 0; +-- Test that now division by zero doesn't occur without explicit condition +SELECT intDiv(b, c) FROM test_filter; +SELECT * FROM test_filter PREWHERE intDiv(b, c) > 0; +SELECT * FROM test_filter PREWHERE b != 0 WHERE intDiv(b, c) > 0; + +-- { echoOff } +DROP TABLE test_filter; diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference new file mode 100644 index 000000000000..85adb1850d4d --- /dev/null +++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference @@ -0,0 +1,110 @@ +-- { echoOn } +SELECT * FROM table_02513; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143011 +143012 
+143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%11; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%11; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%11 AND n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 WHERE n%143011 AND n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql new file mode 100644 index 000000000000..771893ce6746 --- /dev/null +++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS table_02513; + +CREATE TABLE table_02513 (n UInt64) ENGINE=MergeTree() ORDER BY tuple() SETTINGS index_granularity=100; + +INSERT INTO table_02513 SELECT number+11*13*1000 FROM numbers(20); + +SET allow_experimental_lightweight_delete=1; +SET mutations_sync=2; +SET max_threads=1; + +DELETE FROM table_02513 WHERE n%10=0; + +-- { echoOn } +SELECT * FROM table_02513; +SELECT * FROM table_02513 WHERE n%11; +SELECT * FROM 
table_02513 PREWHERE n%11; +SELECT * FROM table_02513 WHERE n%11 AND n%13; +SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13; + +SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 WHERE n%143011 AND n%13; +SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +-- { echoOff } + +DROP TABLE table_02513; From c68f898ba0e16ceda81160b3d91ffee8cddd408f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 30 Dec 2022 14:03:13 +0000 Subject: [PATCH 012/342] Backport #44705 to 22.12: Fix crash on delete from materialized view --- src/Interpreters/InterpreterDeleteQuery.cpp | 74 ++++++++++--------- src/Storages/IStorage.h | 4 + src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 2 + ...02319_lightweight_delete_on_merge_tree.sql | 2 +- ...2518_delete_on_materialized_view.reference | 0 .../02518_delete_on_materialized_view.sql | 14 ++++ 6 files changed, 61 insertions(+), 35 deletions(-) create mode 100644 tests/queries/0_stateless/02518_delete_on_materialized_view.reference create mode 100644 tests/queries/0_stateless/02518_delete_on_materialized_view.sql diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index b5b8ae81366a..720a179a9d47 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes { extern const int TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } @@ -58,8 +59,7 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto merge_tree = std::dynamic_pointer_cast(table); - if 
(!merge_tree) + if (table->supportsDelete()) { /// Convert to MutationCommand MutationCommands mutation_commands; @@ -75,39 +75,45 @@ BlockIO InterpreterDeleteQuery::execute() table->mutate(mutation_commands, getContext()); return {}; } + else if (table->supportsLightweightDelete()) + { + if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it"); + + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + /// Build "UPDATE _row_exists = 0 WHERE predicate" query + mut_command.type = MutationCommand::Type::UPDATE; + mut_command.predicate = delete_query.predicate; + + auto command = std::make_shared(); + command->type = ASTAlterCommand::UPDATE; + command->predicate = delete_query.predicate; + command->update_assignments = std::make_shared(); + auto set_row_does_not_exist = std::make_shared(); + set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; + auto zero_value = std::make_shared(DB::Field(UInt8(0))); + set_row_does_not_exist->children.push_back(zero_value); + command->update_assignments->children.push_back(set_row_does_not_exist); + command->children.push_back(command->predicate); + command->children.push_back(command->update_assignments); + mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; + mut_command.ast = command->ptr(); + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); + table->mutate(mutation_commands, getContext()); - if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is 
experimental. Set `allow_experimental_lightweight_delete` setting to enable it"); - - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - /// Build "UPDATE _row_exists = 0 WHERE predicate" query - mut_command.type = MutationCommand::Type::UPDATE; - mut_command.predicate = delete_query.predicate; - - auto command = std::make_shared(); - command->type = ASTAlterCommand::UPDATE; - command->predicate = delete_query.predicate; - command->update_assignments = std::make_shared(); - auto set_row_does_not_exist = std::make_shared(); - set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; - auto zero_value = std::make_shared(DB::Field(UInt8(0))); - set_row_does_not_exist->children.push_back(zero_value); - command->update_assignments->children.push_back(set_row_does_not_exist); - command->children.push_back(command->predicate); - command->children.push_back(command->update_assignments); - mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; - mut_command.ast = command->ptr(); - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); - table->mutate(mutation_commands, getContext()); - - return {}; + return {}; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table"); + } } } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index f018a7112845..d6cbb4fc38aa 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -241,6 +241,10 @@ class IStorage : public std::enable_shared_from_this, public TypePromo /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. 
This is different from lightweight delete + /// because those are internally translated into 'ALTER UDPATE' mutations. + virtual bool supportsDelete() const { return false; } + private: StorageID storage_id; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index ca0ab7a18408..eec817acd557 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -80,6 +80,8 @@ class StorageEmbeddedRocksDB final : public IStorage, public IKeyValueEntity, Wi const std::vector & keys, PaddedPODArray * out_null_map) const; + bool supportsDelete() const override { return true; } + private: const String primary_key; using RocksDBPtr = std::unique_ptr; diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 67513a1cdff6..c7f8b67e7406 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -103,7 +103,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()); INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_proj WHERE a < 100; -- { serverError BAD_ARGUMENTS } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.reference b/tests/queries/0_stateless/02518_delete_on_materialized_view.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.sql b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql new file mode 100644 index 000000000000..73abca4ea53c --- /dev/null +++ b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS kek; +DROP TABLE IF EXISTS kekv; 
+ +CREATE TABLE kek (a UInt32) ENGINE = MergeTree ORDER BY a; +CREATE MATERIALIZED VIEW kekv ENGINE = MergeTree ORDER BY tuple() AS SELECT * FROM kek; + +INSERT INTO kek VALUES (1); +DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS} + +SET allow_experimental_lightweight_delete=1; +DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS} + +DROP TABLE IF EXISTS kek; +DROP TABLE IF EXISTS kekv; From dd372851642457d46ed863923a780983b793ed27 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 30 Dec 2022 18:02:50 +0000 Subject: [PATCH 013/342] Backport #44224 to 22.12: Fix version parsing from compatibility setting --- src/Core/Settings.cpp | 13 ++++++------- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 4 +++- .../02503_bad_compatibility_setting.reference | 1 + .../0_stateless/02503_bad_compatibility_setting.sql | 3 +++ 5 files changed, 14 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02503_bad_compatibility_setting.reference create mode 100644 tests/queries/0_stateless/02503_bad_compatibility_setting.sql diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 7bac3f04fc66..fa1a10d22f20 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -148,31 +148,30 @@ std::vector Settings::getAllRegisteredNames() const void Settings::set(std::string_view name, const Field & value) { - BaseSettings::set(name, value); - if (name == "compatibility") - applyCompatibilitySetting(); + applyCompatibilitySetting(value.get()); /// If we change setting that was changed by compatibility setting before /// we should remove it from settings_changed_by_compatibility_setting, /// otherwise the next time we will change compatibility setting /// this setting will be changed too (and we don't want it). 
else if (settings_changed_by_compatibility_setting.contains(name)) settings_changed_by_compatibility_setting.erase(name); + + BaseSettings::set(name, value); } -void Settings::applyCompatibilitySetting() +void Settings::applyCompatibilitySetting(const String & compatibility_value) { /// First, revert all changes applied by previous compatibility setting for (const auto & setting_name : settings_changed_by_compatibility_setting) resetToDefault(setting_name); settings_changed_by_compatibility_setting.clear(); - String compatibility = getString("compatibility"); /// If setting value is empty, we don't need to change settings - if (compatibility.empty()) + if (compatibility_value.empty()) return; - ClickHouseVersion version(compatibility); + ClickHouseVersion version(compatibility_value); /// Iterate through ClickHouse version in descending order and apply reversed /// changes for each version that is higher that version from compatibility setting for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index acf11ca31481..f77651326bec 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -922,7 +922,7 @@ struct Settings : public BaseSettings, public IHints<2, Settings void setDefaultValue(const String & name) { resetToDefault(name); } private: - void applyCompatibilitySetting(); + void applyCompatibilitySetting(const String & compatibility); std::unordered_set settings_changed_by_compatibility_setting; }; diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 62b3c1b9c987..0c637c6d345d 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,8 @@ class ClickHouseVersion for (const auto & split_element : split) { size_t component; - if (!tryParse(component, split_element)) + ReadBufferFromString buf(split_element); + if 
(!tryReadIntText(component, buf) || !buf.eof()) throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; components.push_back(component); } diff --git a/tests/queries/0_stateless/02503_bad_compatibility_setting.reference b/tests/queries/0_stateless/02503_bad_compatibility_setting.reference new file mode 100644 index 000000000000..5b7d2a449a01 --- /dev/null +++ b/tests/queries/0_stateless/02503_bad_compatibility_setting.reference @@ -0,0 +1 @@ + 0 diff --git a/tests/queries/0_stateless/02503_bad_compatibility_setting.sql b/tests/queries/0_stateless/02503_bad_compatibility_setting.sql new file mode 100644 index 000000000000..178c6a875318 --- /dev/null +++ b/tests/queries/0_stateless/02503_bad_compatibility_setting.sql @@ -0,0 +1,3 @@ +set compatibility='a.a'; -- { serverError BAD_ARGUMENTS } +select value, changed from system.settings where name = 'compatibility' + From a9a1c0839bd54b731caecd640392b0576e51659a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 6 Jan 2023 09:03:00 +0000 Subject: [PATCH 014/342] Backport #44956 to 22.12: Do not check read result consistency when unwinding --- .../MergeTree/MergeTreeRangeReader.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index ac5c3b1db2d6..6f8da624449e 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -516,11 +516,13 @@ void MergeTreeRangeReader::ReadResult::optimize(const FilterWithCachedCount & cu LOG_TEST(log, "ReadResult::optimize() before: {}", dumpInfo()); - SCOPE_EXIT(checkInternalConsistency()); - - SCOPE_EXIT({ - LOG_TEST(log, "ReadResult::optimize() after: {}", dumpInfo()); - }); + SCOPE_EXIT( + if (!std::uncaught_exceptions()) + { + checkInternalConsistency(); + LOG_TEST(log, "ReadResult::optimize() after: {}", dumpInfo()); + } + ); if 
(total_zero_rows_in_tails == filter.size()) { @@ -924,10 +926,11 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar ReadResult read_result(log); - SCOPE_EXIT({ - LOG_TEST(log, "read() returned {}, sample block {}", - read_result.dumpInfo(), this->result_sample_block.dumpNames()); - }); + SCOPE_EXIT( + if (!std::uncaught_exceptions()) + LOG_TEST(log, "read() returned {}, sample block {}", + read_result.dumpInfo(), this->result_sample_block.dumpNames()); + ); if (prev_reader) { From 0b746f99d392a60014fbe85df6f6799ff64a3e21 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 6 Jan 2023 16:15:21 +0100 Subject: [PATCH 015/342] Update version to 22.12.3.1 --- cmake/autogenerated_versions.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 48251d5e40fb..a97f3afb1abf 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54469) SET(VERSION_MAJOR 22) SET(VERSION_MINOR 12) -SET(VERSION_PATCH 2) -SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3) -SET(VERSION_DESCRIBE v22.12.2.1-stable) -SET(VERSION_STRING 22.12.2.1) +SET(VERSION_PATCH 3) +SET(VERSION_GITHASH c790cfd4465bdf9a8c474837c27aa314ef6f61bd) +SET(VERSION_DESCRIBE v22.12.3.1-stable) +SET(VERSION_STRING 22.12.3.1) # end of autochange From 962b63911d1abc36cf322dab274ef30da1a9cd1e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Jan 2023 02:27:13 +0000 Subject: [PATCH 016/342] Backport #44959 to 22.12: fix alter table ttl error when wide part has light weight delete mask --- src/Interpreters/MutationsInterpreter.cpp | 7 +++ ...02521_lightweight_delete_and_ttl.reference | 25 ++++++++++ .../02521_lightweight_delete_and_ttl.sql | 46 +++++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference create mode 100644 
tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 26b8bce1f4a7..e363366b1a8b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -434,6 +434,13 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) const ProjectionsDescription & projections_desc = metadata_snapshot->getProjections(); NamesAndTypesList all_columns = columns_desc.getAllPhysical(); + /// Add _row_exists column if it is physically present in the part + if (auto part_storage = dynamic_pointer_cast(storage)) + { + if (part_storage->hasLightweightDeletedMask()) + all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); + } + NameSet updated_columns; bool materialize_ttl_recalculate_only = materializeTTLRecalculateOnly(storage); diff --git a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference new file mode 100644 index 000000000000..3b40d9048cd5 --- /dev/null +++ b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference @@ -0,0 +1,25 @@ +-- { echoOn } +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +Rows in parts 100000 +SELECT 'Count', count() FROM lwd_test_02521; +Count 100000 +DELETE FROM lwd_test_02521 WHERE id < 25000; +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +Rows in parts 100000 +SELECT 'Count', count() FROM lwd_test_02521; +Count 50000 +ALTER TABLE lwd_test_02521 MODIFY TTL event_time + INTERVAL 1 MONTH SETTINGS mutations_sync = 1; +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +Rows in parts 50000 +SELECT 'Count', count() FROM lwd_test_02521; +Count 25000 +ALTER TABLE 
lwd_test_02521 DELETE WHERE id >= 40000 SETTINGS mutations_sync = 1; +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +Rows in parts 40000 +SELECT 'Count', count() FROM lwd_test_02521; +Count 15000 +OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1; +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +Rows in parts 15000 +SELECT 'Count', count() FROM lwd_test_02521; +Count 15000 diff --git a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql new file mode 100644 index 000000000000..cf2fe452e4bc --- /dev/null +++ b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql @@ -0,0 +1,46 @@ +DROP TABLE IF EXISTS lwd_test_02521; + +CREATE TABLE lwd_test_02521 (id UInt64, value String, event_time DateTime) +ENGINE MergeTree() +ORDER BY id +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO lwd_test_02521 SELECT number, randomString(10), now() - INTERVAL 2 MONTH FROM numbers(50000); +INSERT INTO lwd_test_02521 SELECT number, randomString(10), now() FROM numbers(50000); + +OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1; + +SET mutations_sync=1; +SET allow_experimental_lightweight_delete = 1; + +-- { echoOn } +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +SELECT 'Count', count() FROM lwd_test_02521; + + +DELETE FROM lwd_test_02521 WHERE id < 25000; + +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +SELECT 'Count', count() FROM lwd_test_02521; + + +ALTER TABLE lwd_test_02521 MODIFY TTL event_time + INTERVAL 1 MONTH SETTINGS mutations_sync = 1; + +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 
'lwd_test_02521' AND active; +SELECT 'Count', count() FROM lwd_test_02521; + + +ALTER TABLE lwd_test_02521 DELETE WHERE id >= 40000 SETTINGS mutations_sync = 1; + +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +SELECT 'Count', count() FROM lwd_test_02521; + + +OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1; + +SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; +SELECT 'Count', count() FROM lwd_test_02521; + +-- { echoOff } + +DROP TABLE lwd_test_02521; \ No newline at end of file From f6448e74ec780184f700c3f16e2b8659281ba755 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 017/342] Kusto-phase1: Add Support to Kusto Query Language This is the initial implement of Kusto Query Language. in this commit, we support the following features as MVP : Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count ,min, max, sum Aggregate by time intervals --- src/Parsers/Kusto/ParserKQLFilter.cpp | 25 +- src/Parsers/Kusto/ParserKQLLimit.cpp | 51 +++- src/Parsers/Kusto/ParserKQLOperators.cpp | 182 ++---------- src/Parsers/Kusto/ParserKQLOperators.h | 13 +- src/Parsers/Kusto/ParserKQLProject.cpp | 30 +- src/Parsers/Kusto/ParserKQLProject.h | 6 + src/Parsers/Kusto/ParserKQLQuery.cpp | 343 ++++------------------- src/Parsers/Kusto/ParserKQLQuery.h | 19 +- src/Parsers/Kusto/ParserKQLSort.cpp | 35 ++- src/Parsers/Kusto/ParserKQLStatement.cpp | 57 +--- src/Parsers/Kusto/ParserKQLStatement.h | 7 - src/Parsers/Kusto/ParserKQLSummarize.cpp | 153 +++++++--- src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- src/Parsers/Kusto/ParserKQLTable.cpp | 27 
+- src/Parsers/Kusto/ParserKQLTable.h | 3 +- src/Parsers/tests/gtest_Parser.cpp | 32 +-- 16 files changed, 382 insertions(+), 605 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 3a399bdccdb1..ad7ad807d03a 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -10,15 +10,28 @@ namespace DB bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - String expr = getExprFromToken(pos); - ASTPtr where_expression; + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; - Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(token_filter, pos.max_depth); - if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index bb8e08fd3786..7811ebba9abb 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -2,26 +2,55 @@ #include #include #include -#include #include -#include namespace DB { bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr limit_length; - - auto expr = getExprFromToken(pos); - - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); - - if 
(!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) + if (op_pos.empty()) + return true; + + auto begin = pos; + Int64 minLimit = -1; + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(*ch)) + return false; + } + return true; + }; + + if (!isNumber()) + return false; + + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + + if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index f8e4f9eaab05..1db05d3c07a3 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include namespace DB { @@ -12,126 +10,50 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos &token_pos,String kql_op, String ch_op) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) { - String new_expr; - Expected expected; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + String new_expr, leftWildcards= "", rightWildcards=""; - ++token_pos; - if (!s_lparen.ignore(token_pos, expected)) - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); - - auto haystack = tokens.back(); - - String logic_op = (kql_op == "has_all") ? 
" and " : " or "; - - while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) - { - auto tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::Comma) - new_expr = new_expr + logic_op; - else - new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; - - ++token_pos; - if (token_pos->type == TokenType::ClosingRoundBracket) - break; - - } - - tokens.pop_back(); - return new_expr; -} - -String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op) -{ - String new_expr; - - ParserToken s_lparen(TokenType::OpeningRoundBracket); - - ASTPtr select; - Expected expected; - - ++token_pos; - if (!s_lparen.ignore(token_pos, expected)) - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); - - --token_pos; - --token_pos; - return ch_op; - -} - -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) -{ - String new_expr, left_wildcards, right_wildcards, left_space, right_space; - - switch (wildcards_pos) + switch (wildcardsPos) { case WildcardsPos::none: break; case WildcardsPos::left: - left_wildcards ="%"; + leftWildcards ="%"; break; case WildcardsPos::right: - right_wildcards = "%"; + rightWildcards = "%"; break; case WildcardsPos::both: - left_wildcards ="%"; - right_wildcards = "%"; + leftWildcards ="%"; + rightWildcards = "%"; break; } - switch (space_pos) - { - case WildcardsPos::none: - break; - - case WildcardsPos::left: - left_space =" "; - break; - - case WildcardsPos::right: - right_space = " "; - break; - - case WildcardsPos::both: - left_space =" "; - right_space = " "; - break; - } - - ++token_pos; - - if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + 
String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; - else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) - { - auto tmp_arg = String(token_pos->begin, token_pos->end); - new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; - } + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; else - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } -bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) +String KQLOperators::getExprFromToken(IParser::Pos pos) { - auto begin = pos; + String res; + std::vector tokens; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue op_value = KQLOperatorValue::none; + KQLOperatorValue opValue = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); String op = token; - if (token == "!") + if ( token == "!" 
) { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -162,27 +84,16 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) else --pos; } - else - --pos; - if (KQLOperator.find(op) == KQLOperator.end()) - { - pos = begin; - return false; - } - - op_value = KQLOperator[op]; + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; String new_expr; - - if (op_value == KQLOperatorValue::none) + if (opValue == KQLOperatorValue::none) tokens.push_back(op); else { - auto last_op = tokens.back(); - auto last_pos = pos; - - switch (op_value) + switch (opValue) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -221,7 +132,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -229,6 +140,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; @@ -238,11 +150,9 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::has_all: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_any: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_cs: @@ -254,67 +164,35 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::hasprefix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix: 
- new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::in_cs: - 
new_expr = genInOpExpr(pos,op,"in"); + new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: - new_expr = genInOpExpr(pos,op,"not in"); + new_expr = "not in"; break; case KQLOperatorValue::in: @@ -349,11 +227,13 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) tokens.push_back(new_expr); } - return true; + ++pos; } - pos = begin; - return false; -} + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; } +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9796ae10c07c..9beeeda55efc 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,14 +2,13 @@ #include #include -#include + namespace DB { -class KQLOperators -{ +class KQLOperators { public: - bool convert(std::vector &tokens,IParser::Pos &pos); + String getExprFromToken(IParser::Pos pos); protected: enum class WildcardsPos:uint8_t @@ -83,7 +82,7 @@ class KQLOperators {"hasprefix" , KQLOperatorValue::hasprefix}, {"!hasprefix" , KQLOperatorValue::not_hasprefix}, {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, {"hassuffix" , KQLOperatorValue::hassuffix}, {"!hassuffix" , KQLOperatorValue::not_hassuffix}, {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, @@ -98,9 +97,7 @@ class KQLOperators {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); - static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); - static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); + String genHaystackOpExpr(std::vector &tokens,IParser::Pos 
&tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); }; } diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index e978323d8215..fee8cdb612b6 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -6,20 +6,42 @@ namespace DB bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr select_expression_list; + auto begin = pos; String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); - expr = getExprFromToken(pos); + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + pos = begin; return true; } + + } diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h index b64675beed09..3ab3c82f1be3 100644 --- a/src/Parsers/Kusto/ParserKQLProject.h +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -8,9 +8,15 @@ namespace DB class ParserKQLProject : public ParserKQLBase { +public: + void addColumn(String column) {columns.insert(column);} + protected: const char * getName() const override { return "KQL project"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp 
b/src/Parsers/Kusto/ParserKQLQuery.cpp index 04ee36705a9a..0a9fa1fc4df0 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,335 +7,116 @@ #include #include #include +#include #include -#include -#include -#include -#include -#include -#include namespace DB { -String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) -{ - Tokens tokens(text.c_str(), text.c_str() + text.size()); - IParser::Pos pos(tokens, max_depth); - - return getExprFromToken(pos); -} - -String ParserKQLBase :: getExprFromPipe(Pos & pos) +bool ParserKQLBase :: parsePrepare(Pos & pos) { - uint16_t bracket_count = 0; - auto begin = pos; - auto end = pos; - while (!end->isEnd() && end->type != TokenType::Semicolon) - { - if (end->type == TokenType::OpeningRoundBracket) - ++bracket_count; - - if (end->type == TokenType::OpeningRoundBracket) - --bracket_count; - - if (end->type == TokenType::PipeMark && bracket_count == 0) - break; - - ++end; - } - --end; - return String(begin->begin, end->end); + op_pos.push_back(pos); + return true; } -String ParserKQLBase :: getExprFromToken(Pos & pos) +String ParserKQLBase :: getExprFromToken(Pos pos) { String res; - std::vector tokens; - String alias; - - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (!pos->isEnd() && pos->type != TokenType::PipeMark) { - String token = String(pos->begin,pos->end); - - if (token == "=") - { - ++pos; - if (String(pos->begin,pos->end) != "~") - { - alias = tokens.back(); - tokens.pop_back(); - } - --pos; - } - else if (!KQLOperators().convert(tokens,pos)) - { - tokens.push_back(token); - } - - if (pos->type == TokenType::Comma && !alias.empty()) - { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - tokens.push_back(","); - alias.clear(); - } + res = res + String(pos->begin,pos->end) +" "; ++pos; } - - if (!alias.empty()) - { - tokens.push_back("AS"); - 
tokens.push_back(alias); - } - - for (auto const &token : tokens) - res = res.empty()? token : res +" " + token; return res; } -std::unique_ptr ParserKQLQuery::getOperator(String & op_name) -{ - if (op_name == "filter" || op_name == "where") - return std::make_unique(); - else if (op_name == "limit" || op_name == "take") - return std::make_unique(); - else if (op_name == "project") - return std::make_unique(); - else if (op_name == "sort by" || op_name == "order by") - return std::make_unique(); - else if (op_name == "summarize") - return std::make_unique(); - else if (op_name == "table") - return std::make_unique(); - else - return nullptr; -} - bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - struct KQLOperatorDataFlowState - { - String operator_name; - bool need_input; - bool gen_output; - int8_t backspace_steps; // how many steps to last token of previous pipe - }; - auto select_query = std::make_shared(); node = select_query; - ASTPtr tables; - std::unordered_map kql_parser = - { - { "filter", {"filter", false, false, 3}}, - { "where", {"filter", false, false, 3}}, - { "limit", {"limit", false, true, 3}}, - { "take", {"limit", false, true, 3}}, - { "project", {"project", false, false, 3}}, - { "sort by", {"order by", false, false, 4}}, - { "order by", {"order by", false, false, 4}}, - { "table", {"table", false, false, 3}}, - { "summarize", {"summarize", true, true, 3}} + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + { "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { 
"order",&KQLsort_p}, + { "summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} }; std::vector> operation_pos; - String table_name(pos->begin, pos->end); - - operation_pos.push_back(std::make_pair("table", pos)); - ++pos; - uint16_t bracket_count = 0; + operation_pos.push_back(std::make_pair("table",pos)); - while (!pos->isEnd() && pos->type != TokenType::Semicolon) + while (!pos->isEnd()) { - if (pos->type == TokenType::OpeningRoundBracket) - ++bracket_count; - if (pos->type == TokenType::OpeningRoundBracket) - --bracket_count; - - if (pos->type == TokenType::PipeMark && bracket_count == 0) + ++pos; + if (pos->type == TokenType::PipeMark) { ++pos; - String kql_operator(pos->begin, pos->end); - if (kql_operator == "order" || kql_operator == "sort") - { - ++pos; - ParserKeyword s_by("by"); - if (s_by.ignore(pos,expected)) - { - kql_operator = "order by"; - --pos; - } - } - if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(kql_operator, pos)); + operation_pos.push_back(std::make_pair(KQLoperator,pos)); } - else - ++pos; } - auto kql_operator_str = operation_pos.back().first; - auto npos = operation_pos.back().second; - if (!npos.isValid()) - return false; - - auto kql_operator_p = getOperator(kql_operator_str); - - if (!kql_operator_p) - return false; - - if (operation_pos.size() == 1) - { - if (!kql_operator_p->parse(npos, node, expected)) - return false; - } - else if (operation_pos.size() == 2 && operation_pos.front().first == "table") + for (auto &op_pos : operation_pos) { - if (!kql_operator_p->parse(npos, node, expected)) + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) return false; - npos = operation_pos.front().second; - if (!ParserKQLTable().parse(npos, node, expected)) - return false; - 
} - else - { - String project_clause, order_clause, where_clause, limit_clause; - auto last_pos = operation_pos.back().second; - auto last_op = operation_pos.back().first; - - auto set_main_query_clause =[&](String & op, Pos & op_pos) - { - auto op_str = ParserKQLBase::getExprFromPipe(op_pos); - if (op == "project") - project_clause = op_str; - else if (op == "where" || op == "filter") - where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format("AND ({})", op_str); - else if (op == "limit" || op == "take") - limit_clause = op_str; - else if (op == "order by" || op == "sort by") - order_clause = order_clause.empty() ? op_str : order_clause + "," + op_str; - }; - - set_main_query_clause(last_op, last_pos); - - operation_pos.pop_back(); - - if (kql_parser[last_op].need_input) - { - if (!kql_operator_p->parse(npos, node, expected)) - return false; - } - else - { - while (!operation_pos.empty()) - { - auto prev_op = operation_pos.back().first; - auto prev_pos = operation_pos.back().second; - - if (kql_parser[prev_op].gen_output) - break; - if (!project_clause.empty() && prev_op == "project") - break; - set_main_query_clause(prev_op, prev_pos); - operation_pos.pop_back(); - last_op = prev_op; - last_pos = prev_pos; - } - } - - if (!operation_pos.empty()) - { - for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) - --last_pos; - - String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); - Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); - IParser::Pos pos_subquery(token_subquery, pos.max_depth); - - if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) - return false; - select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); - } - else - { - if (!ParserKQLTable().parse(last_pos, node, expected)) - return false; - } - - auto set_query_clasue =[&](String op_str, String op_calsue) - { - auto oprator = 
getOperator(op_str); - if (oprator) - { - Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); - IParser::Pos pos_clause(token_clause, pos.max_depth); - if (!oprator->parse(pos_clause, node, expected)) - return false; - } - return true; - }; - - if (!select_query->select()) - { - if (project_clause.empty()) - project_clause = "*"; - if (!set_query_clasue("project", project_clause)) - return false; - } - - if (!order_clause.empty()) - if (!set_query_clasue("order by", order_clause)) - return false; - if (!where_clause.empty()) - if (!set_query_clasue("where", where_clause)) - return false; - - if (!limit_clause.empty()) - if (!set_query_clasue("limit", limit_clause)) - return false; - return true; - } - - if (!select_query->select()) - { - auto expr = String("*"); - Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); - if (!std::make_unique()->parse(new_pos, node, expected)) + if (!KQLParser[KQLoperator]->parsePrepare(npos)) return false; } - return true; -} - -bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr select_node; - - if (!ParserKQLTaleFunction().parse(pos, select_node, expected)) + if (!KQLtable_p.parse(pos, tables, expected)) return false; - ASTPtr node_subquery = std::make_shared(); - node_subquery->children.push_back(select_node); + if (!KQLproject_p.parse(pos, select_expression_list, expected)) + return false; - ASTPtr node_table_expr = std::make_shared(); - node_table_expr->as()->subquery = node_subquery; + if (!KQLlimit_p.parse(pos, limit_length, expected)) + return false; - node_table_expr->children.emplace_back(node_subquery); + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; - ASTPtr node_table_in_select_query_emlement = std::make_shared(); - node_table_in_select_query_emlement->as()->table_expression = node_table_expr; + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; - ASTPtr 
res = std::make_shared(); + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; + else + group_expression_list = KQLsummarize_p.group_expression_list; - res->children.emplace_back(node_table_in_select_query_emlement); + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); - node = res; return true; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 42f5f84f0317..25aa4e6b83c2 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -1,31 +1,24 @@ #pragma once #include -#include namespace DB { class ParserKQLBase : public IParserBase { public: - static String getExprFromToken(Pos & pos); - static String getExprFromPipe(Pos & pos); - static String getExprFromToken(const String & text, const uint32_t & max_depth); -}; - -class ParserKQLQuery : public IParserBase -{ + virtual bool parsePrepare(Pos & pos) ; protected: - static std::unique_ptr getOperator(String &op_name); - const char * getName() const override { return "KQL query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); }; -class ParserKQLSubquery : public IParserBase +class ParserKQLQuery : public IParserBase { protected: - const char * getName() const override { return "KQL subquery"; } + const char * getName() const override { return "KQL query"; } bool 
parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index f7540d729fdd..9f226c2fc824 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -10,50 +10,61 @@ namespace DB bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + if (op_pos.empty()) + return true; + + auto begin = pos; bool has_dir = false; std::vector has_directions; ParserOrderByExpressionList order_list; ASTPtr order_expression_list; - auto expr = getExprFromToken(pos); + ParserKeyword by("by"); + + pos = op_pos.back(); // sort only affected by last one - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + if (!by.ignore(pos, expected)) + return false; - auto pos_backup = new_pos; - if (!order_list.parse(pos_backup, order_expression_list, expected)) + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) return false; - while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != TokenType::PipeMark) { - String tmp(new_pos->begin, new_pos->end); + String tmp(pos->begin,pos->end); if (tmp == "desc" or tmp == "asc") has_dir = true; - if (new_pos->type == TokenType::Comma) + if (pos->type == TokenType::Comma) { has_directions.push_back(has_dir); has_dir = false; } - ++new_pos; + + ++pos; } has_directions.push_back(has_dir); - for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto *order_expr = order_expression_list->children[i]->as(); + auto order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc 
if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; else order_expr->nulls_direction = order_expr->nulls_direction == 1 ? -1 : 1; + } } - node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + node = order_expression_list; + pos =begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 21e480234d39..7dea87eef25d 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB { @@ -22,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery kql_p; + ParserKQLWithUnionQuery KQL_p; ASTPtr query; - bool parsed = kql_p.parse(pos, query, expected); + bool parsed = KQL_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +35,20 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr kql_query; + // will support union next phase + ASTPtr KQLQuery; - if (!ParserKQLQuery().parse(pos, kql_query, expected)) + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) return false; - if (kql_query->as()) + if (KQLQuery->as()) { - node = std::move(kql_query); + node = std::move(KQLQuery); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(kql_query); + list_node->children.push_back(KQLQuery); auto select_with_union_query = std::make_shared(); node = select_with_union_query; @@ -58,45 +58,4 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKQLWithUnionQuery kql_p; - ASTPtr 
select; - ParserToken s_lparen(TokenType::OpeningRoundBracket); - - auto begin = pos; - auto paren_count = 0 ; - String kql_statement; - - if (s_lparen.ignore(pos, expected)) - { - ++paren_count; - while (!pos->isEnd()) - { - if (pos->type == TokenType::ClosingRoundBracket) - --paren_count; - if (pos->type == TokenType::OpeningRoundBracket) - ++paren_count; - - if (paren_count == 0) - break; - - kql_statement = kql_statement + " " + String(pos->begin,pos->end); - ++pos; - } - - Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth); - - if (kql_p.parse(pos_kql, select, expected)) - { - node = select; - ++pos; - return true; - } - } - pos = begin; - return false; -}; - } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index ef44b2d6c8ac..1eed2d008451 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -41,12 +41,5 @@ class ParserKQLWithUnionQuery : public IParserBase bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -class ParserKQLTaleFunction : public IParserBase -{ -protected: - const char * getName() const override { return "KQL() function"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 75eacb1adbd2..f7422c02bca1 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,3 +1,7 @@ +#include +#include +#include +#include #include #include #include @@ -10,71 +14,148 @@ #include #include #include +#include #include #include #include #include - namespace DB { +std::pair removeLastWord(String input) +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t 
i = 0; i < temp.size() - 1; i++) + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp[temp.size() - 1]); +} + bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr select_expression_list; - ASTPtr group_expression_list; + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; - String expr_aggregation; - String expr_groupby; - String expr_columns; - bool groupby = false; + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...] [by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns auto begin = pos; - auto pos_groupby = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (String(pos->begin, pos->end) == "by") - { groupby = true; - auto end = pos; - --end; - expr_aggregation = begin <= end ? 
String(begin->begin, end->end) : ""; - pos_groupby = pos; - ++pos_groupby; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); + exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } } ++pos; } - --pos; - if (groupby) - expr_groupby = String(pos_groupby->begin, pos->end); - else - expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; - auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; - expr_columns = groupby ? 
expr_aggregation_str + expr_groupby : expr_aggregation_str; + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; - String converted_columns = getExprFromToken(expr_columns, pos.max_depth); - - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); - IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); - - if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) + if (exprGroupby.empty()) + exprColumns = exprAggregation; + else + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); - if (groupby) { - String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); - - Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); - IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); - - if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); } + pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1aad02705dfd..426ac29fe6a9 100644 --- 
a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -8,10 +8,12 @@ namespace DB class ParserKQLSummarize : public ParserKQLBase { - +public: + ASTPtr group_expression_list; protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + }; } diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 6356ad688b67..8d450799785d 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,14 +3,23 @@ #include #include #include -#include + namespace DB { +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ({ + ( { "SELECT", "INSERT", "CREATE", @@ -33,9 +42,14 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - }); + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); - ASTPtr tables; String table_name(pos->begin,pos->end); String table_name_upcase(table_name); @@ -44,10 +58,9 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) return false; - if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) return false; - - node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index c67dcb151562..1266b6e732d5 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -8,10 +8,11 @@ namespace DB class ParserKQLTable : public ParserKQLBase { - protected: 
const char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + }; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index b452bd276429..8ffc5f77f90c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -316,19 +316,19 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" }, { "Customers | project FirstName,LastName,Occupation | take 3 | take 1", - "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + "throws Syntax error" }, { "Customers | sort by FirstName desc", @@ -336,7 +336,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | take 3 | order by FirstName desc", - "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC" + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" }, { "Customers | sort by FirstName asc", @@ -360,7 +360,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | sort by FirstName | order 
by Age ", - "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC" + "SELECT *\nFROM Customers\nORDER BY Age DESC" }, { "Customers | sort by FirstName nulls first", @@ -408,27 +408,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" + "throws Syntax error" }, { "Customers |summarize count() by Occupation", - "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize sum(Age) by Occupation", - "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize avg(Age) by Occupation", - "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize min(Age) by Occupation", - "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" }, { "Customers |summarize max(Age) by Occupation", - "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, { "Customers | where FirstName contains 'pet'", @@ -467,11 +467,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" }, { - "Customers | where FirstName startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" - }, - { - "Customers | where FirstName !startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From 
32a4d909f922f88ed59379e01c7a26cb59e106b8 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 018/342] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 102 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 83 insertions(+), 24 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca1..24473118dc04 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s 
+ if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + exprGroupby += exprBin; + bin_function = false; + } } + else exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && 
last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; - } - - last_string = String(pos->begin, pos->end); } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a9..1420d5ce5198 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From aeb9acf22ef7850c4d91f5937e376fb508e2630b Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:18:49 -0700 Subject: [PATCH 019/342] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 8ffc5f77f90c..6d33ed20f333 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -430,6 +430,10 @@ 
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From 886310054a6fa02a2b4abfe3db59349a25c73064 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 020/342] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc04..0260902c9379 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] - //Aggregation -> the Aggregation function on column - //GroupExpression - > columns auto begin = pos; From 00b96690171ac8fa80e5c0dd2f21d2d4e1e3e7f3 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 12:06:15 -0700 Subject: [PATCH 021/342] corrected unit test --- src/Parsers/tests/gtest_Parser.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6d33ed20f333..1ce82cab3bd8 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -432,8 +432,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" - } + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + }, { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" @@ -469,9 +469,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers|summarize count() by bin(Age, 10) ", - "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From 2fd3eb9e360d3d50beb039b9039dae16fea2389f Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 18:49:22 -0700 Subject: [PATCH 022/342] Kusto-phase1 : Add new test cases --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1ce82cab3bd8..ee1e5fa6d8c3 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -469,5 +469,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE 
match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); From 25b0508bc8047f18734688bd9bfce618f21bb510 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 22:17:58 -0700 Subject: [PATCH 023/342] Kusto-phase1: Fixed the bug for KQL filer with multiple operations --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++ src/Parsers/tests/gtest_Parser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1db05d3c07a3..726f28308eef 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -84,6 +84,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; } + else + --pos; if (KQLOperator.find(op) != KQLOperator.end()) opValue = KQLOperator[op]; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index ee1e5fa6d8c3..cb0b49aecbbf 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -408,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "throws Syntax error" + "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" }, { "Customers |summarize count() by Occupation", From 4c8fbd4bdc22db8089d0ff05632f44bf0ddbb4d8 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 024/342] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 34 +++++------ src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++--------- 
src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03a..466370f5d803 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9abb..4f7eddd96625 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) 
return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 726f28308eef..90b37ba8aea5 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,33 +10,33 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; - switch (wildcardsPos) + switch (wildcards_pos) { case WildcardsPos::none: break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + if (!tokens.empty() && ((++token_pos)->type == 
TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else - throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } @@ -48,7 +48,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -88,14 +88,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -192,7 +192,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -232,8 +232,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9beeeda55efc..4a9a13cf14fa 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ class KQLOperators { {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , 
KQLOperatorValue::not_startswith_cs}, }; - String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df0..55aade6b2b92 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - 
String KQLoperator(pos->begin,pos->end); - if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 9f226c2fc824..70e3283ee3e0 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ 
b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -48,11 +48,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25d..2afbad221314 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git 
a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 0260902c9379..48544a311041 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; + String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = 
false; String bin_column; @@ -133,45 +131,45 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby 
+ "," + expr_aggregation; } - Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce5198..b243f74d08f6 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ class ParserKQLSummarize : public ParserKQLBase protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From 63676ee9e75852945f49e5200abcd6d494a44be4 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 025/342] Kusto-pahse1: Fixed moy style issues. 
--- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 90b37ba8aea5..260c9070d513 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" 
) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -134,7 +134,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -142,7 +142,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 4a9a13cf14fa..a780e18d3339 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b6..0e25c9c4a6c3 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b92..1a850e77f483 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 25aa4e6b83c2..0545cd00cd9e 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 
@@ class ParserKQLBase : public IParserBase protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785d..a7ae7fef5795 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From 38178cd34ce701b236f4237731a6ad00f4135855 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 026/342] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 260c9070d513..60fa022f9bb1 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + 
"')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef5795..f1fc13d2c488 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From c34a7ad3b07802b8153c6983aeebde03423262e3 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 14 Jun 2022 07:40:06 -0700 Subject: [PATCH 027/342] Kusto-pahse2: Add support for multiple summarize --- src/Parsers/Kusto/ParserKQLQuery.cpp | 6 +++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 49 +++++++++++++++++++++++- src/Parsers/Kusto/ParserKQLSummarize.h | 5 ++- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 1a850e77f483..d925f66b321b 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -63,6 +63,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) std::vector> operation_pos; operation_pos.push_back(std::make_pair("table",pos)); + String table_name(pos->begin,pos->end); while (!pos->isEnd()) { @@ -104,10 +105,15 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; + kql_summarize_p.setTableName(table_name); if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else + { group_expression_list = 
kql_summarize_p.group_expression_list; + if (kql_summarize_p.tables) + tables = kql_summarize_p.tables; + } select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 48544a311041..7a88fec1988b 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -106,10 +106,57 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (op_pos.empty()) return true; - if (op_pos.size() != 1) // now only support one summarize + if (op_pos.size() > 2) // now only support max 2 summarize return false; auto begin = pos; + ASTPtr sub_qurery_table; + +// rewrite this part, make it resusable (may contains bin etc, and please inmplement summarize age= avg(Age) for sub query too): + if (op_pos.size() == 2) + { + bool groupby = false; + auto sub_pos = op_pos.front(); + String sub_aggregation; + String sub_groupby; + String sub_columns; + while (!sub_pos->isEnd() && sub_pos->type != TokenType::PipeMark && sub_pos->type != TokenType::Semicolon) + { + if (String(sub_pos->begin,sub_pos->end) == "by") + groupby = true; + else + { + if (groupby) + sub_groupby = sub_groupby + String(sub_pos->begin,sub_pos->end) +" "; + else + sub_aggregation = sub_aggregation + String(sub_pos->begin,sub_pos->end) +" "; + } + ++sub_pos; + } + + String sub_query; + if (sub_groupby.empty()) + { + sub_columns =sub_aggregation; + sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name+")"; + } + else + { + if (sub_aggregation.empty()) + sub_columns = sub_groupby; + else + sub_columns = sub_groupby + "," + sub_aggregation; + sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+")"; + } + + Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); + IParser::Pos 
pos_subquery(token_subquery, pos.max_depth); + + if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) + return false; + tables = sub_qurery_table; + } + pos = op_pos.back(); String expr_aggregation; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index b243f74d08f6..b71af138e7e6 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -9,12 +9,15 @@ class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; - + ASTPtr tables; + void setTableName(String table_name_) {table_name = table_name_;} protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; static std::pair removeLastWord(String input); static String getBinGroupbyString(String expr_bin); +private: + String table_name; }; } From 034466f26bf2b9796e753228402c92b51d8bb57e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 17 Jun 2022 08:47:08 -0700 Subject: [PATCH 028/342] Kusto-phase2 : Added KQL functions interface. 
changed the summarize class for new aggregation functions --- src/Parsers/CMakeLists.txt | 1 + .../KustoFunctions/IParserKQLFunction.cpp | 33 + .../Kusto/KustoFunctions/IParserKQLFunction.h | 39 + .../KQLAggregationFunctions.cpp | 24 + .../KustoFunctions/KQLAggregationFunctions.h | 9 + .../KustoFunctions/KQLBinaryFunctions.cpp | 24 + .../Kusto/KustoFunctions/KQLBinaryFunctions.h | 9 + .../KustoFunctions/KQLCastingFunctions.cpp | 51 ++ .../KustoFunctions/KQLCastingFunctions.h | 50 ++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 24 + .../KustoFunctions/KQLDateTimeFunctions.h | 9 + .../KustoFunctions/KQLDynamicFunctions.cpp | 24 + .../KustoFunctions/KQLDynamicFunctions.h | 9 + .../KustoFunctions/KQLFunctionFactory.cpp | 742 ++++++++++++++++++ .../Kusto/KustoFunctions/KQLFunctionFactory.h | 386 +++++++++ .../KustoFunctions/KQLGeneralFunctions.cpp | 24 + .../KustoFunctions/KQLGeneralFunctions.h | 9 + .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 24 + .../Kusto/KustoFunctions/KQLIPFunctions.h | 9 + .../KustoFunctions/KQLStringFunctions.cpp | 365 +++++++++ .../Kusto/KustoFunctions/KQLStringFunctions.h | 267 +++++++ .../KustoFunctions/KQLTimeSeriesFunctions.cpp | 24 + .../KustoFunctions/KQLTimeSeriesFunctions.h | 9 + src/Parsers/Kusto/ParserKQLOperators.cpp | 5 +- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 24 +- src/Parsers/Kusto/ParserKQLQuery.h | 7 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 13 +- src/Parsers/Kusto/ParserKQLSummarize.h | 4 + 29 files changed, 2210 insertions(+), 10 deletions(-) create mode 100644 src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp create mode 100644 
src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73d46593e042..1648abdbf55d 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,6 +4,7 @@ add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) +add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp new file mode 100644 index 000000000000..5455f41a0c22 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -0,0 +1,33 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h new file mode 100644 index 000000000000..81bf97f390ba --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +namespace DB +{ +class IParserKQLFunction //: public IParser +{ +public: + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, const F & func) + { + IParser::Pos begin = pos; + bool res = func(); + if (!res) + pos = begin; + return res; + } + struct IncreaseDepthTag {}; + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, IncreaseDepthTag, const F & func) + { + IParser::Pos begin = pos; + pos.increaseDepth(); + bool res = func(); + pos.decreaseDepth(); + if (!res) + pos = begin; + return res; + } + bool convert(String &out,IParser::Pos &pos); + 
virtual const char * getName() const = 0; + virtual ~IParserKQLFunction() = default; +protected: + virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp new file mode 100644 index 000000000000..5f43aa16d8e2 --- /dev/null +++ 
b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -0,0 +1,51 @@ + +#include +#include +#include +#include + +namespace DB +{ +bool Tobool::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToDatetime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToDouble::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToInt::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToTimespan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h new file mode 100644 index 000000000000..ab73fb3fc218 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +namespace DB +{ +class Tobool : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tobool()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToDatetime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todatetime()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToDouble : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todouble()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToInt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toint()";} + bool 
convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tostring()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToTimespan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "totimespan()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h @@ -0,0 +1,9 @@ +#pragma once 
+ +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp new file mode 100644 index 000000000000..528f906e51e7 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -0,0 +1,742 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + std::unordered_map KQLFunctionFactory::kql_functions = + { + {"datetime", KQLFunctionValue::datetime}, + {"ago", KQLFunctionValue::ago}, + {"datetime_add", KQLFunctionValue::datetime_add}, + {"datetime_part", KQLFunctionValue::datetime_part}, + {"datetime_diff", KQLFunctionValue::datetime_diff}, + {"dayofmonth", KQLFunctionValue::dayofmonth}, + {"dayofweek", KQLFunctionValue::dayofweek}, + {"dayofyear", KQLFunctionValue::dayofyear}, + {"endofday", KQLFunctionValue::endofday}, + {"endofweek", KQLFunctionValue::endofweek}, + {"endofyear", KQLFunctionValue::endofyear}, + {"format_datetime", KQLFunctionValue::format_datetime}, + {"format_timespan", KQLFunctionValue::format_timespan}, + {"getmonth", KQLFunctionValue::getmonth}, + {"getyear", KQLFunctionValue::getyear}, + {"hoursofday", KQLFunctionValue::hoursofday}, + {"make_timespan", KQLFunctionValue::make_timespan}, + {"make_datetime", KQLFunctionValue::make_datetime}, + {"now", KQLFunctionValue::now}, + {"startofday", KQLFunctionValue::startofday}, + {"startofmonth", KQLFunctionValue::startofmonth}, + {"startofweek", KQLFunctionValue::startofweek}, + {"startofyear", KQLFunctionValue::startofyear}, + {"todatetime", KQLFunctionValue::todatetime}, + {"totimespan", KQLFunctionValue::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", 
KQLFunctionValue::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, + {"weekofyear", KQLFunctionValue::weekofyear}, + + {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, + {"base64_decode_toarray", KQLFunctionValue::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, + {"countof", KQLFunctionValue::countof}, + {"extract", KQLFunctionValue::extract}, + {"extract_all", KQLFunctionValue::extract_all}, + {"extractjson", KQLFunctionValue::extractjson}, + {"has_any_index", KQLFunctionValue::has_any_index}, + {"indexof", KQLFunctionValue::indexof}, + {"isempty", KQLFunctionValue::isempty}, + {"isnotempty", KQLFunctionValue::isnotempty}, + {"isnotnull", KQLFunctionValue::isnotnull}, + {"isnull", KQLFunctionValue::isnull}, + {"parse_command_line", KQLFunctionValue::parse_command_line}, + {"parse_csv", KQLFunctionValue::parse_csv}, + {"parse_json", KQLFunctionValue::parse_json}, + {"parse_url", KQLFunctionValue::parse_url}, + {"parse_urlquery", KQLFunctionValue::parse_urlquery}, + {"parse_version", KQLFunctionValue::parse_version}, + {"replace_regex", KQLFunctionValue::replace_regex}, + {"reverse", KQLFunctionValue::reverse}, + {"split", KQLFunctionValue::split}, + {"strcat", KQLFunctionValue::strcat}, + {"strcat_delim", KQLFunctionValue::strcat_delim}, + {"strcmp", KQLFunctionValue::strcmp}, + {"strlen", KQLFunctionValue::strlen}, + {"strrep", KQLFunctionValue::strrep}, + {"substring", KQLFunctionValue::substring}, + {"toupper", KQLFunctionValue::toupper}, + {"translate", KQLFunctionValue::translate}, + {"trim", KQLFunctionValue::trim}, + {"trim_end", KQLFunctionValue::trim_end}, + {"trim_start", KQLFunctionValue::trim_start}, + {"url_decode", KQLFunctionValue::url_decode}, + {"url_encode", 
KQLFunctionValue::url_encode}, + + {"array_concat", KQLFunctionValue::array_concat}, + {"array_iif", KQLFunctionValue::array_iif}, + {"array_index_of", KQLFunctionValue::array_index_of}, + {"array_length", KQLFunctionValue::array_length}, + {"array_reverse", KQLFunctionValue::array_reverse}, + {"array_rotate_left", KQLFunctionValue::array_rotate_left}, + {"array_rotate_right", KQLFunctionValue::array_rotate_right}, + {"array_shift_left", KQLFunctionValue::array_shift_left}, + {"array_shift_right", KQLFunctionValue::array_shift_right}, + {"array_slice", KQLFunctionValue::array_slice}, + {"array_sort_asc", KQLFunctionValue::array_sort_asc}, + {"array_sort_desc", KQLFunctionValue::array_sort_desc}, + {"array_split", KQLFunctionValue::array_split}, + {"array_sum", KQLFunctionValue::array_sum}, + {"bag_keys", KQLFunctionValue::bag_keys}, + {"bag_merge", KQLFunctionValue::bag_merge}, + {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, + {"jaccard_index", KQLFunctionValue::jaccard_index}, + {"pack", KQLFunctionValue::pack}, + {"pack_all", KQLFunctionValue::pack_all}, + {"pack_array", KQLFunctionValue::pack_array}, + {"repeat", KQLFunctionValue::repeat}, + {"set_difference", KQLFunctionValue::set_difference}, + {"set_has_element", KQLFunctionValue::set_has_element}, + {"set_intersect", KQLFunctionValue::set_intersect}, + {"set_union", KQLFunctionValue::set_union}, + {"treepath", KQLFunctionValue::treepath}, + {"zip", KQLFunctionValue::zip}, + + {"tobool", KQLFunctionValue::tobool}, + {"toboolean", KQLFunctionValue::tobool}, + {"todouble", KQLFunctionValue::todouble}, + {"toint", KQLFunctionValue::toint}, + {"toreal", KQLFunctionValue::todouble}, + {"tostring", KQLFunctionValue::tostring}, + {"totimespan", KQLFunctionValue::totimespan}, + + {"arg_max", KQLFunctionValue::arg_max}, + {"arg_min", KQLFunctionValue::arg_min}, + {"avg", KQLFunctionValue::avg}, + {"avgif", KQLFunctionValue::avgif}, + {"binary_all_and", KQLFunctionValue::binary_all_and}, + {"binary_all_or", 
KQLFunctionValue::binary_all_or}, + {"binary_all_xor", KQLFunctionValue::binary_all_xor}, + {"buildschema", KQLFunctionValue::buildschema}, + {"count", KQLFunctionValue::count}, + {"countif", KQLFunctionValue::countif}, + {"dcount", KQLFunctionValue::dcount}, + {"dcountif", KQLFunctionValue::dcountif}, + {"make_bag", KQLFunctionValue::make_bag}, + {"make_bag_if", KQLFunctionValue::make_bag_if}, + {"make_list", KQLFunctionValue::make_list}, + {"make_list_if", KQLFunctionValue::make_list_if}, + {"make_list_with_nulls", KQLFunctionValue::make_list_with_nulls}, + {"make_set", KQLFunctionValue::make_set}, + {"make_set_if", KQLFunctionValue::make_set_if}, + {"max", KQLFunctionValue::max}, + {"maxif", KQLFunctionValue::maxif}, + {"min", KQLFunctionValue::min}, + {"minif", KQLFunctionValue::minif}, + {"percentiles", KQLFunctionValue::percentiles}, + {"percentiles_array", KQLFunctionValue::percentiles_array}, + {"percentilesw", KQLFunctionValue::percentilesw}, + {"percentilesw_array", KQLFunctionValue::percentilesw_array}, + {"stdev", KQLFunctionValue::stdev}, + {"stdevif", KQLFunctionValue::stdevif}, + {"sum", KQLFunctionValue::sum}, + {"sumif", KQLFunctionValue::sumif}, + {"take_any", KQLFunctionValue::take_any}, + {"take_anyif", KQLFunctionValue::take_anyif}, + {"variance", KQLFunctionValue::variance}, + {"varianceif", KQLFunctionValue::varianceif}, + + {"series_fir", KQLFunctionValue::series_fir}, + {"series_iir", KQLFunctionValue::series_iir}, + {"series_fit_line", KQLFunctionValue::series_fit_line}, + {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, + {"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, + {"series_outliers", KQLFunctionValue::series_outliers}, + {"series_periods_detect", KQLFunctionValue::series_periods_detect}, + {"series_periods_validate", KQLFunctionValue::series_periods_validate}, + {"series_stats_dynamic", 
KQLFunctionValue::series_stats_dynamic}, + {"series_stats", KQLFunctionValue::series_stats}, + {"series_fill_backward", KQLFunctionValue::series_fill_backward}, + {"series_fill_const", KQLFunctionValue::series_fill_const}, + {"series_fill_forward", KQLFunctionValue::series_fill_forward}, + {"series_fill_linear", KQLFunctionValue::series_fill_linear}, + + {"ipv4_compare", KQLFunctionValue::ipv4_compare}, + {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, + {"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, + {"ipv4_is_private", KQLFunctionValue::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunctionValue::parse_ipv4}, + {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, + {"ipv6_compare", KQLFunctionValue::ipv6_compare}, + {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, + {"parse_ipv6", KQLFunctionValue::parse_ipv6}, + {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, + {"format_ipv4", KQLFunctionValue::format_ipv4}, + {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, + + {"binary_and", KQLFunctionValue::binary_and}, + {"binary_not", KQLFunctionValue::binary_not}, + {"binary_or", KQLFunctionValue::binary_or}, + {"binary_shift_left", KQLFunctionValue::binary_shift_left}, + {"binary_shift_right", KQLFunctionValue::binary_shift_right}, + {"binary_xor", KQLFunctionValue::binary_xor}, + {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, + {"bin", KQLFunctionValue::bin} + }; + + +std::unique_ptr KQLFunctionFactory::get(String &kql_function) +{ +/* if (kql_function=="strrep") + return std::make_unique(); + else if (kql_function=="strcat") + return std::make_unique(); + else + return nullptr;*/ + if (kql_functions.find(kql_function) == kql_functions.end()) + return nullptr; + + auto kql_function_id = kql_functions[kql_function]; + switch (kql_function_id) + { + case KQLFunctionValue::none: + return nullptr; + + case KQLFunctionValue::timespan: + return nullptr; + + case 
KQLFunctionValue::datetime: + return nullptr; + + case KQLFunctionValue::ago: + return nullptr; + + case KQLFunctionValue::datetime_add: + return nullptr; + + case KQLFunctionValue::datetime_part: + return nullptr; + + case KQLFunctionValue::datetime_diff: + return nullptr; + + case KQLFunctionValue::dayofmonth: + return nullptr; + + case KQLFunctionValue::dayofweek: + return nullptr; + + case KQLFunctionValue::dayofyear: + return nullptr; + + case KQLFunctionValue::endofday: + return nullptr; + + case KQLFunctionValue::endofweek: + return nullptr; + + case KQLFunctionValue::endofyear: + return nullptr; + + case KQLFunctionValue::format_datetime: + return nullptr; + + case KQLFunctionValue::format_timespan: + return nullptr; + + case KQLFunctionValue::getmonth: + return nullptr; + + case KQLFunctionValue::getyear: + return nullptr; + + case KQLFunctionValue::hoursofday: + return nullptr; + + case KQLFunctionValue::make_timespan: + return nullptr; + + case KQLFunctionValue::make_datetime: + return nullptr; + + case KQLFunctionValue::now: + return nullptr; + + case KQLFunctionValue::startofday: + return nullptr; + + case KQLFunctionValue::startofmonth: + return nullptr; + + case KQLFunctionValue::startofweek: + return nullptr; + + case KQLFunctionValue::startofyear: + return nullptr; + + case KQLFunctionValue::unixtime_microseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_milliseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_nanoseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_seconds_todatetime: + return nullptr; + + case KQLFunctionValue::weekofyear: + return nullptr; + + + case KQLFunctionValue::base64_encode_tostring: + return nullptr; + + case KQLFunctionValue::base64_encode_fromguid: + return nullptr; + + case KQLFunctionValue::base64_decode_tostring: + return nullptr; + + case KQLFunctionValue::base64_decode_toarray: + return nullptr; + + case KQLFunctionValue::base64_decode_toguid: + 
return nullptr; + + case KQLFunctionValue::countof: + return nullptr; + + case KQLFunctionValue::extract: + return nullptr; + + case KQLFunctionValue::extract_all: + return nullptr; + + case KQLFunctionValue::extractjson: + return nullptr; + + case KQLFunctionValue::has_any_index: + return nullptr; + + case KQLFunctionValue::indexof: + return nullptr; + + case KQLFunctionValue::isempty: + return nullptr; + + case KQLFunctionValue::isnotempty: + return nullptr; + + case KQLFunctionValue::isnotnull: + return nullptr; + + case KQLFunctionValue::isnull: + return nullptr; + + case KQLFunctionValue::parse_command_line: + return nullptr; + + case KQLFunctionValue::parse_csv: + return nullptr; + + case KQLFunctionValue::parse_json: + return nullptr; + + case KQLFunctionValue::parse_url: + return nullptr; + + case KQLFunctionValue::parse_urlquery: + return nullptr; + + case KQLFunctionValue::parse_version: + return nullptr; + + case KQLFunctionValue::replace_regex: + return nullptr; + + case KQLFunctionValue::reverse: + return nullptr; + + case KQLFunctionValue::split: + return nullptr; + + case KQLFunctionValue::strcat: + return std::make_unique(); + + case KQLFunctionValue::strcat_delim: + return nullptr; + + case KQLFunctionValue::strcmp: + return nullptr; + + case KQLFunctionValue::strlen: + return nullptr; + + case KQLFunctionValue::strrep: + return std::make_unique(); + + case KQLFunctionValue::substring: + return nullptr; + + case KQLFunctionValue::toupper: + return nullptr; + + case KQLFunctionValue::translate: + return nullptr; + + case KQLFunctionValue::trim: + return nullptr; + + case KQLFunctionValue::trim_end: + return nullptr; + + case KQLFunctionValue::trim_start: + return nullptr; + + case KQLFunctionValue::url_decode: + return nullptr; + + case KQLFunctionValue::url_encode: + return nullptr; + + case KQLFunctionValue::array_concat: + return nullptr; + + case KQLFunctionValue::array_iif: + return nullptr; + + case KQLFunctionValue::array_index_of: + return 
nullptr; + + case KQLFunctionValue::array_length: + return nullptr; + + case KQLFunctionValue::array_reverse: + return nullptr; + + case KQLFunctionValue::array_rotate_left: + return nullptr; + + case KQLFunctionValue::array_rotate_right: + return nullptr; + + case KQLFunctionValue::array_shift_left: + return nullptr; + + case KQLFunctionValue::array_shift_right: + return nullptr; + + case KQLFunctionValue::array_slice: + return nullptr; + + case KQLFunctionValue::array_sort_asc: + return nullptr; + + case KQLFunctionValue::array_sort_desc: + return nullptr; + + case KQLFunctionValue::array_split: + return nullptr; + + case KQLFunctionValue::array_sum: + return nullptr; + + case KQLFunctionValue::bag_keys: + return nullptr; + + case KQLFunctionValue::bag_merge: + return nullptr; + + case KQLFunctionValue::bag_remove_keys: + return nullptr; + + case KQLFunctionValue::jaccard_index: + return nullptr; + + case KQLFunctionValue::pack: + return nullptr; + + case KQLFunctionValue::pack_all: + return nullptr; + + case KQLFunctionValue::pack_array: + return nullptr; + + case KQLFunctionValue::repeat: + return nullptr; + + case KQLFunctionValue::set_difference: + return nullptr; + + case KQLFunctionValue::set_has_element: + return nullptr; + + case KQLFunctionValue::set_intersect: + return nullptr; + + case KQLFunctionValue::set_union: + return nullptr; + + case KQLFunctionValue::treepath: + return nullptr; + + case KQLFunctionValue::zip: + return nullptr; + + case KQLFunctionValue::tobool: + return std::make_unique(); + + case KQLFunctionValue::todatetime: + return std::make_unique(); + + case KQLFunctionValue::todouble: + return std::make_unique(); + + case KQLFunctionValue::toint: + return std::make_unique(); + + case KQLFunctionValue::tostring: + return std::make_unique(); + + case KQLFunctionValue::totimespan: + return std::make_unique(); + + case KQLFunctionValue::arg_max: + return nullptr; + + case KQLFunctionValue::arg_min: + return nullptr; + + case 
KQLFunctionValue::avg: + return nullptr; + + case KQLFunctionValue::avgif: + return nullptr; + + case KQLFunctionValue::binary_all_and: + return nullptr; + + case KQLFunctionValue::binary_all_or: + return nullptr; + + case KQLFunctionValue::binary_all_xor: + return nullptr; + case KQLFunctionValue::buildschema: + return nullptr; + + case KQLFunctionValue::count: + return nullptr; + + case KQLFunctionValue::countif: + return nullptr; + + case KQLFunctionValue::dcount: + return nullptr; + + case KQLFunctionValue::dcountif: + return nullptr; + + case KQLFunctionValue::make_bag: + return nullptr; + + case KQLFunctionValue::make_bag_if: + return nullptr; + + case KQLFunctionValue::make_list: + return nullptr; + + case KQLFunctionValue::make_list_if: + return nullptr; + + case KQLFunctionValue::make_list_with_nulls: + return nullptr; + + case KQLFunctionValue::make_set: + return nullptr; + + case KQLFunctionValue::make_set_if: + return nullptr; + + case KQLFunctionValue::max: + return nullptr; + + case KQLFunctionValue::maxif: + return nullptr; + + case KQLFunctionValue::min: + return nullptr; + + case KQLFunctionValue::minif: + return nullptr; + + case KQLFunctionValue::percentiles: + return nullptr; + + case KQLFunctionValue::percentiles_array: + return nullptr; + + case KQLFunctionValue::percentilesw: + return nullptr; + + case KQLFunctionValue::percentilesw_array: + return nullptr; + + case KQLFunctionValue::stdev: + return nullptr; + + case KQLFunctionValue::stdevif: + return nullptr; + + case KQLFunctionValue::sum: + return nullptr; + + case KQLFunctionValue::sumif: + return nullptr; + + case KQLFunctionValue::take_any: + return nullptr; + + case KQLFunctionValue::take_anyif: + return nullptr; + + case KQLFunctionValue::variance: + return nullptr; + + case KQLFunctionValue::varianceif: + return nullptr; + + + case KQLFunctionValue::series_fir: + return nullptr; + + case KQLFunctionValue::series_iir: + return nullptr; + + case KQLFunctionValue::series_fit_line: + 
return nullptr; + + case KQLFunctionValue::series_fit_line_dynamic: + return nullptr; + + case KQLFunctionValue::series_fit_2lines: + return nullptr; + + case KQLFunctionValue::series_fit_2lines_dynamic: + return nullptr; + + case KQLFunctionValue::series_outliers: + return nullptr; + + case KQLFunctionValue::series_periods_detect: + return nullptr; + + case KQLFunctionValue::series_periods_validate: + return nullptr; + + case KQLFunctionValue::series_stats_dynamic: + return nullptr; + + case KQLFunctionValue::series_stats: + return nullptr; + + case KQLFunctionValue::series_fill_backward: + return nullptr; + + case KQLFunctionValue::series_fill_const: + return nullptr; + + case KQLFunctionValue::series_fill_forward: + return nullptr; + + case KQLFunctionValue::series_fill_linear: + return nullptr; + + + case KQLFunctionValue::ipv4_compare: + return nullptr; + + case KQLFunctionValue::ipv4_is_in_range: + return nullptr; + + case KQLFunctionValue::ipv4_is_match: + return nullptr; + + case KQLFunctionValue::ipv4_is_private: + return nullptr; + + case KQLFunctionValue::ipv4_netmask_suffix: + return nullptr; + + case KQLFunctionValue::parse_ipv4: + return nullptr; + + case KQLFunctionValue::parse_ipv4_mask: + return nullptr; + + case KQLFunctionValue::ipv6_compare: + return nullptr; + + case KQLFunctionValue::ipv6_is_match: + return nullptr; + + case KQLFunctionValue::parse_ipv6: + return nullptr; + + case KQLFunctionValue::parse_ipv6_mask: + return nullptr; + + case KQLFunctionValue::format_ipv4: + return nullptr; + + case KQLFunctionValue::format_ipv4_mask: + return nullptr; + + + case KQLFunctionValue::binary_and: + return nullptr; + + case KQLFunctionValue::binary_not: + return nullptr; + + case KQLFunctionValue::binary_or: + return nullptr; + + case KQLFunctionValue::binary_shift_left: + return nullptr; + + case KQLFunctionValue::binary_shift_right: + return nullptr; + + case KQLFunctionValue::binary_xor: + return nullptr; + + case 
KQLFunctionValue::bitset_count_ones: + return nullptr; + + case KQLFunctionValue::bin: + return nullptr; + } +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h new file mode 100644 index 000000000000..86e879b4668e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -0,0 +1,386 @@ +#pragma once + +#include +#include +#include +namespace DB +{ + enum class KQLFunctionValue : uint16_t + { none, + timespan, + datetime, + ago, + datetime_add, + datetime_part, + datetime_diff, + dayofmonth, + dayofweek, + dayofyear, + endofday, + endofweek, + endofyear, + format_datetime, + format_timespan, + getmonth, + getyear, + hoursofday, + make_timespan, + make_datetime, + now, + startofday, + startofmonth, + startofweek, + startofyear, + todatetime, + totimespan, + unixtime_microseconds_todatetime, + unixtime_milliseconds_todatetime, + unixtime_nanoseconds_todatetime, + unixtime_seconds_todatetime, + weekofyear, + + base64_encode_tostring, + base64_encode_fromguid, + base64_decode_tostring, + base64_decode_toarray, + base64_decode_toguid, + countof, + extract, + extract_all, + extractjson, + has_any_index, + indexof, + isempty, + isnotempty, + isnotnull, + isnull, + parse_command_line, + parse_csv, + parse_json, + parse_url, + parse_urlquery, + parse_version, + replace_regex, + reverse, + split, + strcat, + strcat_delim, + strcmp, + strlen, + strrep, + substring, + toupper, + translate, + trim, + trim_end, + trim_start, + url_decode, + url_encode, + + array_concat, + array_iif, + array_index_of, + array_length, + array_reverse, + array_rotate_left, + array_rotate_right, + array_shift_left, + array_shift_right, + array_slice, + array_sort_asc, + array_sort_desc, + array_split, + array_sum, + bag_keys, + bag_merge, + bag_remove_keys, + jaccard_index, + pack, + pack_all, + pack_array, + repeat, + set_difference, + set_has_element, + set_intersect, + set_union, + treepath, + zip, + 
+ tobool, + todouble, + toint, + tostring, + + arg_max, + arg_min, + avg, + avgif, + binary_all_and, + binary_all_or, + binary_all_xor, + buildschema, + count, + countif, + dcount, + dcountif, + make_bag, + make_bag_if, + make_list, + make_list_if, + make_list_with_nulls, + make_set, + make_set_if, + max, + maxif, + min, + minif, + percentiles, + percentiles_array, + percentilesw, + percentilesw_array, + stdev, + stdevif, + sum, + sumif, + take_any, + take_anyif, + variance, + varianceif, + + series_fir, + series_iir, + series_fit_line, + series_fit_line_dynamic, + series_fit_2lines, + series_fit_2lines_dynamic, + series_outliers, + series_periods_detect, + series_periods_validate, + series_stats_dynamic, + series_stats, + series_fill_backward, + series_fill_const, + series_fill_forward, + series_fill_linear, + + ipv4_compare, + ipv4_is_in_range, + ipv4_is_match, + ipv4_is_private, + ipv4_netmask_suffix, + parse_ipv4, + parse_ipv4_mask, + ipv6_compare, + ipv6_is_match, + parse_ipv6, + parse_ipv6_mask, + format_ipv4, + format_ipv4_mask, + + binary_and, + binary_not, + binary_or, + binary_shift_left, + binary_shift_right, + binary_xor, + bitset_count_ones, + + bin + }; + +class KQLFunctionFactory +{ +public: + static std::unique_ptr get(String &kql_function); + +protected: + + + static std::unordered_map kql_functions;/* = + { + {"datetime", KQLFunctionValue::datetime}, + {"ago", KQLFunctionValue::ago}, + {"datetime_add", KQLFunctionValue::datetime_add}, + {"datetime_part", KQLFunctionValue::datetime_part}, + {"datetime_diff", KQLFunctionValue::datetime_diff}, + {"dayofmonth", KQLFunctionValue::dayofmonth}, + {"dayofweek", KQLFunctionValue::dayofweek}, + {"dayofyear", KQLFunctionValue::dayofyear}, + {"endofday", KQLFunctionValue::endofday}, + {"endofweek", KQLFunctionValue::endofweek}, + {"endofyear", KQLFunctionValue::endofyear}, + {"format_datetime", KQLFunctionValue::format_datetime}, + {"format_timespan", KQLFunctionValue::format_timespan}, + {"getmonth", 
KQLFunctionValue::getmonth}, + {"getyear", KQLFunctionValue::getyear}, + {"hoursofday", KQLFunctionValue::hoursofday}, + {"make_timespan", KQLFunctionValue::make_timespan}, + {"make_datetime", KQLFunctionValue::make_datetime}, + {"now", KQLFunctionValue::now}, + {"startofday", KQLFunctionValue::startofday}, + {"startofmonth", KQLFunctionValue::startofmonth}, + {"startofweek", KQLFunctionValue::startofweek}, + {"startofyear", KQLFunctionValue::startofyear}, + {"todatetime", KQLFunctionValue::todatetime}, + {"totimespan", KQLFunctionValue::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, + {"weekofyear", KQLFunctionValue::weekofyear}, + + {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, + {"base64_decode_toarray", KQLFunctionValue::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, + {"countof", KQLFunctionValue::countof}, + {"extract", KQLFunctionValue::extract}, + {"extract_all", KQLFunctionValue::extract_all}, + {"extractjson", KQLFunctionValue::extractjson}, + {"has_any_index", KQLFunctionValue::has_any_index}, + {"indexof", KQLFunctionValue::indexof}, + {"isempty", KQLFunctionValue::isempty}, + {"isnotempty", KQLFunctionValue::isnotempty}, + {"isnotnull", KQLFunctionValue::isnotnull}, + {"isnull", KQLFunctionValue::isnull}, + {"parse_command_line", KQLFunctionValue::parse_command_line}, + {"parse_csv", KQLFunctionValue::parse_csv}, + {"parse_json", KQLFunctionValue::parse_json}, + {"parse_url", KQLFunctionValue::parse_url}, + {"parse_urlquery", 
KQLFunctionValue::parse_urlquery}, + {"parse_version", KQLFunctionValue::parse_version}, + {"replace_regex", KQLFunctionValue::replace_regex}, + {"reverse", KQLFunctionValue::reverse}, + {"split", KQLFunctionValue::split}, + {"strcat", KQLFunctionValue::strcat}, + {"strcat_delim", KQLFunctionValue::strcat_delim}, + {"strcmp", KQLFunctionValue::strcmp}, + {"strlen", KQLFunctionValue::strlen}, + {"strrep", KQLFunctionValue::strrep}, + {"substring", KQLFunctionValue::substring}, + {"toupper", KQLFunctionValue::toupper}, + {"translate", KQLFunctionValue::translate}, + {"trim", KQLFunctionValue::trim}, + {"trim_end", KQLFunctionValue::trim_end}, + {"trim_start", KQLFunctionValue::trim_start}, + {"url_decode", KQLFunctionValue::url_decode}, + {"url_encode", KQLFunctionValue::url_encode}, + + {"array_concat", KQLFunctionValue::array_concat}, + {"array_iif", KQLFunctionValue::array_iif}, + {"array_index_of", KQLFunctionValue::array_index_of}, + {"array_length", KQLFunctionValue::array_length}, + {"array_reverse", KQLFunctionValue::array_reverse}, + {"array_rotate_left", KQLFunctionValue::array_rotate_left}, + {"array_rotate_right", KQLFunctionValue::array_rotate_right}, + {"array_shift_left", KQLFunctionValue::array_shift_left}, + {"array_shift_right", KQLFunctionValue::array_shift_right}, + {"array_slice", KQLFunctionValue::array_slice}, + {"array_sort_asc", KQLFunctionValue::array_sort_asc}, + {"array_sort_desc", KQLFunctionValue::array_sort_desc}, + {"array_split", KQLFunctionValue::array_split}, + {"array_sum", KQLFunctionValue::array_sum}, + {"bag_keys", KQLFunctionValue::bag_keys}, + {"bag_merge", KQLFunctionValue::bag_merge}, + {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, + {"jaccard_index", KQLFunctionValue::jaccard_index}, + {"pack", KQLFunctionValue::pack}, + {"pack_all", KQLFunctionValue::pack_all}, + {"pack_array", KQLFunctionValue::pack_array}, + {"repeat", KQLFunctionValue::repeat}, + {"set_difference", KQLFunctionValue::set_difference}, + 
{"set_has_element", KQLFunctionValue::set_has_element}, + {"set_intersect", KQLFunctionValue::set_intersect}, + {"set_union", KQLFunctionValue::set_union}, + {"treepath", KQLFunctionValue::treepath}, + {"zip", KQLFunctionValue::zip}, + + {"tobool", KQLFunctionValue::tobool}, + {"toboolean", KQLFunctionValue::tobool}, + {"todouble", KQLFunctionValue::todouble}, + {"toint", KQLFunctionValue::toint}, + {"toreal", KQLFunctionValue::todouble}, + {"tostring", KQLFunctionValue::tostring}, + {"totimespan", KQLFunctionValue::totimespan}, + + {"arg_max", KQLFunctionValue::arg_max}, + {"arg_min", KQLFunctionValue::arg_min}, + {"avg", KQLFunctionValue::avg}, + {"avgif", KQLFunctionValue::avgif}, + {"binary_all_and", KQLFunctionValue::binary_all_and}, + {"binary_all_or", KQLFunctionValue::binary_all_or}, + {"binary_all_xor", KQLFunctionValue::binary_all_xor}, + {"buildschema", KQLFunctionValue::buildschema}, + {"count", KQLFunctionValue::count}, + {"countif", KQLFunctionValue::countif}, + {"dcount", KQLFunctionValue::dcount}, + {"dcountif", KQLFunctionValue::dcountif}, + {"make_bag", KQLFunctionValue::make_bag}, + {"make_bag_if", KQLFunctionValue::make_bag_if}, + {"make_list", KQLFunctionValue::make_list}, + {"make_list_if", KQLFunctionValue::make_list_if}, + {"make_list_with_nulls", KQLFunctionValue::make_list_with_nulls}, + {"make_set", KQLFunctionValue::make_set}, + {"make_set_if", KQLFunctionValue::make_set_if}, + {"max", KQLFunctionValue::max}, + {"maxif", KQLFunctionValue::maxif}, + {"min", KQLFunctionValue::min}, + {"minif", KQLFunctionValue::minif}, + {"percentiles", KQLFunctionValue::percentiles}, + {"percentiles_array", KQLFunctionValue::percentiles_array}, + {"percentilesw", KQLFunctionValue::percentilesw}, + {"percentilesw_array", KQLFunctionValue::percentilesw_array}, + {"stdev", KQLFunctionValue::stdev}, + {"stdevif", KQLFunctionValue::stdevif}, + {"sum", KQLFunctionValue::sum}, + {"sumif", KQLFunctionValue::sumif}, + {"take_any", KQLFunctionValue::take_any}, + 
{"take_anyif", KQLFunctionValue::take_anyif}, + {"variance", KQLFunctionValue::variance}, + {"varianceif", KQLFunctionValue::varianceif}, + + {"series_fir", KQLFunctionValue::series_fir}, + {"series_iir", KQLFunctionValue::series_iir}, + {"series_fit_line", KQLFunctionValue::series_fit_line}, + {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, + {"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, + {"series_outliers", KQLFunctionValue::series_outliers}, + {"series_periods_detect", KQLFunctionValue::series_periods_detect}, + {"series_periods_validate", KQLFunctionValue::series_periods_validate}, + {"series_stats_dynamic", KQLFunctionValue::series_stats_dynamic}, + {"series_stats", KQLFunctionValue::series_stats}, + {"series_fill_backward", KQLFunctionValue::series_fill_backward}, + {"series_fill_const", KQLFunctionValue::series_fill_const}, + {"series_fill_forward", KQLFunctionValue::series_fill_forward}, + {"series_fill_linear", KQLFunctionValue::series_fill_linear}, + + {"ipv4_compare", KQLFunctionValue::ipv4_compare}, + {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, + {"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, + {"ipv4_is_private", KQLFunctionValue::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunctionValue::parse_ipv4}, + {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, + {"ipv6_compare", KQLFunctionValue::ipv6_compare}, + {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, + {"parse_ipv6", KQLFunctionValue::parse_ipv6}, + {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, + {"format_ipv4", KQLFunctionValue::format_ipv4}, + {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, + + {"binary_and", KQLFunctionValue::binary_and}, + {"binary_not", KQLFunctionValue::binary_not}, + {"binary_or", KQLFunctionValue::binary_or}, + {"binary_shift_left", 
KQLFunctionValue::binary_shift_left}, + {"binary_shift_right", KQLFunctionValue::binary_shift_right}, + {"binary_xor", KQLFunctionValue::binary_xor}, + {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, + {"bin", KQLFunctionValue::bin} + };*/ + +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp new file mode 100644 index 
000000000000..851c631d1ceb --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include + +namespace DB +{ + +bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Base64DecodeToArray::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Base64DecodeToGuid::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool CountOf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Extract::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ExtractAll::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ExtractJson::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool HasAnyIndex::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IndexOf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IsEmpty::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IsNotEmpty::convertImpl(String &out,IParser::Pos &pos) +{ + String res 
= String(pos->begin,pos->end); + out = res; + return false; +} + +bool IsNotNull::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IsNull::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseCsv::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseJson::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseUrl::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseUrlQuery::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseVersion::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ReplaceRegex::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Reverse::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Split::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StrCat::convertImpl(String &out,IParser::Pos &pos) +{ + std::unique_ptr fun; + std::vector args; + String res = "concat("; + + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return false; + } + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + String tmp_arg = 
String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { + String new_arg; + fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,pos)) + tmp_arg = new_arg; + } + else if (pos->type == TokenType::ClosingRoundBracket) + { + for (auto arg : args) + res+=arg; + + res += ")"; + out = res; + return true; + } + args.push_back(tmp_arg); + } + return false; +} + +bool StrCatDelim::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StrCmp::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StrLen::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StrRep::convertImpl(String &out,IParser::Pos &pos) +{ + std::unique_ptr fun; + String res = String(pos->begin,pos->end); + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return false; + } + ++pos; + String value = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { String func_value; + fun = KQLFunctionFactory::get(value); + if (fun && fun->convert(func_value,pos)) + value = func_value; + } + ++pos; + if (pos->type != TokenType::Comma) + return false; + + ++pos; + String multiplier = String(pos->begin,pos->end); + String new_multiplier; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::BareWord ) + { + String fun_multiplier; + fun = KQLFunctionFactory::get(multiplier); + if ( fun && fun->convert(fun_multiplier,pos)) + new_multiplier += fun_multiplier; + } + else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) // has delimiter + { + break; + } + else + new_multiplier += String(pos->begin,pos->end); + ++pos; + } + + if (!new_multiplier.empty()) + multiplier = new_multiplier; + + String delimiter ; + if 
(pos->type == TokenType::Comma) + { + ++pos; + delimiter = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { String func_delimiter; + fun = KQLFunctionFactory::get(delimiter); + if (fun && fun->convert(func_delimiter,pos)) + delimiter = func_delimiter; + } + ++pos; + } + if (pos->type == TokenType::ClosingRoundBracket) + { + if (!delimiter.empty()) + { + String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; + res = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; + } + else + res = "repeat("+ value + ", " + multiplier + ")"; + out = res; + return true; + } + return false; +} + +bool SubString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToUpper::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Translate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Trim::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TrimEnd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TrimStart::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UrlDecode::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UrlEncode::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h new file mode 100644 index 000000000000..db7ab5077502 --- /dev/null 
+++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h @@ -0,0 +1,267 @@ +#pragma once + +#include +#include +namespace DB +{ +class Base64EncodeToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_encode_tostring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64EncodeFromGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_encode_fromguid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_tostring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_toarray()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_toguid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "countof()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Extract : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ExtractAll : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract_all()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ExtractJson : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extractjson()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HasAnyIndex : public 
IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_index()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IndexOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "indexof()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsEmpty : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isempty()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNotEmpty : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnotempty()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNotNull : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnotnull()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNull : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnull()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseCommandLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_command_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseCsv : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_csv()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseJson : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_json()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseUrl : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_url()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseUrlQuery : public IParserKQLFunction +{ +protected: + const char * getName() const 
override { return "parse_urlquery()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseVersion : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_version()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ReplaceRegex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "replace_regex()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Reverse : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "reverse()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Split : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "split()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCatDelim : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcat_delim()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCmp : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcmp()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrLen : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strlen()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrRep : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strrep()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SubString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "substring()"; } + bool convertImpl(String &out,IParser::Pos &pos) 
override; +}; + +class ToUpper : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toupper()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Translate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "translate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Trim : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TrimEnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim_end()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TrimStart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim_start()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UrlDecode : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "url_decode()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UrlEncode : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "url_encode()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h new file mode 100644 index 
000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 60fa022f9bb1..c3d0843b1f08 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -41,11 +41,13 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos pos) +String KQLOperators::getExprFromToken(IParser::Pos &pos) { String res; std::vector tokens; + auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -235,6 +237,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) for (auto & token : tokens) res = res + token + " "; + pos = begin; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index a780e18d3339..9920593c7aec 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 +9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos pos); + String getExprFromToken(IParser::Pos &pos) ; protected: enum class WildcardsPos:uint8_t diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index d925f66b321b..f1348c4b3c68 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -8,7 +8,9 @@ #include #include #include - +#include +#include +#include namespace DB { @@ -18,12 +20,22 @@ bool ParserKQLBase :: parsePrepare(Pos & pos) return true; } -String ParserKQLBase :: getExprFromToken(Pos pos) +String ParserKQLBase :: getExprFromToken(Pos &pos) { String res; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + std::unique_ptr 
kql_function; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - res = res + String(pos->begin,pos->end) +" "; + String token = String(pos->begin,pos->end); + String new_token; + if (pos->type == TokenType::BareWord ) + { + kql_function = KQLFunctionFactory::get(token); + if (kql_function && kql_function->convert(new_token,pos)) + token = new_token; + } + res = res + token +" "; ++pos; } return res; @@ -106,6 +118,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; kql_summarize_p.setTableName(table_name); + kql_summarize_p.setFilterPos(kql_filter_p.op_pos); if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else @@ -113,6 +126,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) group_expression_list = kql_summarize_p.group_expression_list; if (kql_summarize_p.tables) tables = kql_summarize_p.tables; + + if (kql_summarize_p.where_expression) + where_expression = kql_summarize_p.where_expression; } select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 0545cd00cd9e..42122fb6e00a 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -7,12 +7,13 @@ namespace DB class ParserKQLBase : public IParserBase { public: - virtual bool parsePrepare(Pos & pos) ; + virtual bool parsePrepare(Pos & pos); + std::vector op_pos; protected: - std::vector op_pos; + std::vector expressions; - virtual String getExprFromToken(Pos pos); + virtual String getExprFromToken(Pos &pos); }; class ParserKQLQuery : public IParserBase diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 2afbad221314..cc4bece7ebf6 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,7 +4,7 
@@ #include #include #include - +#include namespace DB { @@ -57,5 +57,16 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/* +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapParseImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +}*/ } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index b71af138e7e6..8a92412d87c0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -10,7 +10,10 @@ class ParserKQLSummarize : public ParserKQLBase public: ASTPtr group_expression_list; ASTPtr tables; + ASTPtr where_expression; + void setTableName(String table_name_) {table_name = table_name_;} + void setFilterPos(std::vector &filter_pos_) {filter_pos = filter_pos_;} protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; @@ -18,6 +21,7 @@ class ParserKQLSummarize : public ParserKQLBase static String getBinGroupbyString(String expr_bin); private: String table_name; + std::vector filter_pos; }; } From 26ab8123e94c5b7426ccb81b81725dfa6044d0b1 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 21 Jun 2022 09:33:07 -0700 Subject: [PATCH 029/342] Kusto-phase2: Add KQL functions parser --- .../KustoFunctions/IParserKQLFunction.cpp | 45 ++- .../Kusto/KustoFunctions/IParserKQLFunction.h | 3 +- .../KQLAggregationFunctions.cpp | 244 +++++++++++- .../KustoFunctions/KQLAggregationFunctions.h | 245 ++++++++++++ .../KustoFunctions/KQLBinaryFunctions.cpp | 48 ++- .../Kusto/KustoFunctions/KQLBinaryFunctions.h | 48 +++ .../KustoFunctions/KQLCastingFunctions.cpp | 7 +- .../KustoFunctions/KQLCastingFunctions.h | 6 +- .../KustoFunctions/KQLDateTimeFunctions.cpp | 202 +++++++++- .../KustoFunctions/KQLDateTimeFunctions.h | 203 ++++++++++ 
.../KustoFunctions/KQLDynamicFunctions.cpp | 195 +++++++++- .../KustoFunctions/KQLDynamicFunctions.h | 195 ++++++++++ .../KustoFunctions/KQLFunctionFactory.cpp | 350 +++++++++--------- .../Kusto/KustoFunctions/KQLFunctionFactory.h | 190 +--------- .../KustoFunctions/KQLGeneralFunctions.cpp | 8 +- .../KustoFunctions/KQLGeneralFunctions.h | 6 + .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 90 ++++- .../Kusto/KustoFunctions/KQLIPFunctions.h | 90 +++++ .../KustoFunctions/KQLStringFunctions.cpp | 92 ++--- .../Kusto/KustoFunctions/KQLStringFunctions.h | 17 +- .../KustoFunctions/KQLTimeSeriesFunctions.cpp | 104 +++++- .../KustoFunctions/KQLTimeSeriesFunctions.h | 104 ++++++ src/Parsers/Kusto/ParserKQLFilter.cpp | 6 +- src/Parsers/Kusto/ParserKQLOperators.cpp | 43 ++- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 16 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 11 - src/Parsers/Kusto/ParserKQLStatement.h | 1 - src/Parsers/Kusto/ParserKQLTable.h | 1 - 29 files changed, 2080 insertions(+), 492 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 5455f41a0c22..e7134678e958 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -15,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -30,4 +30,47 @@ bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) }); } +bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const String &ch_fn) +{ + std::unique_ptr fun; + std::vector args; + + String res =ch_fn + "("; + out = res; + auto begin = pos; + + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + pos = begin; + return false; + } + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + String tmp_arg = 
String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { + String new_arg; + fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,pos)) + tmp_arg = new_arg; + } + else if (pos->type == TokenType::ClosingRoundBracket) + { + for (auto arg : args) + res+=arg; + + res += ")"; + out = res; + return true; + } + args.push_back(tmp_arg); + } + + pos = begin; + return false; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 81bf97f390ba..c633f78fa335 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -4,7 +4,7 @@ #include namespace DB { -class IParserKQLFunction //: public IParser +class IParserKQLFunction { public: template @@ -33,6 +33,7 @@ class IParserKQLFunction //: public IParser virtual ~IParserKQLFunction() = default; protected: virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; + static bool directMapping(String &out,IParser::Pos &pos,const String &ch_fn); }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 20b4b880a83f..91c3639ace40 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,249 @@ namespace DB { +bool ArgMax::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArgMin::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Avg::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool AvgIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = 
res; + return false; +} + +bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Count::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool CountIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DCount::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DCountIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeBag::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeBagIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeList::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeListIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeListWithNulls::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeSet::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; 
+} + +bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Max::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MaxIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Min::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MinIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Percentiles::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Stdev::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StdevIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Sum::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SumIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool TakeAny::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TakeAnyIf::convertImpl(String 
&out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Variance::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool VarianceIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h index 457590328262..6e7130420f4c 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -4,6 +4,251 @@ #include namespace DB { +class ArgMax : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "arg_max()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArgMin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "arg_min()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Avg : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "avg()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class AvgIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "avgif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllAnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_and()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllOr : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_or()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllXor : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_xor()"; 
} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BuildSchema : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "buildschema()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Count : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "count()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "countif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DCount : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dcount()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DCountIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dcountif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeBag : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_bag()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeBagIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_bag_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeList : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeListIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeListWithNulls : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list_with_nulls()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class 
MakeSet : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_set()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeSetIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_set_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Max : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "max()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MaxIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "maxif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Min : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "min()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MinIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "minif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentiles : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentiles()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PercentilesArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentiles_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentilesw : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilesw()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PercentileswArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilesw_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Stdev : public IParserKQLFunction +{ +protected: + const char * getName() 
const override { return "stdev()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StdevIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "stdevif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Sum : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sum()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SumIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sumif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TakeAny : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "take_any()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TakeAnyIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "take_anyif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Variance : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "variance()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class VarianceIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "varianceif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp index 20b4b880a83f..2a06c4e715be 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,53 @@ namespace DB { +bool BinaryAnd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryNot::convertImpl(String &out,IParser::Pos &pos) +{ + 
String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryOr::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryShiftLeft::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryShiftRight::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryXor::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool BitsetCountOnes::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h index 457590328262..94ca3a5abbfe 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h @@ -4,6 +4,54 @@ #include namespace DB { +class BinaryAnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_and()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryNot : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_not()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryOr : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_or()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryShiftLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_shift_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryShiftRight : public IParserKQLFunction +{ +protected: + const char * getName() 
const override { return "binary_shift_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryXor : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_xor()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BitsetCountOnes : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bitset_count_ones()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index 5f43aa16d8e2..9129d82aa780 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -6,14 +5,14 @@ namespace DB { -bool Tobool::convertImpl(String &out,IParser::Pos &pos) +bool ToBool::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ToDatetime::convertImpl(String &out,IParser::Pos &pos) +bool ToDateTime::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; @@ -41,7 +40,7 @@ bool ToString::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ToTimespan::convertImpl(String &out,IParser::Pos &pos) +bool ToTimeSpan::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h index ab73fb3fc218..fa6a20e60687 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h @@ -4,14 +4,14 @@ #include namespace DB { -class Tobool : public IParserKQLFunction +class ToBool : public IParserKQLFunction { protected: const char * getName() const override { return 
"tobool()";} bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ToDatetime : public IParserKQLFunction +class ToDateTime : public IParserKQLFunction { protected: const char * getName() const override { return "todatetime()";} @@ -39,7 +39,7 @@ class ToString : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ToTimespan : public IParserKQLFunction +class ToTimeSpan : public IParserKQLFunction { protected: const char * getName() const override { return "totimespan()";} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 20b4b880a83f..0f098cbebda3 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,207 @@ namespace DB { +bool TimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ago::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatetimeAdd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +}; + +bool DatetimePart::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatetimeDiff::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool DayOfMonth::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DayOfWeek::convertImpl(String &out,IParser::Pos &pos) +{ + String res = 
String(pos->begin,pos->end); + out = res; + return false; +} + +bool DayOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfWeek::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatTimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool GetMonth::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool GetYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool HoursOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeTimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Now::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfMonth::convertImpl(String &out,IParser::Pos &pos) +{ + String res = 
String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeMicrosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeMillisecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeNanosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index 457590328262..7627465ab5bc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -5,5 +5,208 @@ namespace DB { +class TimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ago : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ago()"; } + bool convertImpl(String &out,IParser::Pos 
&pos) override; +}; + +class DatetimeAdd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_add()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimePart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_part()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeDiff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_diff()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatTimeSpan : 
public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HoursOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "hoursofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Now : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "now()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfYear : public IParserKQLFunction +{ +protected: + const char * 
getName() const override { return "startofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMicrosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_microseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMillisecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_milliseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeNanosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_nanoseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeSecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_seconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class WeekOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "weekofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 20b4b880a83f..a6ff0a374ebc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,200 @@ namespace DB { +bool ArrayConcat::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayIif::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayIndexOf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = 
String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayLength::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayReverse::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayRotateLeft::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayRotateRight::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayShiftLeft::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayShiftRight::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySlice::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySortAsc::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySortDesc::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySplit::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySum::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool BagKeys::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BagMerge::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BagRemoveKeys::convertImpl(String &out,IParser::Pos &pos) +{ + 
String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool JaccardIndex::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Pack::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PackAll::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PackArray::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Repeat::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetDifference::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetHasElement::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetIntersect::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetUnion::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TreePath::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Zip::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h index 457590328262..e36fd60eaeaf 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h @@ -4,6 +4,201 @@ #include namespace DB { +class ArrayConcat : public IParserKQLFunction 
+{ +protected: + const char * getName() const override { return "array_concat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayIif : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_iif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayIndexOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_index_of()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayLength : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_length()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayReverse : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_reverse()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayRotateLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_rotate_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayRotateRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_rotate_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayShiftLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_shift_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayShiftRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_shift_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySlice : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_slice()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySortAsc : public 
IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sort_asc()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySortDesc : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sort_desc()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySplit : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_split()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySum : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sum()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagKeys : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_keys()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagMerge : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_merge()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagRemoveKeys : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_remove_keys()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class JaccardIndex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "jaccard_index()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Pack : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PackAll : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack_all()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PackArray : public IParserKQLFunction +{ +protected: + const char * getName() 
const override { return "pack_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Repeat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "repeat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetDifference : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_difference()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetHasElement : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_has_element()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetIntersect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_intersect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetUnion : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_union()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TreePath : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "treepath()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Zip : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "zip()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 528f906e51e7..25e0c2af2f91 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -65,7 +64,9 @@ namespace DB {"indexof", KQLFunctionValue::indexof}, {"isempty", KQLFunctionValue::isempty}, {"isnotempty", KQLFunctionValue::isnotempty}, + {"notempty", KQLFunctionValue::isnotempty}, 
{"isnotnull", KQLFunctionValue::isnotnull}, + {"notnull", KQLFunctionValue::isnotnull}, {"isnull", KQLFunctionValue::isnull}, {"parse_command_line", KQLFunctionValue::parse_command_line}, {"parse_csv", KQLFunctionValue::parse_csv}, @@ -82,6 +83,7 @@ namespace DB {"strlen", KQLFunctionValue::strlen}, {"strrep", KQLFunctionValue::strrep}, {"substring", KQLFunctionValue::substring}, + {"tolower", KQLFunctionValue::tolower}, {"toupper", KQLFunctionValue::toupper}, {"translate", KQLFunctionValue::translate}, {"trim", KQLFunctionValue::trim}, @@ -206,12 +208,6 @@ namespace DB std::unique_ptr KQLFunctionFactory::get(String &kql_function) { -/* if (kql_function=="strrep") - return std::make_unique(); - else if (kql_function=="strcat") - return std::make_unique(); - else - return nullptr;*/ if (kql_functions.find(kql_function) == kql_functions.end()) return nullptr; @@ -222,293 +218,295 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function return nullptr; case KQLFunctionValue::timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ago: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_add: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_part: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_diff: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_datetime: - return 
nullptr; + return std::make_unique(); case KQLFunctionValue::format_timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::getmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::getyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::hoursofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_datetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::now: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_microseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_milliseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_nanoseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_seconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::weekofyear: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::base64_encode_tostring: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_encode_fromguid: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_tostring: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_toarray: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_toguid: - return nullptr; + return std::make_unique(); case KQLFunctionValue::countof: - return nullptr; + return std::make_unique(); case 
KQLFunctionValue::extract: - return nullptr; + return std::make_unique(); case KQLFunctionValue::extract_all: - return nullptr; + return std::make_unique(); case KQLFunctionValue::extractjson: - return nullptr; + return std::make_unique(); case KQLFunctionValue::has_any_index: - return nullptr; + return std::make_unique(); case KQLFunctionValue::indexof: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isempty: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isnotempty: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isnotnull: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isnull: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_command_line: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_csv: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_json: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_url: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_urlquery: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_version: - return nullptr; + return std::make_unique(); case KQLFunctionValue::replace_regex: - return nullptr; + return std::make_unique(); case KQLFunctionValue::reverse: - return nullptr; + return std::make_unique(); case KQLFunctionValue::split: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strcat: return std::make_unique(); case KQLFunctionValue::strcat_delim: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strcmp: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strlen: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strrep: return std::make_unique(); case KQLFunctionValue::substring: - return nullptr; + return std::make_unique(); + + case KQLFunctionValue::tolower: + return std::make_unique(); case 
KQLFunctionValue::toupper: - return nullptr; + return std::make_unique(); case KQLFunctionValue::translate: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim_end: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim_start: - return nullptr; + return std::make_unique(); case KQLFunctionValue::url_decode: - return nullptr; + return std::make_unique(); case KQLFunctionValue::url_encode: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_concat: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_iif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_index_of: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_length: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_reverse: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_rotate_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_rotate_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_shift_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_shift_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_slice: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sort_asc: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sort_desc: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_split: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sum: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bag_keys: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bag_merge: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bag_remove_keys: - return nullptr; + return 
std::make_unique(); case KQLFunctionValue::jaccard_index: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack_all: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::repeat: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_difference: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_has_element: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_intersect: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_union: - return nullptr; + return std::make_unique(); case KQLFunctionValue::treepath: - return nullptr; + return std::make_unique(); case KQLFunctionValue::zip: - return nullptr; + return std::make_unique(); case KQLFunctionValue::tobool: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::todatetime: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::todouble: return std::make_unique(); @@ -520,222 +518,220 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function return std::make_unique(); case KQLFunctionValue::totimespan: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::arg_max: - return nullptr; + return std::make_unique(); case KQLFunctionValue::arg_min: - return nullptr; + return std::make_unique(); case KQLFunctionValue::avg: - return nullptr; + return std::make_unique(); case KQLFunctionValue::avgif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_and: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_or: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_xor: - return nullptr; + return std::make_unique(); + case KQLFunctionValue::buildschema: - return nullptr; + return 
std::make_unique(); case KQLFunctionValue::count: - return nullptr; + return std::make_unique(); case KQLFunctionValue::countif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dcount: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dcountif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_bag: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_bag_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_list: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_list_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_list_with_nulls: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_set: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_set_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::max: - return nullptr; + return std::make_unique(); case KQLFunctionValue::maxif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::min: - return nullptr; + return std::make_unique(); case KQLFunctionValue::minif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentiles: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentiles_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentilesw: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentilesw_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::stdev: - return nullptr; + return std::make_unique(); case KQLFunctionValue::stdevif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::sum: - return nullptr; + return std::make_unique(); case KQLFunctionValue::sumif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::take_any: - return nullptr; + return std::make_unique(); case KQLFunctionValue::take_anyif: - return 
nullptr; + return std::make_unique(); case KQLFunctionValue::variance: - return nullptr; + return std::make_unique(); case KQLFunctionValue::varianceif: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::series_fir: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_iir: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_line: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_line_dynamic: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_2lines: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_2lines_dynamic: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_outliers: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_periods_detect: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_periods_validate: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_stats_dynamic: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_stats: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_backward: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_const: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_forward: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_linear: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::ipv4_compare: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_in_range: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_match: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_private: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_netmask_suffix: - return nullptr; + return std::make_unique(); case 
KQLFunctionValue::parse_ipv4: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv4_mask: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv6_compare: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv6_is_match: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv6: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv6_mask: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_ipv4: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_ipv4_mask: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::binary_and: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_not: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_or: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_shift_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_shift_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_xor: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bitset_count_ones: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bin: - return nullptr; + return std::make_unique(); } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 86e879b4668e..8f57133c0713 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -69,6 +69,7 @@ namespace DB strlen, strrep, substring, + tolower, toupper, translate, trim, @@ -187,199 +188,14 @@ namespace DB bin }; - + class KQLFunctionFactory { public: static std::unique_ptr get(String &kql_function); protected: - - - static std::unordered_map kql_functions;/* = - { - {"datetime", KQLFunctionValue::datetime}, - {"ago", 
KQLFunctionValue::ago}, - {"datetime_add", KQLFunctionValue::datetime_add}, - {"datetime_part", KQLFunctionValue::datetime_part}, - {"datetime_diff", KQLFunctionValue::datetime_diff}, - {"dayofmonth", KQLFunctionValue::dayofmonth}, - {"dayofweek", KQLFunctionValue::dayofweek}, - {"dayofyear", KQLFunctionValue::dayofyear}, - {"endofday", KQLFunctionValue::endofday}, - {"endofweek", KQLFunctionValue::endofweek}, - {"endofyear", KQLFunctionValue::endofyear}, - {"format_datetime", KQLFunctionValue::format_datetime}, - {"format_timespan", KQLFunctionValue::format_timespan}, - {"getmonth", KQLFunctionValue::getmonth}, - {"getyear", KQLFunctionValue::getyear}, - {"hoursofday", KQLFunctionValue::hoursofday}, - {"make_timespan", KQLFunctionValue::make_timespan}, - {"make_datetime", KQLFunctionValue::make_datetime}, - {"now", KQLFunctionValue::now}, - {"startofday", KQLFunctionValue::startofday}, - {"startofmonth", KQLFunctionValue::startofmonth}, - {"startofweek", KQLFunctionValue::startofweek}, - {"startofyear", KQLFunctionValue::startofyear}, - {"todatetime", KQLFunctionValue::todatetime}, - {"totimespan", KQLFunctionValue::totimespan}, - {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, - {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, - {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, - {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, - {"weekofyear", KQLFunctionValue::weekofyear}, - - {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, - {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, - {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, - {"base64_decode_toarray", KQLFunctionValue::base64_decode_toarray}, - {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, - {"countof", KQLFunctionValue::countof}, - {"extract", KQLFunctionValue::extract}, - 
{"extract_all", KQLFunctionValue::extract_all}, - {"extractjson", KQLFunctionValue::extractjson}, - {"has_any_index", KQLFunctionValue::has_any_index}, - {"indexof", KQLFunctionValue::indexof}, - {"isempty", KQLFunctionValue::isempty}, - {"isnotempty", KQLFunctionValue::isnotempty}, - {"isnotnull", KQLFunctionValue::isnotnull}, - {"isnull", KQLFunctionValue::isnull}, - {"parse_command_line", KQLFunctionValue::parse_command_line}, - {"parse_csv", KQLFunctionValue::parse_csv}, - {"parse_json", KQLFunctionValue::parse_json}, - {"parse_url", KQLFunctionValue::parse_url}, - {"parse_urlquery", KQLFunctionValue::parse_urlquery}, - {"parse_version", KQLFunctionValue::parse_version}, - {"replace_regex", KQLFunctionValue::replace_regex}, - {"reverse", KQLFunctionValue::reverse}, - {"split", KQLFunctionValue::split}, - {"strcat", KQLFunctionValue::strcat}, - {"strcat_delim", KQLFunctionValue::strcat_delim}, - {"strcmp", KQLFunctionValue::strcmp}, - {"strlen", KQLFunctionValue::strlen}, - {"strrep", KQLFunctionValue::strrep}, - {"substring", KQLFunctionValue::substring}, - {"toupper", KQLFunctionValue::toupper}, - {"translate", KQLFunctionValue::translate}, - {"trim", KQLFunctionValue::trim}, - {"trim_end", KQLFunctionValue::trim_end}, - {"trim_start", KQLFunctionValue::trim_start}, - {"url_decode", KQLFunctionValue::url_decode}, - {"url_encode", KQLFunctionValue::url_encode}, - - {"array_concat", KQLFunctionValue::array_concat}, - {"array_iif", KQLFunctionValue::array_iif}, - {"array_index_of", KQLFunctionValue::array_index_of}, - {"array_length", KQLFunctionValue::array_length}, - {"array_reverse", KQLFunctionValue::array_reverse}, - {"array_rotate_left", KQLFunctionValue::array_rotate_left}, - {"array_rotate_right", KQLFunctionValue::array_rotate_right}, - {"array_shift_left", KQLFunctionValue::array_shift_left}, - {"array_shift_right", KQLFunctionValue::array_shift_right}, - {"array_slice", KQLFunctionValue::array_slice}, - {"array_sort_asc", 
KQLFunctionValue::array_sort_asc}, - {"array_sort_desc", KQLFunctionValue::array_sort_desc}, - {"array_split", KQLFunctionValue::array_split}, - {"array_sum", KQLFunctionValue::array_sum}, - {"bag_keys", KQLFunctionValue::bag_keys}, - {"bag_merge", KQLFunctionValue::bag_merge}, - {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, - {"jaccard_index", KQLFunctionValue::jaccard_index}, - {"pack", KQLFunctionValue::pack}, - {"pack_all", KQLFunctionValue::pack_all}, - {"pack_array", KQLFunctionValue::pack_array}, - {"repeat", KQLFunctionValue::repeat}, - {"set_difference", KQLFunctionValue::set_difference}, - {"set_has_element", KQLFunctionValue::set_has_element}, - {"set_intersect", KQLFunctionValue::set_intersect}, - {"set_union", KQLFunctionValue::set_union}, - {"treepath", KQLFunctionValue::treepath}, - {"zip", KQLFunctionValue::zip}, - - {"tobool", KQLFunctionValue::tobool}, - {"toboolean", KQLFunctionValue::tobool}, - {"todouble", KQLFunctionValue::todouble}, - {"toint", KQLFunctionValue::toint}, - {"toreal", KQLFunctionValue::todouble}, - {"tostring", KQLFunctionValue::tostring}, - {"totimespan", KQLFunctionValue::totimespan}, - - {"arg_max", KQLFunctionValue::arg_max}, - {"arg_min", KQLFunctionValue::arg_min}, - {"avg", KQLFunctionValue::avg}, - {"avgif", KQLFunctionValue::avgif}, - {"binary_all_and", KQLFunctionValue::binary_all_and}, - {"binary_all_or", KQLFunctionValue::binary_all_or}, - {"binary_all_xor", KQLFunctionValue::binary_all_xor}, - {"buildschema", KQLFunctionValue::buildschema}, - {"count", KQLFunctionValue::count}, - {"countif", KQLFunctionValue::countif}, - {"dcount", KQLFunctionValue::dcount}, - {"dcountif", KQLFunctionValue::dcountif}, - {"make_bag", KQLFunctionValue::make_bag}, - {"make_bag_if", KQLFunctionValue::make_bag_if}, - {"make_list", KQLFunctionValue::make_list}, - {"make_list_if", KQLFunctionValue::make_list_if}, - {"make_list_with_nulls", KQLFunctionValue::make_list_with_nulls}, - {"make_set", KQLFunctionValue::make_set}, - 
{"make_set_if", KQLFunctionValue::make_set_if}, - {"max", KQLFunctionValue::max}, - {"maxif", KQLFunctionValue::maxif}, - {"min", KQLFunctionValue::min}, - {"minif", KQLFunctionValue::minif}, - {"percentiles", KQLFunctionValue::percentiles}, - {"percentiles_array", KQLFunctionValue::percentiles_array}, - {"percentilesw", KQLFunctionValue::percentilesw}, - {"percentilesw_array", KQLFunctionValue::percentilesw_array}, - {"stdev", KQLFunctionValue::stdev}, - {"stdevif", KQLFunctionValue::stdevif}, - {"sum", KQLFunctionValue::sum}, - {"sumif", KQLFunctionValue::sumif}, - {"take_any", KQLFunctionValue::take_any}, - {"take_anyif", KQLFunctionValue::take_anyif}, - {"variance", KQLFunctionValue::variance}, - {"varianceif", KQLFunctionValue::varianceif}, - - {"series_fir", KQLFunctionValue::series_fir}, - {"series_iir", KQLFunctionValue::series_iir}, - {"series_fit_line", KQLFunctionValue::series_fit_line}, - {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, - {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, - {"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, - {"series_outliers", KQLFunctionValue::series_outliers}, - {"series_periods_detect", KQLFunctionValue::series_periods_detect}, - {"series_periods_validate", KQLFunctionValue::series_periods_validate}, - {"series_stats_dynamic", KQLFunctionValue::series_stats_dynamic}, - {"series_stats", KQLFunctionValue::series_stats}, - {"series_fill_backward", KQLFunctionValue::series_fill_backward}, - {"series_fill_const", KQLFunctionValue::series_fill_const}, - {"series_fill_forward", KQLFunctionValue::series_fill_forward}, - {"series_fill_linear", KQLFunctionValue::series_fill_linear}, - - {"ipv4_compare", KQLFunctionValue::ipv4_compare}, - {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, - {"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, - {"ipv4_is_private", KQLFunctionValue::ipv4_is_private}, - {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, 
- {"parse_ipv4", KQLFunctionValue::parse_ipv4}, - {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, - {"ipv6_compare", KQLFunctionValue::ipv6_compare}, - {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, - {"parse_ipv6", KQLFunctionValue::parse_ipv6}, - {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, - {"format_ipv4", KQLFunctionValue::format_ipv4}, - {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, - - {"binary_and", KQLFunctionValue::binary_and}, - {"binary_not", KQLFunctionValue::binary_not}, - {"binary_or", KQLFunctionValue::binary_or}, - {"binary_shift_left", KQLFunctionValue::binary_shift_left}, - {"binary_shift_right", KQLFunctionValue::binary_shift_right}, - {"binary_xor", KQLFunctionValue::binary_xor}, - {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, - {"bin", KQLFunctionValue::bin} - };*/ - + static std::unordered_map kql_functions; }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index 20b4b880a83f..253292a7d9df 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,11 @@ namespace DB { - +bool Bin::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h index 457590328262..802fd152333f 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -4,6 +4,12 @@ #include namespace DB { +class Bin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp 
b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 20b4b880a83f..f271d924affc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,95 @@ namespace DB { +bool Ipv4Compare::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4IsInRange::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4IsMatch::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4NetmaskSuffix::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv4::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv4Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv6Compare::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv6IsMatch::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool ParseIpv6::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv6Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatIpv4::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return 
false; +} + +bool FormatIpv4Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h index 457590328262..3ee5dda4c839 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -4,6 +4,96 @@ #include namespace DB { +class Ipv4Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_compare()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsInRange : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_in_range()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_match()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsPrivate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_private()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4NetmaskSuffix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_netmask_suffix()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv6Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_compare()"; 
} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv6IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_is_match()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv6Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 851c631d1ceb..a7f7c373566d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -8,16 +8,12 @@ namespace DB bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Encode"); } bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Decode"); } bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) @@ -85,23 +81,17 @@ bool IndexOf::convertImpl(String &out,IParser::Pos &pos) bool IsEmpty::convertImpl(String &out,IParser::Pos &pos) { - String res = 
String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"empty"); } bool IsNotEmpty::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"notEmpty"); } bool IsNotNull::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"isNotNull"); } bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) @@ -113,12 +103,10 @@ bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) bool IsNull::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"isNull"); } -bool ParseCsv::convertImpl(String &out,IParser::Pos &pos) +bool ParseCSV::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; @@ -132,14 +120,14 @@ bool ParseJson::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ParseUrl::convertImpl(String &out,IParser::Pos &pos) +bool ParseURL::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseUrlQuery::convertImpl(String &out,IParser::Pos &pos) +bool ParseURLQuery::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; @@ -176,39 +164,7 @@ bool Split::convertImpl(String &out,IParser::Pos &pos) bool StrCat::convertImpl(String &out,IParser::Pos &pos) { - std::unique_ptr fun; - std::vector args; - String res = "concat("; - - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - --pos; - return false; - } - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - ++pos; - String tmp_arg = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { - String new_arg; - fun = KQLFunctionFactory::get(tmp_arg); - if 
(fun && fun->convert(new_arg,pos)) - tmp_arg = new_arg; - } - else if (pos->type == TokenType::ClosingRoundBracket) - { - for (auto arg : args) - res+=arg; - - res += ")"; - out = res; - return true; - } - args.push_back(tmp_arg); - } - return false; + return directMapping(out,pos,"concat"); } bool StrCatDelim::convertImpl(String &out,IParser::Pos &pos) @@ -227,9 +183,7 @@ bool StrCmp::convertImpl(String &out,IParser::Pos &pos) bool StrLen::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"lengthUTF8"); } bool StrRep::convertImpl(String &out,IParser::Pos &pos) @@ -265,6 +219,8 @@ bool StrRep::convertImpl(String &out,IParser::Pos &pos) fun = KQLFunctionFactory::get(multiplier); if ( fun && fun->convert(fun_multiplier,pos)) new_multiplier += fun_multiplier; + else + new_multiplier = multiplier; } else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) // has delimiter { @@ -313,11 +269,15 @@ bool SubString::convertImpl(String &out,IParser::Pos &pos) return false; } +bool ToLower::convertImpl(String &out,IParser::Pos &pos) +{ + return directMapping(out,pos,"lower"); +} + + bool ToUpper::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"upper"); } bool Translate::convertImpl(String &out,IParser::Pos &pos) @@ -348,18 +308,14 @@ bool TrimStart::convertImpl(String &out,IParser::Pos &pos) return false; } -bool UrlDecode::convertImpl(String &out,IParser::Pos &pos) +bool URLDecode::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"decodeURLComponent"); } -bool UrlEncode::convertImpl(String &out,IParser::Pos &pos) +bool URLEncode::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + 
return directMapping(out,pos,"encodeURLComponent"); } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h index db7ab5077502..43840c1253f1 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h @@ -116,7 +116,7 @@ class ParseCommandLine : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseCsv : public IParserKQLFunction +class ParseCSV : public IParserKQLFunction { protected: const char * getName() const override { return "parse_csv()"; } @@ -130,14 +130,14 @@ class ParseJson : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseUrl : public IParserKQLFunction +class ParseURL : public IParserKQLFunction { protected: const char * getName() const override { return "parse_url()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseUrlQuery : public IParserKQLFunction +class ParseURLQuery : public IParserKQLFunction { protected: const char * getName() const override { return "parse_urlquery()"; } @@ -214,6 +214,13 @@ class SubString : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; +class ToLower : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tolower()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + class ToUpper : public IParserKQLFunction { protected: @@ -249,14 +256,14 @@ class TrimStart : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; -class UrlDecode : public IParserKQLFunction +class URLDecode : public IParserKQLFunction { protected: const char * getName() const override { return "url_decode()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; -class UrlEncode : public IParserKQLFunction +class URLEncode : public IParserKQLFunction { protected: 
const char * getName() const override { return "url_encode()"; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp index 20b4b880a83f..74b7811f29ef 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,109 @@ namespace DB { +bool SeriesFir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesIir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLine::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLineDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2lines::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2linesDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesOutliers::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsDetect::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsValidate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStatsDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStats::convertImpl(String &out,IParser::Pos &pos) +{ + 
String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillBackward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillConst::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool SeriesFillForward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillLinear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h index 457590328262..fa97dec151c7 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h @@ -4,6 +4,110 @@ #include namespace DB { +class SeriesFir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesIir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_iir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLineDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2lines : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_2lines()"; } + bool convertImpl(String &out,IParser::Pos 
&pos) override; +}; + +class SeriesFit2linesDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_2lines_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesOutliers : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_outliers()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsDetect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_detect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsValidate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_validate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStatsDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStats : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillBackward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_backward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillConst : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_const()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillForward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_forward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillLinear : public IParserKQLFunction +{ +protected: + const char * 
getName() const override { return "series_fill_linear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 466370f5d803..ceb59f1d86ed 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -15,14 +15,12 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Pos begin = pos; String expr; - KQLOperators convetor; - for (auto op_po : op_pos) { if (expr.empty()) - expr = "(" + convetor.getExprFromToken(op_po) +")"; + expr = "(" + getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; + expr = expr + " and (" + getExprFromToken(op_po) +")"; } Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index c3d0843b1f08..b68d27a10f00 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include namespace DB { @@ -33,22 +35,33 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; } - if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + ++token_pos; + + if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; + else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + { + String tmp_arg = String(token_pos->begin,token_pos->end); + if (token_pos->type == TokenType::BareWord ) + { + String new_arg; + auto fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,token_pos)) + tmp_arg = new_arg; + } + new_expr = ch_op 
+"(" + tokens.back() +", concat('" + left_wildcards + "', " + tmp_arg +", '"+ right_wildcards + "'))"; + } else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos &pos) +bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) { - String res; - std::vector tokens; - auto begin = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -89,8 +102,13 @@ String KQLOperators::getExprFromToken(IParser::Pos &pos) else --pos; - if (KQLOperator.find(op) != KQLOperator.end()) - op_value = KQLOperator[op]; + if (KQLOperator.find(op) == KQLOperator.end()) + { + pos = begin; + return false; + } + + op_value = KQLOperator[op]; String new_expr; if (op_value == KQLOperatorValue::none) @@ -231,14 +249,9 @@ String KQLOperators::getExprFromToken(IParser::Pos &pos) tokens.push_back(new_expr); } - ++pos; + return true; } - - for (auto & token : tokens) - res = res + token + " "; - - pos = begin; - return res; + return false; } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9920593c7aec..969a1e5c48a0 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 +9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos &pos) ; + bool convert(std::vector &tokens,IParser::Pos &pos); protected: enum class WildcardsPos:uint8_t diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index f1348c4b3c68..0334722041fb 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -23,21 +23,27 @@ bool ParserKQLBase :: parsePrepare(Pos & pos) String ParserKQLBase :: getExprFromToken(Pos &pos) 
{ String res; + std::vector tokens; std::unique_ptr kql_function; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); String new_token; - if (pos->type == TokenType::BareWord ) + if (!KQLOperators().convert(tokens,pos)) { - kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)) - token = new_token; + if (pos->type == TokenType::BareWord ) + { + kql_function = KQLFunctionFactory::get(token); + if (kql_function && kql_function->convert(new_token,pos)) + token = new_token; + } + tokens.push_back(token); } - res = res + token +" "; ++pos; } + for (auto token:tokens) + res = res + token +" "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index cc4bece7ebf6..6ce29b8024f9 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -57,16 +57,5 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -/* -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) -{ - return wrapParseImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); -}*/ } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index 1eed2d008451..aa974504d92f 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -19,7 +19,6 @@ class ParserKQLStatement : public IParserBase {} }; - class ParserKQLWithOutput : public IParserBase { protected: diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index 1266b6e732d5..b5302897adaa 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -12,7 +12,6 @@ class ParserKQLTable : public ParserKQLBase const char * getName() const override { return "KQL Table"; } 
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool parsePrepare(Pos &pos) override; - }; } From a5c2ef514a5062613efd80e189f4faf3519c5c41 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 030/342] Kusto-phase1: Add Support to Kusto Query Language This is the initial implement of Kusto Query Language. in this commit, we support the following features as MVP : Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count ,min, max, sum Aggregate by time intervals --- src/Parsers/Kusto/ParserKQLOperators.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index b68d27a10f00..77dcd41255ee 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -154,7 +154,6 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::not_equal: break; - case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -162,7 +161,6 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; @@ -212,7 +210,6 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; From d120ec5296b490e80116db6170996a17d0c47096 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 031/342] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 4 ++++ 1 file changed, 4 insertions(+) 
diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 7a88fec1988b..a4d8fb3081cb 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -47,6 +47,10 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } return std::make_pair("", ""); } From 5d9bb18f8d1f41e1642db850f0539108f36a2ba9 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 032/342] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index a4d8fb3081cb..cdac747edf08 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -47,9 +47,9 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); From 657eac2f5277db6fb473785ae7c292eb5edb63ea Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 17 Jun 2022 08:47:08 -0700 Subject: [PATCH 033/342] Kusto-phase2 : Added KQL functions interface. 
changed the summarize class for new aggregation functions --- src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h | 1 - src/Parsers/Kusto/ParserKQLStatement.cpp | 11 +++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 8f57133c0713..7c5f0d547345 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -188,7 +188,6 @@ namespace DB bin }; - class KQLFunctionFactory { public: diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 6ce29b8024f9..cc4bece7ebf6 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -57,5 +57,16 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/* +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapParseImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +}*/ } From 775abba496206f8766f2510c469a5e6983e322d7 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 21 Jun 2022 09:33:07 -0700 Subject: [PATCH 034/342] Kusto-phase2: Add KQL functions parser --- src/Parsers/Kusto/ParserKQLStatement.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index cc4bece7ebf6..6ce29b8024f9 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -57,16 +57,5 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -/* -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) -{ - return wrapParseImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); -}*/ } From 
e9492e513240cdf571e12e8cc83fb23b08b36139 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 22 Jun 2022 12:00:47 -0700 Subject: [PATCH 035/342] Kusto-phase2: Add common function to get argument for function convertion --- .../KustoFunctions/IParserKQLFunction.cpp | 52 +++++++++ .../Kusto/KustoFunctions/IParserKQLFunction.h | 2 + .../KustoFunctions/KQLStringFunctions.cpp | 103 +++++++++--------- 3 files changed, 103 insertions(+), 54 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index e7134678e958..ed90c865f511 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -19,6 +19,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + + bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) { return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] @@ -73,4 +79,50 @@ bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const Strin return false; } +String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser::Pos &pos) +{ + String converted_arg; + std::unique_ptr fun; + + if (pos->type == TokenType::ClosingRoundBracket) + return converted_arg; + + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Syntax error near " + fn_name, ErrorCodes::SYNTAX_ERROR); + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String token = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { + String converted; + fun = KQLFunctionFactory::get(token); + if ( fun && fun->convert(converted,pos)) + converted_arg += converted; + else + converted_arg += token; + } + else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) + { + break; + } + else + converted_arg += token; + ++pos; + } + return 
converted_arg; +} + +String IParserKQLFunction::getKQLFunctionName(IParser::Pos &pos) +{ + String fn_name = String(pos->begin, pos->end); + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return ""; + } + return fn_name; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index c633f78fa335..8af2623a984d 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -34,6 +34,8 @@ class IParserKQLFunction protected: virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; static bool directMapping(String &out,IParser::Pos &pos,const String &ch_fn); + static String getConvertedArgument(const String &fn_name, IParser::Pos &pos); + static String getKQLFunctionName(IParser::Pos &pos); }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index a7f7c373566d..0c8a0891a013 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -188,84 +188,80 @@ bool StrLen::convertImpl(String &out,IParser::Pos &pos) bool StrRep::convertImpl(String &out,IParser::Pos &pos) { - std::unique_ptr fun; - String res = String(pos->begin,pos->end); - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - --pos; + String fn_name = getKQLFunctionName(pos); //String(pos->begin,pos->end); + + if (fn_name.empty()) return false; - } - ++pos; - String value = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { String func_value; - fun = KQLFunctionFactory::get(value); - if (fun && fun->convert(func_value,pos)) - value = func_value; - } + + auto begin = pos; + ++pos; + String value = getConvertedArgument(fn_name,pos); if (pos->type != TokenType::Comma) return false; ++pos; - String multiplier = String(pos->begin,pos->end); - String new_multiplier; - 
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - if (pos->type == TokenType::BareWord ) - { - String fun_multiplier; - fun = KQLFunctionFactory::get(multiplier); - if ( fun && fun->convert(fun_multiplier,pos)) - new_multiplier += fun_multiplier; - else - new_multiplier = multiplier; - } - else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) // has delimiter - { - break; - } - else - new_multiplier += String(pos->begin,pos->end); - ++pos; - } - - if (!new_multiplier.empty()) - multiplier = new_multiplier; + String multiplier = getConvertedArgument(fn_name,pos); - String delimiter ; + String delimiter; if (pos->type == TokenType::Comma) { ++pos; - delimiter = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { String func_delimiter; - fun = KQLFunctionFactory::get(delimiter); - if (fun && fun->convert(func_delimiter,pos)) - delimiter = func_delimiter; - } - ++pos; + delimiter = getConvertedArgument(fn_name,pos); } + if (pos->type == TokenType::ClosingRoundBracket) { if (!delimiter.empty()) { String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; - res = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; + out = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; } else - res = "repeat("+ value + ", " + multiplier + ")"; - out = res; + out = "repeat("+ value + ", " + multiplier + ")"; + return true; } + + pos = begin; return false; } + bool SubString::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + String source = getConvertedArgument(fn_name,pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + String startingIndex = getConvertedArgument(fn_name,pos); + + 
String length; + if (pos->type == TokenType::Comma) + { + ++pos; + length = getConvertedArgument(fn_name,pos); + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (length.empty()) + out = "substr("+ source + "," + startingIndex +" + 1)"; + else + out = "substr("+ source + ", " + startingIndex +" + 1, " + length + ")"; + return true; + } + pos = begin; return false; } @@ -274,7 +270,6 @@ bool ToLower::convertImpl(String &out,IParser::Pos &pos) return directMapping(out,pos,"lower"); } - bool ToUpper::convertImpl(String &out,IParser::Pos &pos) { return directMapping(out,pos,"upper"); From a460726e517e8ec05795a0f9829faf90a8eab270 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 23 Jun 2022 14:26:37 -0700 Subject: [PATCH 036/342] Kusto-phase2: add kusto_auto dialect --- src/Client/ClientBase.cpp | 18 ++++- src/Interpreters/executeQuery.cpp | 17 +++- .../KustoFunctions/IParserKQLFunction.cpp | 78 ++++++++++--------- src/Parsers/Kusto/ParserKQLOperators.cpp | 1 + 4 files changed, 74 insertions(+), 40 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 09e44a3ac098..103de6a55e67 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -320,12 +320,15 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - std::unique_ptr parser; + std::shared_ptr parser; + ParserKQLStatement kql_parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ASTPtr res; const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; + auto begin = pos; + if (!allow_multi_statements) max_length = settings.max_query_size; @@ -343,13 +346,22 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!res) { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; + if (sql_dialect != "kusto") + res = tryParseQuery(kql_parser, begin, end, message, 
true, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res) + { + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; + } } } else { res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res && sql_dialect != "kusto") + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 2bd204a0d424..1702db21cdce 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -357,12 +357,27 @@ static std::tuple executeQueryImpl( /// Parse the query from string. try { - if (settings.dialect == Dialect::kusto && !internal) + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == "clickhouse" || sql_dialect == "kusto" || sql_dialect == "kusto_auto"); + + if (sql_dialect == "kusto" && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else if (sql_dialect == "kusto_auto" && !internal) + { + try { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + catch(...) 
+ { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } } else { diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index ed90c865f511..73472a42010e 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB { @@ -38,41 +39,36 @@ bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const String &ch_fn) { - std::unique_ptr fun; - std::vector args; + std::vector arguments; - String res =ch_fn + "("; - out = res; - auto begin = pos; + String fn_name = getKQLFunctionName(pos); - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - pos = begin; + if (fn_name.empty()) return false; - } + String res; + auto begin = pos; + ++pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - ++pos; - String tmp_arg = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { - String new_arg; - fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,pos)) - tmp_arg = new_arg; - } - else if (pos->type == TokenType::ClosingRoundBracket) - { - for (auto arg : args) - res+=arg; + String argument = getConvertedArgument(fn_name,pos); + arguments.push_back(argument); + if (pos->type == TokenType::ClosingRoundBracket) + { + for (auto arg : arguments) + { + if (res.empty()) + res = ch_fn + "(" + arg; + else + res = res + ", "+ arg; + } res += ")"; + out = res; return true; } - args.push_back(tmp_arg); + ++pos; } pos = begin; @@ -82,6 +78,7 @@ bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const Strin String IParserKQLFunction::getConvertedArgument(const String &fn_name, 
IParser::Pos &pos) { String converted_arg; + std::vector tokens; std::unique_ptr fun; if (pos->type == TokenType::ClosingRoundBracket) @@ -93,23 +90,32 @@ String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser:: while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) + String new_token; + if (!KQLOperators().convert(tokens,pos)) { - String converted; - fun = KQLFunctionFactory::get(token); - if ( fun && fun->convert(converted,pos)) - converted_arg += converted; + if (pos->type == TokenType::BareWord ) + { + String converted; + fun = KQLFunctionFactory::get(token); + if ( fun && fun->convert(converted,pos)) + tokens.push_back(converted); + else + tokens.push_back(token); + } + else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + break; + } else - converted_arg += token; + tokens.push_back(token); } - else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) - { - break; - } - else - converted_arg += token; ++pos; + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + break; } + for (auto token : tokens) + converted_arg = converted_arg + token +" "; + return converted_arg; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 77dcd41255ee..a6b909b142bd 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -248,6 +248,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) } return true; } + pos = begin; return false; } From 6c58e31ee3ee072d6b3f56e4ad47920efd4fd48c Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 24 Jun 2022 13:05:52 -0700 Subject: [PATCH 037/342] Kusto-phase2: Add alias support --- src/Parsers/Kusto/ParserKQLProject.cpp | 18 ---------------- src/Parsers/Kusto/ParserKQLQuery.cpp | 29 
+++++++++++++++++++++++++- src/Parsers/tests/gtest_Parser.cpp | 4 ---- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index 0e25c9c4a6c3..47ecbbfce3e0 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -11,25 +11,7 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (op_pos.empty()) expr = "*"; else - { - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) - { - pos = *it ; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) - { - if (pos->type == TokenType::BareWord) - { - String tmp(pos->begin,pos->end); - - if (it != op_pos.begin() && columns.find(tmp) == columns.end()) - return false; - columns.insert(tmp); - } - ++pos; - } - } expr = getExprFromToken(op_pos.back()); - } Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0334722041fb..d54344e9ea98 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -25,12 +25,23 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) String res; std::vector tokens; std::unique_ptr kql_function; + String alias; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); String new_token; - if (!KQLOperators().convert(tokens,pos)) + if (token == "=") + { + ++pos; + if (String(pos->begin,pos->end) != "~" ) + { + alias = tokens.back(); + tokens.pop_back(); + } + --pos; + } + else if (!KQLOperators().convert(tokens,pos)) { if (pos->type == TokenType::BareWord ) { @@ -40,8 +51,24 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) } tokens.push_back(token); } + + if (pos->type == TokenType::Comma && !alias.empty()) + { + tokens.pop_back(); + tokens.push_back("AS"); + 
tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); + } ++pos; } + + if (!alias.empty()) + { + tokens.push_back("AS"); + tokens.push_back(alias); + } + for (auto token:tokens) res = res + token +" "; return res; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index cb0b49aecbbf..5ba7fbdc3fd8 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -326,10 +326,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" }, - { - "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "throws Syntax error" - }, { "Customers | sort by FirstName desc", "SELECT *\nFROM Customers\nORDER BY FirstName DESC" From e8632be0c0ff90d14a300ee09d57c603c469f9f9 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Jun 2022 11:23:13 -0700 Subject: [PATCH 038/342] Aggregate functions initial code - Priority:HIGHT(Easy and Medium) --- .../KQLAggregationFunctions.cpp | 61 ++++++++++--------- src/Parsers/Kusto/ParserKQLSummarize.cpp | 58 +++++++++++++++--- 2 files changed, 82 insertions(+), 37 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 91c3639ace40..1bfb094518f7 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -21,29 +21,25 @@ namespace DB bool ArgMax::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"argMax"); } bool ArgMin::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"argMin"); } bool Avg::convertImpl(String 
&out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"avg"); } bool AvgIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"avgIf"); } bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos) @@ -77,29 +73,40 @@ bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) bool Count::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"count"); } bool CountIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"countIf"); } bool DCount::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name,pos); + + out = "count ( DISTINCT " + value + " ) "; + return true; } bool DCountIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name,pos); + ++pos; + String condition = getConvertedArgument(fn_name,pos); + out = "countIf ( DISTINCT " + value + " , " + condition + " ) "; + return true; } bool MakeBag::convertImpl(String &out,IParser::Pos &pos) @@ -154,29 +161,25 @@ bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) bool Max::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"max"); } bool MaxIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; 
+ return directMapping(out,pos,"maxIf"); } bool Min::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"min"); } bool MinIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"minIf"); } bool Percentiles::convertImpl(String &out,IParser::Pos &pos) @@ -224,15 +227,13 @@ bool StdevIf::convertImpl(String &out,IParser::Pos &pos) bool Sum::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"sum"); } bool SumIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"sumIf"); } bool TakeAny::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index cdac747edf08..eea72798f825 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -150,13 +150,23 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte sub_columns = sub_groupby; else sub_columns = sub_groupby + "," + sub_aggregation; - sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+")"; + sub_query = "SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+""; } Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + String converted_columns = getExprFromToken(pos_subquery); + converted_columns = "(" + converted_columns + ")"; + + //std::cout << "MALLIK converted_columns: " << converted_columns << std::endl; - if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) + Tokens token_converted_columns(converted_columns.c_str(), 
converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + //if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) + //return false; + if (!ParserTablesInSelectQuery().parse(pos_converted_columns, sub_qurery_table, expected)) return false; tables = sub_qurery_table; } @@ -200,14 +210,14 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte else { - if (String(pos->begin, pos->end) == "=") + /*if (String(pos->begin, pos->end) == "=") { std::pair temp = removeLastWord(expr_aggregation); expr_aggregation = temp.first; column_name = temp.second; - } - else - { + }*/ + //else + //{ if (!column_name.empty()) { expr_aggregation = expr_aggregation + String(pos->begin, pos->end); @@ -222,7 +232,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } - } + //} } } ++pos; @@ -237,6 +247,11 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte else expr_columns = expr_groupby + "," + expr_aggregation; } + + + /* + Original + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); IParser::Pos pos_columns(token_columns, pos.max_depth); if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) @@ -249,6 +264,35 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } + */ + + // For function + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + + String converted_columns = getExprFromToken(pos_columns); + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + 
IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) + return false; + + if (groupby) + { + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + + String converted_groupby = getExprFromToken(postoken_groupby); + + Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) + return false; + } + + pos = begin; return true; From 950b76c18a349f13ea5728a648ebb06e3de1cb82 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Jun 2022 21:17:17 -0700 Subject: [PATCH 039/342] Aggregate function working with two pipes --- src/Parsers/Kusto/ParserKQLQuery.cpp | 27 ++++++++++++------ src/Parsers/Kusto/ParserKQLSummarize.cpp | 35 ++---------------------- 2 files changed, 20 insertions(+), 42 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index d54344e9ea98..94d31d5d5238 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -11,6 +11,8 @@ #include #include #include + +#include namespace DB { @@ -26,7 +28,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) std::vector tokens; std::unique_ptr kql_function; String alias; - + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); @@ -46,19 +48,27 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) if (pos->type == TokenType::BareWord ) { kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)) + if (kql_function && 
kql_function->convert(new_token,pos)){ token = new_token; + } + } tokens.push_back(token); } - if (pos->type == TokenType::Comma && !alias.empty()) + if (!alias.empty()) { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - tokens.push_back(","); - alias.clear(); + if(pos->type == TokenType::Comma || token == "FROM") + { + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + if(pos->type == TokenType::Comma) + tokens.push_back(","); + else + tokens.push_back("FROM"); + alias.clear(); + } } ++pos; } @@ -68,7 +78,6 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) tokens.push_back("AS"); tokens.push_back(alias); } - for (auto token:tokens) res = res + token +" "; return res; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index eea72798f825..74a32bdba63e 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -142,7 +142,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (sub_groupby.empty()) { sub_columns =sub_aggregation; - sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name+")"; + sub_query = "SELECT " + sub_columns+ " FROM "+ table_name+""; } else { @@ -155,17 +155,12 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); - String converted_columns = getExprFromToken(pos_subquery); converted_columns = "(" + converted_columns + ")"; - //std::cout << "MALLIK converted_columns: " << converted_columns << std::endl; - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); - //if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) - //return false; if 
(!ParserTablesInSelectQuery().parse(pos_converted_columns, sub_qurery_table, expected)) return false; tables = sub_qurery_table; @@ -210,14 +205,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte else { - /*if (String(pos->begin, pos->end) == "=") - { - std::pair temp = removeLastWord(expr_aggregation); - expr_aggregation = temp.first; - column_name = temp.second; - }*/ - //else - //{ if (!column_name.empty()) { expr_aggregation = expr_aggregation + String(pos->begin, pos->end); @@ -232,7 +219,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } - //} + } } ++pos; @@ -247,24 +234,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte else expr_columns = expr_groupby + "," + expr_aggregation; } - - - /* - Original - - Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); - IParser::Pos pos_columns(token_columns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) - return false; - - if (groupby) - { - Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); - IParser::Pos postoken_groupby(token_groupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) - return false; - } - */ // For function Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); From 53b10c22ca5c5b5328b3abe08039050e7baf8506 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 28 Jun 2022 22:03:36 -0700 Subject: [PATCH 040/342] Kusto-phase2: Add table function kql() --- src/Parsers/ExpressionElementParsers.cpp | 46 ++++++++++++++---------- src/Parsers/Kusto/ParserKQLStatement.cpp | 43 ++++++++++++++++++++++ src/Parsers/Kusto/ParserKQLStatement.h | 7 ++++ src/Parsers/ParserCreateQuery.cpp | 24 ++++++++----- 
src/Parsers/ParserInsertQuery.cpp | 8 ++++- 5 files changed, 100 insertions(+), 28 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 74d142924596..29202d8e5397 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -46,7 +46,7 @@ #include #include - +#include namespace DB { @@ -109,30 +109,38 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserSelectWithUnionQuery select; ParserExplainQuery explain; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - ASTPtr result_node = nullptr; + ParserKeyword s_kql("KQL"); - if (ASTPtr select_node; select.parse(pos, select_node, expected)) - { - result_node = std::move(select_node); - } - else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + if (s_kql.ignore(pos, expected)) { - /// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...) - result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node)); + if (!ParserKQLTaleFunction().parse(pos, result_node, expected)) + return false; } - else + else { - return false; - } + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; + if (ASTPtr select_node; select.parse(pos, select_node, expected)) + { + result_node = std::move(select_node); + } + else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + { + /// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...) 
+ result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node)); + } + else + { + return false; + } + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + } node = std::make_shared(); node->children.push_back(result_node); diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 6ce29b8024f9..140684597bde 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -5,6 +5,8 @@ #include #include #include +#include + namespace DB { @@ -58,4 +60,45 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + ASTPtr select; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + auto begin = pos; + auto paren_count = 0 ; + String kql_statement; + + if (s_lparen.ignore(pos, expected)) + { + ++paren_count; + while (!pos->isEnd()) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + + kql_statement = kql_statement + " " + String(pos->begin,pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth); + + if (kql_p.parse(pos_kql, select, expected)) + { + node = select; + ++pos; + return true; + } + } + pos = begin; + return false; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index aa974504d92f..864cda5531ad 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -40,5 +40,12 @@ class ParserKQLWithUnionQuery : public IParserBase bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserKQLTaleFunction : public IParserBase 
+{ +protected: + const char * getName() const override { return "KQL() function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 90df8a8f79a0..9408fce3d781 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -20,7 +20,7 @@ #include #include #include - +#include namespace DB { @@ -631,17 +631,25 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// ENGINE can not be specified for table functions. if (storage || !table_function_p.parse(pos, as_table_function, expected)) { - /// AS [db.]table - if (!name_p.parse(pos, as_table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) + ParserKeyword s_kql("KQL"); + if (s_kql.ignore(pos, expected)) { - as_database = as_table; - if (!name_p.parse(pos, as_table, expected)) + if (!ParserKQLTaleFunction().parse(pos, select, expected)) return false; } + else + { + /// AS [db.]table + if (!name_p.parse(pos, as_table, expected)) + return false; + if (s_dot.ignore(pos, expected)) + { + as_database = as_table; + if (!name_p.parse(pos, as_table, expected)) + return false; + } + } /// Optional - ENGINE can be specified. if (!storage) storage_p.parse(pos, storage, expected); diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 7f8a8d59fd05..5263a7fb3adf 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -12,7 +12,7 @@ #include #include #include "Parsers/IAST_fwd.h" - +#include namespace DB { @@ -47,6 +47,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserFunction table_function_p{false}; ParserStringLiteral infile_name_p; ParserExpressionWithOptionalAlias exp_elem_p(false); + ParserKeyword s_kql("KQL"); /// create ASTPtr variables (result of parsing will be put in them). 
/// They will be used to initialize ASTInsertQuery's fields. @@ -183,6 +184,11 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserWatchQuery watch_p; watch_p.parse(pos, watch, expected); } + else if (!infile && s_kql.ignore(pos, expected)) + { + if (!ParserKQLTaleFunction().parse(pos, select, expected)) + return false; + } else if (!infile) { /// If all previous conditions were false and it's not FROM INFILE, query is incorrect From f4d7f525fdcde438f5094810998e9bedef23449d Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 29 Jun 2022 13:02:14 -0700 Subject: [PATCH 041/342] Kusto-phase 2: Add more string operators --- src/Client/ClientBase.cpp | 45 ++++++++++------- src/Parsers/Kusto/ParserKQLOperators.cpp | 64 ++++++++++++++++++++++-- src/Parsers/Kusto/ParserKQLOperators.h | 4 +- 3 files changed, 88 insertions(+), 25 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 103de6a55e67..355c3ca9ca37 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -320,48 +320,55 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - std::shared_ptr parser; + ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ParserKQLStatement kql_parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ASTPtr res; const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; - auto begin = pos; - if (!allow_multi_statements) max_length = settings.max_query_size; const Dialect & dialect = settings.dialect; - if (dialect == Dialect::kusto) - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); - else - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + auto begin = pos; if (is_interactive || ignore_error) { String message; - res = 
tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); - - if (!res) + if (sql_dialect == "kusto") + res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + else if (sql_dialect == "kusto_auto") { - if (sql_dialect != "kusto") - res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + + res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) - { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; - } + res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + } + else + res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res) + { + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; } } else { - res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + if (sql_dialect == "kusto") + res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + else if (sql_dialect == "kusto_auto") + { + res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); - if (!res && sql_dialect != "kusto") - res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + if (!res) + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } + else + res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git 
a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index a6b909b142bd..0d0a0cffc161 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -12,9 +12,9 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) { - String new_expr, left_wildcards, right_wildcards; + String new_expr, left_wildcards, right_wildcards, left_space, right_space; switch (wildcards_pos) { @@ -35,10 +35,29 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; } + switch (space_pos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + left_space =" "; + break; + + case WildcardsPos::right: + right_space = " "; + break; + + case WildcardsPos::both: + left_space =" "; + right_space = " "; + break; + } + ++token_pos; if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) { String tmp_arg = String(token_pos->begin,token_pos->end); @@ -49,7 +68,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos if (fun && fun->convert(new_arg,token_pos)) tmp_arg = new_arg; } - new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + "', " + tmp_arg +", '"+ right_wildcards + "'))"; + new_expr = ch_op +"(" + 
tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; } else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); @@ -111,10 +130,15 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) op_value = KQLOperator[op]; String new_expr; + + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { + auto last_op = tokens.back(); + auto last_pos = pos; + switch (op_value) { case KQLOperatorValue::contains: @@ -184,27 +208,59 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, 
WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::in_cs: diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 969a1e5c48a0..b36187799273 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -83,7 +83,7 @@ class KQLOperators {"hasprefix" , KQLOperatorValue::hasprefix}, {"!hasprefix" , KQLOperatorValue::not_hasprefix}, {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, {"hassuffix" , KQLOperatorValue::hassuffix}, {"!hassuffix" , KQLOperatorValue::not_hassuffix}, {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, @@ -98,7 +98,7 @@ class KQLOperators {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos 
wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); }; } From 290f86d625374bdd414a2708d7b4de5640bb00c0 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 29 Jun 2022 23:01:17 -0700 Subject: [PATCH 042/342] Kusto-phase2 : Fix the function base64_decode_tostring() --- src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 0c8a0891a013..ba36e4e2e315 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -13,14 +13,14 @@ bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) { - return directMapping(out,pos,"base64Decode"); + String res = String(pos->begin,pos->end); + out = res; + return false; } bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Decode"); } bool Base64DecodeToArray::convertImpl(String &out,IParser::Pos &pos) From 13ce2315159f3d1ee32419c4c5f741bc81eb3633 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 12 Jul 2022 08:49:42 -0700 Subject: [PATCH 043/342] Kusto-phase2: Changed dialect to use enumerate, Added subquery for in operator, fixed the multi query issue --- src/Client/ClientBase.cpp | 18 ++++++---- src/Core/SettingsEnums.cpp | 4 ++- src/Interpreters/executeQuery.cpp | 7 ++-- src/Parsers/Kusto/ParserKQLOperators.cpp | 45 ++++++++++++++++++++++-- src/Parsers/Kusto/ParserKQLOperators.h | 1 + src/Parsers/Kusto/ParserKQLQuery.cpp | 45 +++++++++++++----------- src/Parsers/Kusto/ParserKQLQuery.h | 7 ++-- src/Parsers/Kusto/ParserKQLSummarize.cpp | 42 ++++------------------ src/Parsers/tests/gtest_Parser.cpp | 5 +++ 9 files changed, 99 insertions(+), 75 
deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 355c3ca9ca37..962768cc79ff 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -337,15 +337,18 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (is_interactive || ignore_error) { String message; - if (sql_dialect == "kusto") + if (dialect == Dialect::kusto) res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); - else if (sql_dialect == "kusto_auto") + else if (dialect == Dialect::kusto_auto) { res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) - res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + { + pos = begin; + res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + } } else res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); @@ -358,14 +361,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - if (sql_dialect == "kusto") + if (dialect == Dialect::kusto) res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); - else if (sql_dialect == "kusto_auto") + else if (dialect == Dialect::kusto_auto) { res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) - res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + { + pos = begin; + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } } else res = parseQueryAndMovePosition(parser, pos, end, "", 
allow_multi_statements, max_length, settings.max_parser_depth); diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 632587106a1a..19d43727cabb 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -161,5 +161,7 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, - {"kusto", Dialect::kusto}}) + {"kusto", Dialect::kusto}, + {"kusto_auto", Dialect::kusto_auto}}) + } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 1702db21cdce..d26bd1fa3b33 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -357,15 +357,14 @@ static std::tuple executeQueryImpl( /// Parse the query from string. try { - const String & sql_dialect = settings.sql_dialect; - assert(sql_dialect == "clickhouse" || sql_dialect == "kusto" || sql_dialect == "kusto_auto"); + const Dialect & dialect = settings.dialect; - if (sql_dialect == "kusto" && !internal) + if (dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } - else if (sql_dialect == "kusto_auto" && !internal) + else if (dialect == Dialect::kusto_auto && !internal) { try { ParserQuery parser(end, settings.allow_settings_after_format_in_insert); diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 0d0a0cffc161..e3f3978fdeb6 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace DB { @@ -12,6 +14,44 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } +String KQLOperators::genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op) +{ + ParserKQLTaleFunction kqlfun_p; + String 
new_expr; + + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ASTPtr select; + Expected expected; + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + auto pos = token_pos; + if (kqlfun_p.parse(pos,select,expected)) + { + new_expr = ch_op + " kql"; + auto tmp_pos = token_pos; + while (tmp_pos != pos) + { + new_expr = new_expr + " " + String(tmp_pos->begin,tmp_pos->end); + ++tmp_pos; + } + + if (pos->type != TokenType::ClosingRoundBracket) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + token_pos = pos; + return new_expr; + } + + --token_pos; + --token_pos; + return ch_op; + +} + String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) { String new_expr, left_wildcards, right_wildcards, left_space, right_space; @@ -131,7 +171,6 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) String new_expr; - if (op_value == KQLOperatorValue::none) tokens.push_back(op); else @@ -264,10 +303,10 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::in_cs: - new_expr = "in"; + new_expr = genInOpExpr(pos,op,"in"); break; case KQLOperatorValue::not_in_cs: - new_expr = "not in"; + new_expr = genInOpExpr(pos,op,"not in"); break; case KQLOperatorValue::in: diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index b36187799273..d181fb89936b 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -99,6 +99,7 @@ class KQLOperators {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); + static String genInOpExpr(IParser::Pos 
&token_pos,String kql_op, String ch_op); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 94d31d5d5238..cd8c071e0fc5 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -11,11 +11,14 @@ #include #include #include - -#include namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_FUNCTION; +} + bool ParserKQLBase :: parsePrepare(Pos & pos) { op_pos.push_back(pos); @@ -28,7 +31,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) std::vector tokens; std::unique_ptr kql_function; String alias; - + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); @@ -48,27 +51,25 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) if (pos->type == TokenType::BareWord ) { kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)){ + if (kql_function && kql_function->convert(new_token,pos)) token = new_token; - } - + /* else if (!kql_function) + { + if ((++pos)->type == TokenType::OpeningRoundBracket) + throw Exception("Unknown function " + token, ErrorCodes::UNKNOWN_FUNCTION); + --pos; + }*/ } tokens.push_back(token); } - if (!alias.empty()) + if (pos->type == TokenType::Comma && !alias.empty()) { - if(pos->type == TokenType::Comma || token == "FROM") - { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - if(pos->type == TokenType::Comma) - tokens.push_back(","); - else - tokens.push_back("FROM"); - alias.clear(); - } + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); } ++pos; } @@ -78,6 +79,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) tokens.push_back("AS"); tokens.push_back(alias); } + for (auto token:tokens) res = res + token +" "; return res; @@ -119,9 +121,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & 
expected) operation_pos.push_back(std::make_pair("table",pos)); String table_name(pos->begin,pos->end); - while (!pos->isEnd()) + ++pos; + while (!pos->isEnd() && pos->type != TokenType::Semicolon) { - ++pos; if (pos->type == TokenType::PipeMark) { ++pos; @@ -130,7 +132,10 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; operation_pos.push_back(std::make_pair(kql_operator,pos)); + kql_parser[kql_operator]->getExprFromToken(pos); } + else + ++pos; } for (auto &op_pos : operation_pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 42122fb6e00a..2cfec703fd4f 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -8,12 +8,9 @@ class ParserKQLBase : public IParserBase { public: virtual bool parsePrepare(Pos & pos); - std::vector op_pos; - -protected: - - std::vector expressions; virtual String getExprFromToken(Pos &pos); + + std::vector op_pos; }; class ParserKQLQuery : public IParserBase diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 74a32bdba63e..49a3569f9638 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -116,57 +116,27 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte auto begin = pos; ASTPtr sub_qurery_table; -// rewrite this part, make it resusable (may contains bin etc, and please inmplement summarize age= avg(Age) for sub query too): if (op_pos.size() == 2) { - bool groupby = false; + String sub_query = "kql("+ table_name +"|summarize "; auto sub_pos = op_pos.front(); - String sub_aggregation; - String sub_groupby; - String sub_columns; + while (!sub_pos->isEnd() && sub_pos->type != TokenType::PipeMark && sub_pos->type != TokenType::Semicolon) { - if (String(sub_pos->begin,sub_pos->end) == "by") - groupby = true; - else - { - if (groupby) - sub_groupby = sub_groupby + 
String(sub_pos->begin,sub_pos->end) +" "; - else - sub_aggregation = sub_aggregation + String(sub_pos->begin,sub_pos->end) +" "; - } + sub_query = sub_query + " " +String(sub_pos->begin,sub_pos->end); ++sub_pos; } - - String sub_query; - if (sub_groupby.empty()) - { - sub_columns =sub_aggregation; - sub_query = "SELECT " + sub_columns+ " FROM "+ table_name+""; - } - else - { - if (sub_aggregation.empty()) - sub_columns = sub_groupby; - else - sub_columns = sub_groupby + "," + sub_aggregation; - sub_query = "SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+""; - } + sub_query+=")"; Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); - String converted_columns = getExprFromToken(pos_subquery); - converted_columns = "(" + converted_columns + ")"; - - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); - IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); - if (!ParserTablesInSelectQuery().parse(pos_converted_columns, sub_qurery_table, expected)) + if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) return false; + tables = sub_qurery_table; } - pos = op_pos.back(); String expr_aggregation; String expr_groupby; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5ba7fbdc3fd8..70730d534771 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -473,5 +473,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName !startswith 'pet'", "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + }, + { + "Customers | where Age in ((Customers|project Age|where Age < 30))", + "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" } + }))); From 3cb6b8a0d6eafadd5a04ecefeca3a482e9395fa7 Mon Sep 17 00:00:00 2001 
From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 14 Jul 2022 09:00:51 -0700 Subject: [PATCH 044/342] Implement some IP-handling functions --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 117 +++++++++++------- src/Parsers/tests/gtest_Parser.cpp | 25 +++- 2 files changed, 99 insertions(+), 43 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index f271d924affc..bdda0827d03f 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -1,40 +1,69 @@ -#include -#include #include #include -#include -#include +#include #include -#include -#include -#include -#include #include -#include -#include #include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +} + +namespace +{ +String trimQuotes(const String & str) +{ + static constexpr auto sQuote = '\''; + + const auto firstIndex = str.find(sQuote); + const auto lastIndex = str.rfind(sQuote); + if (firstIndex == String::npos || lastIndex == String::npos) + throw DB::Exception("Syntax error, improper quotation: " + str, DB::ErrorCodes::SYNTAX_ERROR); + + return str.substr(firstIndex + 1, lastIndex - firstIndex - 1); +} +} namespace DB { -bool Ipv4Compare::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Ipv4IsInRange::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto functionName = getKQLFunctionName(pos); + ++pos; + + const auto ipAddress = getConvertedArgument(functionName, pos); + ++pos; + + const auto ipRange = 
getConvertedArgument(functionName, pos); + const auto slashIndex = ipRange.find('/'); + out = std::format(slashIndex == String::npos ? "{0} = {1}" : "isIPAddressInRange({0}, {1})", ipAddress, ipRange); + return true; } -bool Ipv4IsMatch::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } @@ -46,65 +75,69 @@ bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) return false; } -bool Ipv4NetmaskSuffix::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + static constexpr auto sDefaultNetmask = 32; + + const auto functionName = getKQLFunctionName(pos); + ++pos; + + const auto ipRange = trimQuotes(getConvertedArgument(functionName, pos)); + const auto slashIndex = ipRange.find('/'); + const auto ipAddress = ipRange.substr(0, slashIndex); + const auto netmask = slashIndex == String::npos ? 
sDefaultNetmask : std::strtol(ipRange.c_str() + slashIndex + 1, nullptr, 10); + out = std::format("if(and(isIPv4String('{0}'), {1} between 1 and 32), {1}, null)", ipAddress, netmask); + return true; } -bool ParseIpv4::convertImpl(String &out,IParser::Pos &pos) +bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toIPv4OrNull"); } -bool ParseIpv4Mask::convertImpl(String &out,IParser::Pos &pos) +bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Ipv6Compare::convertImpl(String &out,IParser::Pos &pos) +bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Ipv6IsMatch::convertImpl(String &out,IParser::Pos &pos) +bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ParseIpv6::convertImpl(String &out,IParser::Pos &pos) +bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toIPv6OrNull"); } -bool ParseIpv6Mask::convertImpl(String &out,IParser::Pos &pos) +bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool FormatIpv4::convertImpl(String &out,IParser::Pos &pos) +bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool FormatIpv4Mask::convertImpl(String &out,IParser::Pos &pos) 
+bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 70730d534771..18057a132b05 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -477,6 +477,29 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where Age in ((Customers|project Age|where Age < 30))", "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')", + "SELECT '127.0.0.1' = '127.0.0.1'\nFROM Customers" + }, + { + "Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')", + "SELECT isIPAddressInRange('192.168.1.6', '192.168.1.1/24')\nFROM Customers" + }, + { + "Customers | project ipv4_netmask_suffix('192.168.1.1/24')", + "SELECT if(isIPv4String('192.168.1.1') AND ((24 >= 1) AND (24 <= 32)), 24, NULL)\nFROM Customers" + }, + { + "Customers | project ipv4_netmask_suffix('192.168.1.1')", + "SELECT if(isIPv4String('192.168.1.1') AND ((32 >= 1) AND (32 <= 32)), 32, NULL)\nFROM Customers" + }, + { + "Customers | project parse_ipv4('127.0.0.1')", + "SELECT toIPv4OrNull('127.0.0.1')\nFROM Customers" + }, + { + "Customers | project parse_ipv6('127.0.0.1')", + "SELECT toIPv6OrNull('127.0.0.1')\nFROM Customers" } - }))); From d23fb105fbc578d883105a48229ee3c4f1baa08b Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:13:44 -0700 Subject: [PATCH 045/342] Implement review comments --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index bdda0827d03f..8e10d59c7870 100644 --- 
a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -26,14 +26,14 @@ namespace { String trimQuotes(const String & str) { - static constexpr auto sQuote = '\''; + static constexpr auto QUOTE = '\''; - const auto firstIndex = str.find(sQuote); - const auto lastIndex = str.rfind(sQuote); - if (firstIndex == String::npos || lastIndex == String::npos) + const auto first_index = str.find(QUOTE); + const auto last_index = str.rfind(QUOTE); + if (first_index == String::npos || last_index == String::npos) throw DB::Exception("Syntax error, improper quotation: " + str, DB::ErrorCodes::SYNTAX_ERROR); - return str.substr(firstIndex + 1, lastIndex - firstIndex - 1); + return str.substr(first_index + 1, last_index - first_index - 1); } } @@ -49,15 +49,18 @@ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) { - const auto functionName = getKQLFunctionName(pos); + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + ++pos; - const auto ipAddress = getConvertedArgument(functionName, pos); + const auto ip_address = getConvertedArgument(function_name, pos); ++pos; - const auto ipRange = getConvertedArgument(functionName, pos); - const auto slashIndex = ipRange.find('/'); - out = std::format(slashIndex == String::npos ? "{0} = {1}" : "isIPAddressInRange({0}, {1})", ipAddress, ipRange); + const auto ip_range = getConvertedArgument(function_name, pos); + const auto slash_index = ip_range.find('/'); + out = std::format(slash_index == String::npos ? 
"{0} = {1}" : "isIPAddressInRange({0}, {1})", ip_address, ip_range); return true; } @@ -72,21 +75,24 @@ bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; - return false; + return false; } bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) { - static constexpr auto sDefaultNetmask = 32; + static constexpr auto DEFAULT_NETMASK = 32; + + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; - const auto functionName = getKQLFunctionName(pos); ++pos; - const auto ipRange = trimQuotes(getConvertedArgument(functionName, pos)); - const auto slashIndex = ipRange.find('/'); - const auto ipAddress = ipRange.substr(0, slashIndex); - const auto netmask = slashIndex == String::npos ? sDefaultNetmask : std::strtol(ipRange.c_str() + slashIndex + 1, nullptr, 10); - out = std::format("if(and(isIPv4String('{0}'), {1} between 1 and 32), {1}, null)", ipAddress, netmask); + const auto ip_range = trimQuotes(getConvertedArgument(function_name, pos)); + const auto slash_index = ip_range.find('/'); + const std::string_view ip_address(ip_range.c_str(), std::min(ip_range.length(), slash_index)); + const auto netmask = slash_index == String::npos ? 
DEFAULT_NETMASK : std::strtol(ip_range.c_str() + slash_index + 1, nullptr, 10); + out = std::format("if(and(isIPv4String('{0}'), {1} between 1 and 32), {1}, null)", ip_address, netmask); return true; } @@ -141,5 +147,4 @@ bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) out = res; return false; } - } From d3b8e515eb7a7ede02521ab088e5a5688f5a1d26 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:14:01 -0700 Subject: [PATCH 046/342] Implement ipv4_is_private --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 30 +++++++++++++++++-- src/Parsers/tests/gtest_Parser.cpp | 8 +++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 8e10d59c7870..d8de9cc4e9a9 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -71,11 +71,35 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) return false; } -bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + static const std::array PRIVATE_SUBNETS{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; + + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) return false; + + const auto ip_address = trimQuotes(getConvertedArgument(function_name, pos)); + const auto slash_index = ip_address.find('/'); + + out += "or("; + for (int i = 0; i < std::ssize(PRIVATE_SUBNETS); ++i) + { + out += i > 0 ? ", " : ""; + + const auto & subnet = PRIVATE_SUBNETS[i]; + out += slash_index == String::npos + ? 
std::format("isIPAddressInRange('{0}', '{1}')", ip_address, subnet) + : std::format( + "and(isIPAddressInRange(IPv4NumToString(tupleElement((IPv4CIDRToRange(toIPv4('{0}'), {1}) as range), 1)) as begin, '{2}'), " + "isIPAddressInRange(IPv4NumToString(tupleElement(range, 2)) as end, '{2}'))", + std::string_view(ip_address.c_str(), slash_index), + std::string_view(ip_address.c_str() + slash_index + 1, ip_address.length() - slash_index - 1), + subnet); + } + + out += ")"; + return true; } bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 18057a132b05..ff7bad6c3553 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -486,6 +486,14 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')", "SELECT isIPAddressInRange('192.168.1.6', '192.168.1.1/24')\nFROM Customers" }, + { + "Customers | project ipv4_is_private('192.168.1.6')", + "SELECT isIPAddressInRange('192.168.1.6', '10.0.0.0/8') OR isIPAddressInRange('192.168.1.6', '172.16.0.0/12') OR isIPAddressInRange('192.168.1.6', '192.168.0.0/16')\nFROM Customers" + }, + { + "Customers | project ipv4_is_private('192.168.1.6/24')", + "SELECT (isIPAddressInRange(IPv4NumToString((IPv4CIDRToRange(toIPv4('192.168.1.6'), 24) AS range).1) AS begin, '10.0.0.0/8') AND isIPAddressInRange(IPv4NumToString(range.2) AS end, '10.0.0.0/8')) OR (isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12')) OR (isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16'))\nFROM Customers" + }, { "Customers | project ipv4_netmask_suffix('192.168.1.1/24')", "SELECT if(isIPv4String('192.168.1.1') AND ((24 >= 1) AND (24 <= 32)), 24, NULL)\nFROM Customers" From 543aa70d799ea93b641ef4cc9072672c94aa0200 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 15 Jul 2022 06:54:23 -0700 
Subject: [PATCH 047/342] Kusto-phase2: Added some string functions and release note --- src/Parsers/Kusto/KQL_ReleaseNote.md | 213 +++++++++ .../KustoFunctions/KQLStringFunctions.cpp | 423 +++++++++++++++--- src/Parsers/Kusto/ParserKQLOperators.cpp | 42 ++ src/Parsers/Kusto/ParserKQLOperators.h | 1 + 4 files changed, 608 insertions(+), 71 deletions(-) create mode 100644 src/Parsers/Kusto/KQL_ReleaseNote.md diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md new file mode 100644 index 000000000000..f5ee880db8b7 --- /dev/null +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -0,0 +1,213 @@ +# KQL implemented features. + + +# July 17, 2022 + +## Renamed dialect from sql_dialect to dialect + +`set sql_dialect='clickhouse'` +`set sql_dialect='kusto'` +`set sql_dialect='kusto_auto'` + +## string functions +- **support subquery for `in` operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) + (subquery needs to be wrapped with bracket inside bracket) + + `Customers | where Age in ((Customers|project Age|where Age < 30))` + Note: case-insensitive not supported yet +- **has_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator) + `Customers|where Occupation has_all ('Skilled','abcd')` + note : subquery not supported yet +- **has_any** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-operator) + `Customers|where Occupation has_any ('Skilled','abcd')` + note : subquery not supported yet +- **countof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) + `Customers | project countof('The cat sat on the mat', 'at')` + `Customers | project countof('The cat sat on the mat', 'at', 'normal')` + `Customers | project countof('The cat sat on the mat', 'at', 'regex')` +- **extract** ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 
'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real))` + +- **extract_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction) + + `Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20')` + note: captureGroups not supported yet + +- **split** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) + `Customers | project split('aa_bb', '_')` + `Customers | project split('aaa_bbb_ccc', '_', 1)` + `Customers | project split('', '_')` + `Customers | project split('a__b', '_')` + `Customers | project split('aabbcc', 'bb')` + +- **strcat_delim** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction) + `Customers | project strcat_delim('-', '1', '2', 'A')` + `Customers | project strcat_delim('-', '1', '2', strcat('A','b'))` + note: only supports string now. 
+ +- **indexof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction) + `Customers | project indexof('abcdefg','cde')` + `Customers | project indexof('abcdefg','cde',2)` + `Customers | project indexof('abcdefg','cde',6)` + note: length and occurrence not supported yet + + + + +# July 4, 2022 + +## sql_dialect + +- default is `clickhouse` + `set sql_dialect='clickhouse'` +- only process kql + `set sql_dialect='kusto'` +- process both kql and CH sql + `set sql_dialect='kusto_auto'` +## KQL() function + + - create table + `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName,Age);` + verify the content of `kql_table` + `select * from kql_table` + + - insert into table + create a tmp table: + ``` + CREATE TABLE temp + ( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) + ) ENGINE = Memory; + ``` + `INSERT INTO temp select * from kql(Customers|project FirstName,LastName,Age);` + verify the content of `temp` + `select * from temp` + + - Select from kql() + `Select * from kql(Customers|project FirstName)` + +## KQL operators: + - Tabular expression statements + `Customers` + - Select Column + `Customers | project FirstName,LastName,Occupation` + - Limit returned results + `Customers | project FirstName,LastName,Occupation | take 1 | take 3` + - sort, order + `Customers | order by Age desc , FirstName asc` + - Filter + `Customers | where Occupation == 'Skilled Manual'` + - summarize + `Customers |summarize max(Age) by Occupation` + +## KQL string operators and functions + - contains + `Customers |where Education contains 'degree'` + - !contains + `Customers |where Education !contains 'degree'` + - contains_cs + `Customers |where Education contains 'Degree'` + - !contains_cs + `Customers |where Education !contains 'Degree'` + - endswith + `Customers | where FirstName endswith 'RE'` + - !endswith + `Customers | where !FirstName endswith 'RE'` + - endswith_cs + 
`Customers | where FirstName endswith_cs 're'` + - !endswith_cs + `Customers | where FirstName !endswith_cs 're'` + - == + `Customers | where Occupation == 'Skilled Manual'` + - != + `Customers | where Occupation != 'Skilled Manual'` + - has + `Customers | where Occupation has 'skilled'` + - !has + `Customers | where Occupation !has 'skilled'` + - has_cs + `Customers | where Occupation has 'Skilled'` + - !has_cs + `Customers | where Occupation !has 'Skilled'` + - hasprefix + `Customers | where Occupation hasprefix_cs 'Ab'` + - !hasprefix + `Customers | where Occupation !hasprefix_cs 'Ab'` + - hasprefix_cs + `Customers | where Occupation hasprefix_cs 'ab'` + - !hasprefix_cs + `Customers | where Occupation! hasprefix_cs 'ab'` + - hassuffix + `Customers | where Occupation hassuffix 'Ent'` + - !hassuffix + `Customers | where Occupation !hassuffix 'Ent'` + - hassuffix_cs + `Customers | where Occupation hassuffix 'ent'` + - !hassuffix_cs + `Customers | where Occupation hassuffix 'ent'` + - in + `Customers |where Education in ('Bachelors','High School')` + - !in + `Customers | where Education !in ('Bachelors','High School')` + - matches regex + `Customers | where FirstName matches regex 'P.*r'` + - startswith + `Customers | where FirstName startswith 'pet'` + - !startswith + `Customers | where FirstName !startswith 'pet'` + - startswith_cs + `Customers | where FirstName startswith_cs 'pet'` + - !startswith_cs + `Customers | where FirstName !startswith_cs 'pet'` + + - base64_encode_tostring() + `Customers | project base64_encode_tostring('Kusto1') | take 1` + - base64_decode_tostring() + `Customers | project base64_decode_tostring('S3VzdG8x') | take 1` + - isempty() + `Customers | where isempty(LastName)` + - isnotempty() + `Customers | where isnotempty(LastName)` + - isnotnull() + `Customers | where isnotnull(FirstName)` + - isnull() + `Customers | where isnull(FirstName)` + - url_decode() + `Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | 
take 1` + - url_encode() + `Customers | project url_encode('https://www.test.com/hello word') | take 1` + - substring() + `Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))` + - strcat() + `Customers | project name = strcat(FirstName, ' ', LastName)` + - strlen() + `Customers | project FirstName, strlen(FirstName)` + - strrep() + `Customers | project strrep(FirstName,2,'_')` + - toupper() + `Customers | project toupper(FirstName)` + - tolower() + `Customers | project tolower(FirstName)` + + ## Aggregate Functions + - arg_max() + - arg_min() + - avg() + - avgif() + - count() + - countif() + - max() + - maxif() + - min() + - minif() + - sum() + - sumif() + - dcount() + - dcountif() + - bin \ No newline at end of file diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index ba36e4e2e315..919e620dac06 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -2,193 +2,475 @@ #include #include #include +#include +#include + +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +} namespace DB { -bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) +bool Base64EncodeToString::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"base64Encode"); } -bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) +bool Base64EncodeFromGuid::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) +bool Base64DecodeToString::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"base64Decode"); } -bool Base64DecodeToArray::convertImpl(String &out,IParser::Pos &pos) +bool Base64DecodeToArray::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out 
= res; return false; } -bool Base64DecodeToGuid::convertImpl(String &out,IParser::Pos &pos) +bool Base64DecodeToGuid::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool CountOf::convertImpl(String &out,IParser::Pos &pos) +bool CountOf::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const String search = getConvertedArgument(fn_name, pos); + + String kind = "'normal' "; + if (pos->type == TokenType::Comma) + { + ++pos; + kind = getConvertedArgument(fn_name,pos); + } + assert (kind =="'normal' " || kind =="'regex' "); + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (kind == "'normal' " ) + out = "countSubstrings(" + source + ", " + search + ")"; + else + out = "countMatches("+ source + ", " + search + ")"; + return true; + } + pos = begin; return false; } -bool Extract::convertImpl(String &out,IParser::Pos &pos) +bool Extract::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + size_t capture_group = stoi(getConvertedArgument(fn_name, pos)); + + ++pos; + String source = getConvertedArgument(fn_name, pos); + + String type_literal; + + if (pos->type == TokenType::Comma) + { + ++pos; + type_literal = getConvertedArgument(fn_name, pos); + } + + if (capture_group == 0) + { + String tmp_regex; + for (auto c : regex) + { + if (c != '(' && c != ')') + tmp_regex += c; + } + regex = 
std::move(tmp_regex); + } + else + { + size_t group_idx = 0; + size_t str_idx = -1; + for (size_t i = 0; i < regex.length(); ++i) + { + if (regex[i] == '(') + { + ++group_idx; + if (group_idx == capture_group) + { + str_idx = i + 1; + break; + } + } + } + String tmp_regex; + if (str_idx > 0) + { + for (size_t i = str_idx; i < regex.length(); ++i) + { + if (regex[i] == ')') + break; + tmp_regex += regex[i]; + } + } + regex = "'" + tmp_regex + "'"; + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + out = "extract(" + source + ", " + regex + ")"; + if (!type_literal.empty()) + { + std::unordered_map type_cast = + { {"bool", "Boolean"}, + {"boolean", "Boolean"}, + {"datetime", "DateTime"}, + {"date", "DateTime"}, + {"dynamic", "Array"}, + {"guid", "UUID"}, + {"int", "Int32"}, + {"long", "Int64"}, + {"real", "Float64"}, + {"double", "Float64"}, + {"string", "String"}, + {"decimal", "Decimal"} + }; + + Tokens token_type(type_literal.c_str(), type_literal.c_str() + type_literal.size()); + IParser::Pos pos_type(token_type, pos.max_depth); + ParserKeyword s_kql("typeof"); + Expected expected; + + if (s_kql.ignore(pos_type, expected)) + { + ++pos_type; + auto kql_type= String(pos_type->begin,pos_type->end); + if (type_cast.find(kql_type) == type_cast.end()) + return false; + auto ch_type = type_cast[kql_type]; + out = "CAST(" + out + ", '" + ch_type + "')"; + } + else + return false; + } + return true; + } + + pos = begin; return false; } -bool ExtractAll::convertImpl(String &out,IParser::Pos &pos) +bool ExtractAll::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String regex = getConvertedArgument(fn_name, pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const String second_arg = getConvertedArgument(fn_name, pos); + + String third_arg; + if (pos->type == 
TokenType::Comma) + { + ++pos; + third_arg = getConvertedArgument(fn_name, pos); + } + + if (!third_arg.empty()) // currently the captureGroups not supported + return false; + + if (pos->type == TokenType::ClosingRoundBracket) + { + out = "extractAllGroups(" + second_arg + ", " + regex + ")"; + return true; + } + pos = begin; return false; } -bool ExtractJson::convertImpl(String &out,IParser::Pos &pos) +bool ExtractJson::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool HasAnyIndex::convertImpl(String &out,IParser::Pos &pos) +bool HasAnyIndex::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool IndexOf::convertImpl(String &out,IParser::Pos &pos) +bool IndexOf::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + int start_index = 0, length = -1, occurrence = 1; + + String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const String lookup = getConvertedArgument(fn_name, pos); + + if (pos->type == TokenType::Comma) + { + ++pos; + start_index = stoi(getConvertedArgument(fn_name, pos)); + + if (pos->type == TokenType::Comma) + { + ++pos; + length = stoi(getConvertedArgument(fn_name, pos)); + + if (pos->type == TokenType::Comma) + { + ++pos; + occurrence = stoi(getConvertedArgument(fn_name, pos)); + + } + } + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (occurrence < 0 || length < -1) + out = ""; + else if (length == -1) + out = "position(" + source + ", " + lookup + ", " + std::to_string(start_index + 1) + ") - 1"; + else + { + + } + + return true; + } + + pos = begin; return false; } -bool IsEmpty::convertImpl(String &out,IParser::Pos &pos) +bool IsEmpty::convertImpl(String & 
out,IParser::Pos & pos) { - return directMapping(out,pos,"empty"); + return directMapping(out, pos, "empty"); } -bool IsNotEmpty::convertImpl(String &out,IParser::Pos &pos) +bool IsNotEmpty::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"notEmpty"); + return directMapping(out, pos, "notEmpty"); } -bool IsNotNull::convertImpl(String &out,IParser::Pos &pos) +bool IsNotNull::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"isNotNull"); + return directMapping(out, pos, "isNotNull"); } -bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) +bool ParseCommandLine::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool IsNull::convertImpl(String &out,IParser::Pos &pos) +bool IsNull::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"isNull"); + return directMapping(out, pos, "isNull"); } -bool ParseCSV::convertImpl(String &out,IParser::Pos &pos) +bool ParseCSV::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseJson::convertImpl(String &out,IParser::Pos &pos) +bool ParseJson::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseURL::convertImpl(String &out,IParser::Pos &pos) +bool ParseURL::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseURLQuery::convertImpl(String &out,IParser::Pos &pos) +bool ParseURLQuery::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseVersion::convertImpl(String &out,IParser::Pos &pos) +bool ParseVersion::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ReplaceRegex::convertImpl(String &out,IParser::Pos &pos) 
+bool ReplaceRegex::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Reverse::convertImpl(String &out,IParser::Pos &pos) +bool Reverse::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Split::convertImpl(String &out,IParser::Pos &pos) +bool Split::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const String delimiter = getConvertedArgument(fn_name, pos); + + int requestedIndex = -1; + if (pos->type == TokenType::Comma) + { + ++pos; + requestedIndex = std::stoi(getConvertedArgument(fn_name, pos)); + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + out = "splitByString(" + delimiter + ", " + source + ")"; + if (requestedIndex >= 0) + { + out = "arrayPushBack([],arrayElement(" + out + ", " + std::to_string(requestedIndex + 1) + "))"; + } + return true; + } + + pos = begin; return false; } -bool StrCat::convertImpl(String &out,IParser::Pos &pos) +bool StrCat::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"concat"); + return directMapping(out, pos, "concat"); } -bool StrCatDelim::convertImpl(String &out,IParser::Pos &pos) +bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String delimiter = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + int arg_count = 0; + String args; + + while (!pos->isEnd() && pos->type != TokenType::Semicolon && 
pos->type != TokenType::ClosingRoundBracket) + { + ++pos; + String arg = getConvertedArgument(fn_name, pos); + if (args.empty()) + args = "concat(" + arg; + else + args = args + ", " + delimiter + ", " + arg; + ++arg_count; + } + args += ")"; + + if (arg_count < 2 || arg_count > 64) + throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); + + if (pos->type == TokenType::ClosingRoundBracket) + { + out = std::move(args); + return true; + } + + pos = begin; return false; } -bool StrCmp::convertImpl(String &out,IParser::Pos &pos) +bool StrCmp::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool StrLen::convertImpl(String &out,IParser::Pos &pos) +bool StrLen::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"lengthUTF8"); + return directMapping(out, pos, "lengthUTF8"); } -bool StrRep::convertImpl(String &out,IParser::Pos &pos) +bool StrRep::convertImpl(String & out,IParser::Pos & pos) { - String fn_name = getKQLFunctionName(pos); //String(pos->begin,pos->end); + String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -196,18 +478,18 @@ bool StrRep::convertImpl(String &out,IParser::Pos &pos) auto begin = pos; ++pos; - String value = getConvertedArgument(fn_name,pos); + String value = getConvertedArgument(fn_name, pos); if (pos->type != TokenType::Comma) return false; ++pos; - String multiplier = getConvertedArgument(fn_name,pos); + String multiplier = getConvertedArgument(fn_name, pos); String delimiter; if (pos->type == TokenType::Comma) { ++pos; - delimiter = getConvertedArgument(fn_name,pos); + delimiter = getConvertedArgument(fn_name, pos); } if (pos->type == TokenType::ClosingRoundBracket) @@ -227,10 +509,9 @@ bool StrRep::convertImpl(String &out,IParser::Pos &pos) return false; } - -bool SubString::convertImpl(String &out,IParser::Pos &pos) +bool SubString::convertImpl(String & out,IParser::Pos & pos) 
{ - String fn_name = getKQLFunctionName(pos); + String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -238,19 +519,19 @@ bool SubString::convertImpl(String &out,IParser::Pos &pos) auto begin = pos; ++pos; - String source = getConvertedArgument(fn_name,pos); + String source = getConvertedArgument(fn_name, pos); if (pos->type != TokenType::Comma) return false; ++pos; - String startingIndex = getConvertedArgument(fn_name,pos); + String startingIndex = getConvertedArgument(fn_name, pos); String length; if (pos->type == TokenType::Comma) { ++pos; - length = getConvertedArgument(fn_name,pos); + length = getConvertedArgument(fn_name, pos); } if (pos->type == TokenType::ClosingRoundBracket) @@ -265,52 +546,52 @@ bool SubString::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ToLower::convertImpl(String &out,IParser::Pos &pos) +bool ToLower::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"lower"); + return directMapping(out, pos, "lower"); } -bool ToUpper::convertImpl(String &out,IParser::Pos &pos) +bool ToUpper::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"upper"); + return directMapping(out, pos, "upper"); } -bool Translate::convertImpl(String &out,IParser::Pos &pos) +bool Translate::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Trim::convertImpl(String &out,IParser::Pos &pos) +bool Trim::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool TrimEnd::convertImpl(String &out,IParser::Pos &pos) +bool TrimEnd::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool TrimStart::convertImpl(String &out,IParser::Pos &pos) +bool TrimStart::convertImpl(String & out,IParser::Pos & pos) { - String res = 
String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool URLDecode::convertImpl(String &out,IParser::Pos &pos) +bool URLDecode::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"decodeURLComponent"); + return directMapping(out, pos, "decodeURLComponent"); } -bool URLEncode::convertImpl(String &out,IParser::Pos &pos) +bool URLEncode::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"encodeURLComponent"); + return directMapping(out, pos, "encodeURLComponent"); } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index e3f3978fdeb6..fbab8e829d14 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -14,6 +14,46 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } +String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op) +{ + String new_expr; + Expected expected; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + auto haystack = tokens.back(); + + String logic_op = (kql_op == "has_all") ? 
" and " : " or "; + + while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + { + String tmp_arg = String(token_pos->begin, token_pos->end); + if (token_pos->type == TokenType::BareWord ) + { + String new_arg; + auto fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,token_pos)) + tmp_arg = new_arg; + } + + if (token_pos->type == TokenType::Comma ) + new_expr = new_expr + logic_op; + else + new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + + ++token_pos; + if (token_pos->type == TokenType::ClosingRoundBracket) + break; + + } + + tokens.pop_back(); + return new_expr; +} + String KQLOperators::genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op) { ParserKQLTaleFunction kqlfun_p; @@ -233,9 +273,11 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::has_all: + new_expr = genHasAnyAllOpExpr(tokens,pos,"has_all", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens,pos,"has_any", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_cs: diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index d181fb89936b..038850f981de 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -100,6 +100,7 @@ class KQLOperators }; static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); + static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); }; } From d1c0ef9e8ce3e5e25aa1cc7a1859356de91e2a90 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:32:49 -0700 Subject: [PATCH 048/342] 
Update release notes --- src/Parsers/Kusto/KQL_ReleaseNote.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index f5ee880db8b7..9b5bfd182fa3 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -5,9 +5,24 @@ ## Renamed dialect from sql_dialect to dialect -`set sql_dialect='clickhouse'` -`set sql_dialect='kusto'` -`set sql_dialect='kusto_auto'` +`set dialect='clickhouse'` +`set dialect='kusto'` +`set dialect='kusto_auto'` + +## IP functions +- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + `"Customers | project ipv4_is_private('192.168.1.6/24')"` + `"Customers | project ipv4_is_private('192.168.1.6')"` +- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + `"Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')"` + `"Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')"` +- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + `"Customers | project ipv4_netmask_suffix('192.168.1.1/24')"` + `"Customers | project ipv4_netmask_suffix('192.168.1.1')"` +- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) + `"Customers | project parse_ipv4('127.0.0.1')"` +- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) + `"Customers | project parse_ipv6('127.0.0.1')"` ## string functions - **support subquery for `in` orerator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) From cbf0d79df8098ec3fd96db55748b0f038cf15fb4 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 15 Jul 2022 12:13:50 -0700 Subject: [PATCH 049/342] Updated release notes to 
indicate deficiencies --- src/Parsers/Kusto/KQL_ReleaseNote.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 9b5bfd182fa3..47e2e8173916 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -10,6 +10,13 @@ `set dialect='kusto_auto'` ## IP functions +- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) + `"Customers | project parse_ipv4('127.0.0.1')"` +- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) + `"Customers | project parse_ipv6('127.0.0.1')"` + +Please note that the functions listed below only take constant parameters for now. Further improvement is to be expected to support expressions. + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) `"Customers | project ipv4_is_private('192.168.1.6/24')"` `"Customers | project ipv4_is_private('192.168.1.6')"` @@ -19,10 +26,6 @@ - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) `"Customers | project ipv4_netmask_suffix('192.168.1.1/24')"` `"Customers | project ipv4_netmask_suffix('192.168.1.1')"` -- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) - `"Customers | project parse_ipv4('127.0.0.1')"` -- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) - `"Customers | project parse_ipv6('127.0.0.1')"` ## string functions - **support subquery for `in` orerator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) From 598aa9e1d1c91f17a09ae5f4aa3504c43b2c8a73 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 050/342] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 4 
++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 49a3569f9638..651281fcdf5c 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -51,6 +51,10 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } return std::make_pair("", ""); } From 27092eba644347c349f8e5a8159670598ebb93d6 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 051/342] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 651281fcdf5c..059488aca5cc 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -51,9 +51,9 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); From 6398ed18d5f1522ed85ad9fa84b4614518157f11 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 17 Jun 2022 08:47:08 -0700 Subject: [PATCH 052/342] Kusto-phase2 : Added KQL functions interface. 
changed the summarize class for new aggregation functions --- src/Parsers/Kusto/ParserKQLStatement.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 140684597bde..a9da3b47872c 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -59,6 +59,17 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/* +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapParseImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +}*/ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { From d3be90e421a3655f938cc0833ce395ff1522d91b Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 21 Jun 2022 09:33:07 -0700 Subject: [PATCH 053/342] Kusto-phase2: Add KQL functions parser --- .../Kusto/KustoFunctions/KQLStringFunctions.cpp | 8 ++------ src/Parsers/Kusto/ParserKQLStatement.cpp | 11 ----------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 919e620dac06..ddd872c20f04 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -314,9 +314,7 @@ bool IsNotNull::convertImpl(String & out,IParser::Pos & pos) bool ParseCommandLine::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"isNull"); } bool IsNull::convertImpl(String & out,IParser::Pos & pos) @@ -458,9 +456,7 @@ bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) bool StrCmp::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + 
return directMapping(out,pos,"lengthUTF8"); } bool StrLen::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index a9da3b47872c..140684597bde 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -59,17 +59,6 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -/* -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) -{ - return wrapParseImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); -}*/ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { From bcede07e8515268771f6c4df1a7b8a700138a06b Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 23 Jun 2022 14:26:37 -0700 Subject: [PATCH 054/342] Kusto-phase2: add kusto_auto dialect --- src/Client/ClientBase.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 962768cc79ff..4b7891f2aaa4 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -327,6 +327,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; + auto begin = pos; + if (!allow_multi_statements) max_length = settings.max_query_size; @@ -355,8 +357,14 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!res) { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; + if (sql_dialect != "kusto") + res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res) + { + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; + } } } else From 6f05f1267e446cc9c5b18bfdd40a49c53df5361b Mon Sep 17 
00:00:00 2001 From: Yong Wang Date: Sat, 16 Jul 2022 07:49:24 -0700 Subject: [PATCH 055/342] Kusto-phase2: Fixed the issue of conflict --- src/Client/ClientBase.cpp | 51 ++++++++++++--------------------------- 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 4b7891f2aaa4..022e3c221346 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -327,8 +327,6 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; - auto begin = pos; - if (!allow_multi_statements) max_length = settings.max_query_size; @@ -357,14 +355,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!res) { - if (sql_dialect != "kusto") - res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); - - if (!res) - { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; - } + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; } } else @@ -2034,21 +2026,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) bool ClientBase::processQueryText(const String & text) { - auto trimmed_input = trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }); - - if (exit_strings.end() != exit_strings.find(trimmed_input)) + if (exit_strings.end() != exit_strings.find(trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }))) return false; - if (trimmed_input.starts_with("\\i")) - { - size_t skip_prefix_size = std::strlen("\\i"); - auto file_name = trim( - trimmed_input.substr(skip_prefix_size, trimmed_input.size() - skip_prefix_size), - [](char c) { return isWhitespaceASCII(c); }); - - return processMultiQueryFromFile(file_name); - } - if (!is_multiquery) { assert(!query_fuzzer_runs); @@ -2256,17 +2236,6 @@ void 
ClientBase::runInteractive() } -bool ClientBase::processMultiQueryFromFile(const String & file_name) -{ - String queries_from_file; - - ReadBufferFromFile in(file_name); - readStringUntilEOF(queries_from_file, in); - - return executeMultiQuery(queries_from_file); -} - - void ClientBase::runNonInteractive() { if (delayed_interactive) @@ -2274,13 +2243,23 @@ void ClientBase::runNonInteractive() if (!queries_files.empty()) { + auto process_multi_query_from_file = [&](const String & file) + { + String queries_from_file; + + ReadBufferFromFile in(file); + readStringUntilEOF(queries_from_file, in); + + return executeMultiQuery(queries_from_file); + }; + for (const auto & queries_file : queries_files) { for (const auto & interleave_file : interleave_queries_files) - if (!processMultiQueryFromFile(interleave_file)) + if (!process_multi_query_from_file(interleave_file)) return; - if (!processMultiQueryFromFile(queries_file)) + if (!process_multi_query_from_file(queries_file)) return; } From 56c7c30da439a6491c3a40cd475aa4b75aedc8b8 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 26 Jul 2022 22:13:34 -0700 Subject: [PATCH 056/342] Add config entry to overwrite default dialect to kusto auto --- src/Parsers/Kusto/KQL_ReleaseNote.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 47e2e8173916..5f3f5f343ace 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,6 +1,15 @@ # KQL implemented features. - - +The config setting to allow dialect setting from configuration XMLs. + - Set dialect setting during command line in batch mode ./clickhouse-client --dialect='kusto_auto' -q "KQL query" + - Set dialect setting client configuration XML and create a client connection using --config-file option. 
+ For example: Clickouse-client.xml looks like as below + + ` + kusto_auto + ` + + Establish clickhouse-client connection using command `clickhouse-client --config-file=clickhouse-client.xml` + Note: Since it is user level setting, It is not required to udpate config.xml. # July 17, 2022 ## Renamed dialect from sql_dialect to dialect From 1bd7068169631fe2cf462d81b023964d92eae413 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Thu, 28 Jul 2022 08:59:08 -0700 Subject: [PATCH 057/342] Updated Release notes with examples --- src/Parsers/Kusto/KQL_ReleaseNote.md | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 5f3f5f343ace..4474b0ce22df 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,15 +1,29 @@ # KQL implemented features. -The config setting to allow dialect setting from configuration XMLs. - - Set dialect setting during command line in batch mode ./clickhouse-client --dialect='kusto_auto' -q "KQL query" - - Set dialect setting client configuration XML and create a client connection using --config-file option. - For example: Clickouse-client.xml looks like as below - - ` - kusto_auto - ` +The config setting to allow modify dialect setting. + - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. + + For example: + ` + + + random + kusto_auto + ` + + - Query can be executed with HTTP client as below once dialect is set in users.xml + ` echo "KQL query" | curl -sS "http://localhost:8123/?" 
--data-binary @- ` + + - To execute the query using clickhouse-client , Update clickhouse-client.xml as below and connect clickhouse-client with --config-file option (` clickhouse-client --config-file= `) - Establish clickhouse-client connection using command `clickhouse-client --config-file=clickhouse-client.xml` - Note: Since it is user level setting, It is not required to udpate config.xml. + ` + kusto_auto + ` + + OR + pass dialect setting with '--'. For example : + ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` + + # July 17, 2022 ## Renamed dialect from sql_dialect to dialect From 684fdb50568c2531ee7860703969260a9b6afb14 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 19 Jul 2022 21:25:52 -0700 Subject: [PATCH 058/342] Kusto-phase2 Fixed bug of Syntax error when Order By is followed by another statement --- src/Parsers/Kusto/ParserKQLSort.cpp | 2 +- src/Parsers/Kusto/ParserKQLSummarize.cpp | 8 --- src/Parsers/tests/gtest_Parser.cpp | 69 ++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 70e3283ee3e0..69857cd1386b 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -32,7 +32,7 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; pos = op_pos.back(); - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String tmp(pos->begin,pos->end); if (tmp == "desc" or tmp == "asc") diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 059488aca5cc..60ab6497f810 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -47,14 +47,6 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } - if 
(!temp.empty()) - { - return std::make_pair(first_part, temp[temp.size() - 1]); - } - if (!temp.empty()) - { - return std::make_pair(first_part, temp[temp.size() - 1]); - } return std::make_pair("", ""); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index ff7bad6c3553..e5135e81623f 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -509,5 +509,74 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | project parse_ipv6('127.0.0.1')", "SELECT toIPv6OrNull('127.0.0.1')\nFROM Customers" + }, + { + "Customers|where Occupation has_any ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Occupation, 'Skilled') OR hasTokenCaseInsensitive(Occupation, 'abcd')" + }, + { + "Customers|where Occupation has_all ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Occupation, 'Skilled') AND hasTokenCaseInsensitive(Occupation, 'abcd')" + }, + { + "Customers|where Occupation has_all (strcat('Skill','ed'),'Manual')", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Occupation, concat('Skill', 'ed')) AND hasTokenCaseInsensitive(Occupation, 'Manual')" + }, + { + "Customers | where Occupation == strcat('Pro','fessional') | take 1", + "SELECT *\nFROM Customers\nWHERE Occupation = concat('Pro', 'fessional')\nLIMIT 1" + }, + { + "Customers | project countof('The cat sat on the mat', 'at')", + "SELECT countSubstrings('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'normal')", + "SELECT countSubstrings('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'regex')", + "SELECT countMatches('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 10')", + "SELECT extract('The price of PINEAPPLE 
ice cream is 10', '\\b[A-Z]+\\b.+\\b\\\\d+')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')", + "SELECT extract('The price of PINEAPPLE ice cream is 20', '\\b[A-Z]+\\b')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 30')", + "SELECT extract('The price of PINEAPPLE ice cream is 30', '\\b\\\\d+')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 40', typeof(int))", + "SELECT CAST(extract('The price of PINEAPPLE ice cream is 40', '\\b\\\\d+'), 'Int32')\nFROM Customers" + }, + { + "Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 50')", + "SELECT extractAllGroups('The price of PINEAPPLE ice cream is 50', '(\\\\w)(\\\\w+)(\\\\w)')\nFROM Customers" + }, + { + " Customers | project split('aa_bb', '_')", + "SELECT splitByString('_', 'aa_bb')\nFROM Customers" + }, + { + "Customers | project split('aaa_bbb_ccc', '_', 1)", + "SELECT arrayPushBack([], splitByString('_', 'aaa_bbb_ccc')[2])\nFROM Customers" + }, + { + "Customers | project strcat_delim('-', '1', '2', 'A')", + "SELECT concat('1', '-', '2', '-', 'A')\nFROM Customers" + }, + { + "Customers | project indexof('abcdefg','cde')", + "SELECT position('abcdefg', 'cde', 1) - 1\nFROM Customers" + }, + { + "Customers | project indexof('abcdefg','cde', 2) ", + "SELECT position('abcdefg', 'cde', 3) - 1\nFROM Customers" + } }))); From de5719af849ed03874ce19dff84e2922a217b403 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 20 Jul 2022 06:39:32 -0700 Subject: [PATCH 059/342] Kusto-phase: Add function to validate end of kql function --- .../Kusto/KustoFunctions/IParserKQLFunction.cpp | 17 ++++++++++++----- .../Kusto/KustoFunctions/IParserKQLFunction.h | 11 ++++++----- .../Kusto/KustoFunctions/KQLStringFunctions.cpp | 15 ++++++++++++++- 3 files changed, 
32 insertions(+), 11 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 73472a42010e..c45ccdd3ab11 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -23,10 +23,11 @@ namespace DB namespace ErrorCodes { extern const int SYNTAX_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +bool IParserKQLFunction::convert(String & out,IParser::Pos & pos) { return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] { @@ -37,7 +38,7 @@ bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) }); } -bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const String &ch_fn) +bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const String & ch_fn) { std::vector arguments; @@ -75,7 +76,7 @@ bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const Strin return false; } -String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser::Pos &pos) +String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) { String converted_arg; std::vector tokens; @@ -85,7 +86,7 @@ String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser:: return converted_arg; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) - throw Exception("Syntax error near " + fn_name, ErrorCodes::SYNTAX_ERROR); + throw Exception("Need more argument(s) in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { @@ -119,7 +120,7 @@ String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser:: return converted_arg; } -String IParserKQLFunction::getKQLFunctionName(IParser::Pos &pos) +String 
IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) { String fn_name = String(pos->begin, pos->end); ++pos; @@ -131,4 +132,10 @@ String IParserKQLFunction::getKQLFunctionName(IParser::Pos &pos) return fn_name; } +void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) +{ + if (pos->type != TokenType:: ClosingRoundBracket) + throw Exception("Too many arguments in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 8af2623a984d..6e565eabe9ea 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -28,14 +28,15 @@ class IParserKQLFunction pos = begin; return res; } - bool convert(String &out,IParser::Pos &pos); + bool convert(String & out,IParser::Pos & pos); virtual const char * getName() const = 0; virtual ~IParserKQLFunction() = default; protected: - virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; - static bool directMapping(String &out,IParser::Pos &pos,const String &ch_fn); - static String getConvertedArgument(const String &fn_name, IParser::Pos &pos); - static String getKQLFunctionName(IParser::Pos &pos); + virtual bool convertImpl(String & out,IParser::Pos & pos) = 0; + static bool directMapping(String &out,IParser::Pos & pos,const String & ch_fn); + static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); + static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); + static String getKQLFunctionName(IParser::Pos & pos); }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index ddd872c20f04..6dd90121168b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -2,6 +2,7 @@ #include 
#include #include +#include #include #include @@ -456,7 +457,19 @@ bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) bool StrCmp::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"lengthUTF8"); + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String string1 = getConvertedArgument(fn_name, pos); + ++pos; + const String string2 = getConvertedArgument(fn_name, pos); + + validateEndOfFunction(fn_name, pos); + + out = std::format("multiIf({0} == {1}, 0, {0} < {1}, -1, 1)", string1, string2); + return true; } bool StrLen::convertImpl(String & out,IParser::Pos & pos) From 09e80eb1d5a7f36da37ca07cf0a57b6c6c2d9574 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Mon, 18 Jul 2022 15:56:57 -0400 Subject: [PATCH 060/342] Add functional tests for tabular table summarize --- .../02366_kql_create_table.reference | 4 + .../0_stateless/02366_kql_create_table.sql | 34 +++++ .../0_stateless/02366_kql_summarize.reference | 25 ++++ .../0_stateless/02366_kql_summarize.sql | 39 +++++ .../0_stateless/02366_kql_tabular.reference | 111 +++++++++++++++ .../queries/0_stateless/02366_kql_tabular.sql | 133 ++++++++++++++++++ 6 files changed, 346 insertions(+) create mode 100644 tests/queries/0_stateless/02366_kql_create_table.reference create mode 100644 tests/queries/0_stateless/02366_kql_create_table.sql create mode 100644 tests/queries/0_stateless/02366_kql_summarize.reference create mode 100644 tests/queries/0_stateless/02366_kql_summarize.sql create mode 100644 tests/queries/0_stateless/02366_kql_tabular.reference create mode 100644 tests/queries/0_stateless/02366_kql_tabular.sql diff --git a/tests/queries/0_stateless/02366_kql_create_table.reference b/tests/queries/0_stateless/02366_kql_create_table.reference new file mode 100644 index 000000000000..35136b5ff425 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.reference @@ -0,0 +1,4 @@ +-- test create table -- +Theodore 
+Diaz +Theodore Diaz 28 diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql new file mode 100644 index 000000000000..67f099a2d709 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); +INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); +INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); +INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); +INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); +INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); + +Select '-- test create table --' ; +Select * from kql(Customers|project FirstName) limit 1;; +DROP TABLE IF EXISTS kql_table1; +CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); +select LastName from kql_table1 limit 1; +DROP TABLE IF EXISTS kql_table2; +CREATE TABLE kql_table2 +( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); +select * from kql_table2 limit 1; +DROP TABLE IF EXISTS Customers; +DROP TABLE IF EXISTS kql_table1; +DROP TABLE IF EXISTS kql_table2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference new file mode 100644 index 000000000000..d73f75b03c2d --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -0,0 +1,25 
@@ +-- test summarize -- +12 25 46 32.416666666666664 389 +Skilled Manual 5 26 36 30.2 151 +Professional 6 25 46 34.166666666666664 205 +Management abcd defg 1 33 33 33 33 +Skilled Manual 0 +Professional 2 +Management abcd defg 0 +Skilled Manual 36 +Professional 38 +Management abcd defg 33 +Skilled Manual 26 +Professional 25 +Management abcd defg 33 +Skilled Manual 30.2 +Professional 29.25 +Management abcd defg 33 +Skilled Manual 151 +Professional 117 +Management abcd defg 33 +4 +2 +40 2 +20 6 +30 4 diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql new file mode 100644 index 000000000000..8eba49f92f05 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); +INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); +INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); +INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); +INSERT INTO Customers VALUES ('Joshua','Lee','Professional','Partial College',26); +INSERT INTO Customers VALUES ('Edward','Hernandez','Skilled Manual','High School',36); +INSERT INTO Customers VALUES ('Dalton','Wood','Professional','Partial College',42); +INSERT INTO Customers VALUES ('Christine','Nara','Skilled Manual','Partial College',33); +INSERT INTO Customers VALUES ('Cameron','Rodriguez','Professional','Partial College',28); +INSERT INTO Customers VALUES ('Angel','Stewart','Professional','Partial College',46); +INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); +INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); + +Select '-- test 
summarize --' ; +set dialect='kusto'; +Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age); +Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age) by Occupation; +Customers | summarize countif(Age>40) by Occupation; +Customers | summarize MyMax = maxif(Age, Age<40) by Occupation; +Customers | summarize MyMin = minif(Age, Age<40) by Occupation; +Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation; +Customers | summarize MySum = sumif(Age, Age<40) by Occupation; +Customers | summarize dcount(Education, Occupation=='Professional'); +Customers | summarize dcountif(Education, Occupation=='Professional'); +Customers | summarize count() by bin(Age, 10) + +-- The following does not work +-- arg_max() +-- arg_min() diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference new file mode 100644 index 000000000000..6fd5af9b60ab --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.reference @@ -0,0 +1,111 @@ +-- test Query only has table name: -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Joshua Lee Professional Partial College 26 +Edward Hernandez Skilled Manual High School 36 +Dalton Wood Professional Partial College 42 +Christine Nara Skilled Manual Partial College 33 +Cameron Rodriguez Professional Partial College 28 +Angel Stewart Professional Partial College 46 +-- Query has Column Selection -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Edward Hernandez Skilled Manual +Dalton Wood Professional +Christine Nara Skilled Manual +Cameron Rodriguez Professional +Angel Stewart Professional +-- Query has limit -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee 
Professional +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with bigger value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with smaller value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +-- Query has second Column selection -- +Theodore Diaz +Stephanie Cox +Peter Nara +-- Query has second Column selection with extra column -- +Theodore Diaz Bachelors +Stephanie Cox Bachelors +Peter Nara Graduate Degree +-- Test String Equals (==) -- +Theodore Diaz Skilled Manual +Peter Nara Skilled Manual +Edward Hernandez Skilled Manual +Christine Nara Skilled Manual +-- Test String Not equals (!=) -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Dalton Wood Professional +Cameron Rodriguez Professional +Angel Stewart Professional +-- Test Filter using a list (in) -- +Theodore Diaz Skilled Manual Bachelors +Stephanie Cox Management Bachelors +Edward Hernandez Skilled Manual High School +-- Test Filter using a list (!in) -- +Peter Nara Skilled Manual Graduate Degree +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (contains_cs) -- +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (startswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional 
Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (endswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +-- Test Filter using numerical equal (==) -- +Peter Nara Skilled Manual Graduate Degree 26 +Joshua Lee Professional Partial College 26 +-- Test Filter using numerical great and less (> , <) -- +Stephanie Cox Management Bachelors 33 +Edward Hernandez Skilled Manual High School 36 +Christine Nara Skilled Manual Partial College 33 +-- Test Filter using multi where -- +Dalton Wood Professional Partial College 42 +Angel Stewart Professional Partial College 46 +-- test sort, order -- +Angel Stewart Professional Partial College 46 +Dalton Wood Professional Partial College 42 +Edward Hernandez Skilled Manual High School 36 +Christine Nara Skilled Manual Partial College 33 +Stephanie Cox Management Bachelors 33 +Cameron Rodriguez Professional Partial College 28 +Theodore Diaz Skilled Manual Bachelors 28 +Joshua Lee Professional Partial College 26 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql new file mode 100644 index 000000000000..3f16e63567bc --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -0,0 +1,133 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), 
('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); + +set dialect='clickhouse'; +Select '-- test Query only has table name: --'; +set dialect='kusto'; +Customers; + +set dialect='clickhouse'; +Select '-- Query has Column Selection --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation; + +set dialect='clickhouse'; +Select '-- Query has limit --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5; +Customers | project FirstName,LastName,Occupation | limit 5; + +set dialect='clickhouse'; +Select '-- Query has second limit with bigger value --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5 | take 7; + +set dialect='clickhouse'; +Select '-- Query has second limit with smaller value --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5 | take 3; + +set dialect='clickhouse'; +Select '-- Query has second Column selection --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName; + +set dialect='clickhouse'; +Select '-- Query has second Column selection with extra column --'; +set dialect='kusto'; +Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education; + +-- set dialect='clickhouse'; +-- Select '-- Query with desc sort --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 | sort by FirstName desc; +-- Customers | project FirstName,LastName,Occupation | take 5 | order by Occupation desc; + +-- set dialect='clickhouse'; +-- Select '-- Query with asc sort --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc; + +-- set dialect='clickhouse'; +-- Select '-- Query with sort 
(without keyword asc desc) --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 | sort by FirstName; +-- Customers | project FirstName,LastName,Occupation | take 5 | order by Occupation; + +-- set dialect='clickhouse'; +-- Select '-- Query with sort 2 Columns with different direction --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 |sort by Occupation asc, LastName desc; + +-- set dialect='clickhouse'; +-- Select '-- Query with second sort --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 |sort by Occupation desc |sort by Occupation asc; + + +set dialect='clickhouse'; +Select '-- Test String Equals (==) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; + +set dialect='clickhouse'; +Select '-- Test String Not equals (!=) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; + +set dialect='clickhouse'; +Select '-- Test Filter using a list (in) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); + +set dialect='clickhouse'; +Select '-- Test Filter using a list (!in) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); + +set dialect='clickhouse'; +Select '-- Test Filter using common string operations (contains_cs) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; + +set dialect='clickhouse'; +Select '-- Test Filter using common string operations (startswith_cs) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; + +set dialect='clickhouse'; +Select '-- Test Filter using common string operations 
(endswith_cs) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; + +set dialect='clickhouse'; +Select '-- Test Filter using numerical equal (==) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; + +set dialect='clickhouse'; +Select '-- Test Filter using numerical great and less (> , <) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; + + +set dialect='clickhouse'; +Select '-- Test Filter using multi where --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; + +-- TODO: verify the issue that order by can not be followed by other statements +set dialect='clickhouse'; +Select '-- test sort, order --'; +set dialect='kusto'; +Customers | order by Age desc, FirstName asc; + From 871a0bbcbf117750e39b9f055ff22320e6c163fe Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Wed, 20 Jul 2022 14:18:03 -0400 Subject: [PATCH 061/342] Added sorting test cases --- .../0_stateless/02366_kql_tabular.reference | 51 ++++++++++++++---- .../queries/0_stateless/02366_kql_tabular.sql | 53 ++++++++----------- 2 files changed, 62 insertions(+), 42 deletions(-) diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference index 6fd5af9b60ab..fa9ea03a9c0b 100644 --- a/tests/queries/0_stateless/02366_kql_tabular.reference +++ b/tests/queries/0_stateless/02366_kql_tabular.reference @@ -49,6 +49,46 @@ Peter Nara Theodore Diaz Bachelors Stephanie Cox Bachelors Peter Nara Graduate Degree +-- Query with desc sort -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Skilled Manual +Skilled Manual +Professional +-- Query with asc sort -- +Management +Professional +Professional +Professional +Professional +-- Query with sort 
(without keyword asc desc) -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Skilled Manual +Skilled Manual +Professional +-- Query with sort 2 Columns with different direction -- +Stephanie Cox Management +Dalton Wood Professional +Angel Stewart Professional +Latoya Shen Professional +Cameron Rodriguez Professional +-- Query with second sort -- +Stephanie Cox Management +Dalton Wood Professional +Angel Stewart Professional +Latoya Shen Professional +Cameron Rodriguez Professional -- Test String Equals (==) -- Theodore Diaz Skilled Manual Peter Nara Skilled Manual @@ -98,14 +138,3 @@ Christine Nara Skilled Manual Partial College 33 -- Test Filter using multi where -- Dalton Wood Professional Partial College 42 Angel Stewart Professional Partial College 46 --- test sort, order -- -Angel Stewart Professional Partial College 46 -Dalton Wood Professional Partial College 42 -Edward Hernandez Skilled Manual High School 36 -Christine Nara Skilled Manual Partial College 33 -Stephanie Cox Management Bachelors 33 -Cameron Rodriguez Professional Partial College 28 -Theodore Diaz Skilled Manual Bachelors 28 -Joshua Lee Professional Partial College 26 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql index 3f16e63567bc..6a0a3417f421 100644 --- a/tests/queries/0_stateless/02366_kql_tabular.sql +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -46,33 +46,32 @@ Select '-- Query has second Column selection with extra column --'; set dialect='kusto'; Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education; --- set dialect='clickhouse'; --- Select '-- Query with desc sort --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 | sort by FirstName desc; --- Customers | project FirstName,LastName,Occupation | take 5 | order 
by Occupation desc; - --- set dialect='clickhouse'; --- Select '-- Query with asc sort --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc; +set dialect='clickhouse'; +Select '-- Query with desc sort --'; +set dialect='kusto'; +Customers | project FirstName | take 5 | sort by FirstName desc; +Customers | project Occupation | take 5 | order by Occupation desc; --- set dialect='clickhouse'; --- Select '-- Query with sort (without keyword asc desc) --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 | sort by FirstName; --- Customers | project FirstName,LastName,Occupation | take 5 | order by Occupation; +set dialect='clickhouse'; +Select '-- Query with asc sort --'; +set dialect='kusto'; +Customers | project Occupation | take 5 | sort by Occupation asc; --- set dialect='clickhouse'; --- Select '-- Query with sort 2 Columns with different direction --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 |sort by Occupation asc, LastName desc; +set dialect='clickhouse'; +Select '-- Query with sort (without keyword asc desc) --'; +set dialect='kusto'; +Customers | project FirstName | take 5 | sort by FirstName; +Customers | project Occupation | take 5 | order by Occupation; --- set dialect='clickhouse'; --- Select '-- Query with second sort --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 |sort by Occupation desc |sort by Occupation asc; +set dialect='clickhouse'; +Select '-- Query with sort 2 Columns with different direction --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; +set dialect='clickhouse'; +Select '-- Query with second sort --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; set 
dialect='clickhouse'; Select '-- Test String Equals (==) --'; @@ -119,15 +118,7 @@ Select '-- Test Filter using numerical great and less (> , <) --'; set dialect='kusto'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; - set dialect='clickhouse'; Select '-- Test Filter using multi where --'; set dialect='kusto'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; - --- TODO: verify the issue that order by can not be followed by other statements -set dialect='clickhouse'; -Select '-- test sort, order --'; -set dialect='kusto'; -Customers | order by Age desc, FirstName asc; - From 1fe2379102b96db72d0edf2de1365dbc83a162a1 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 22 Jul 2022 05:52:26 -0700 Subject: [PATCH 062/342] Kusto-phase2: Add print operator --- src/Parsers/Kusto/ParserKQLPrint.cpp | 20 ++++++++++++++++++++ src/Parsers/Kusto/ParserKQLPrint.h | 17 +++++++++++++++++ src/Parsers/Kusto/ParserKQLQuery.cpp | 12 ++++++++++++ src/Parsers/tests/gtest_Parser.cpp | 5 ++++- 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 src/Parsers/Kusto/ParserKQLPrint.cpp create mode 100644 src/Parsers/Kusto/ParserKQLPrint.h diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp new file mode 100644 index 000000000000..e6f07cd65345 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -0,0 +1,20 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + const String expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLPrint.h b/src/Parsers/Kusto/ParserKQLPrint.h new file mode 100644 index 
000000000000..38cc9eb789c5 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLPrint : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index cd8c071e0fc5..d2c1e4943bf8 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -121,6 +122,17 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) operation_pos.push_back(std::make_pair("table",pos)); String table_name(pos->begin,pos->end); + if (table_name == "print") + { + ++pos; + if (!ParserKQLPrint().parse(pos, select_expression_list, expected)) + return false; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + return true; + } + ++pos; while (!pos->isEnd() && pos->type != TokenType::Semicolon) { diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index e5135e81623f..eda19f8ed7f7 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -577,6 +577,9 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | project indexof('abcdefg','cde', 2) ", "SELECT position('abcdefg', 'cde', 3) - 1\nFROM Customers" - + }, + { + "print x=1, s=strcat('Hello', ', ', 'World!')", + "SELECT\n 1 AS x,\n concat('Hello', ', ', 'World!') AS s" } }))); From d3731a03de99d244179703cd03ec18e0f942f434 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 2 Aug 2022 19:03:22 -0700 Subject: [PATCH 063/342] Kusto Aggregate functions as of July 29 --- src/Parsers/Kusto/KQL_ReleaseNote.md | 30 +++---- .../KQLAggregationFunctions.cpp | 88 +++++++++++++------ 
src/Parsers/tests/gtest_Parser.cpp | 38 +++++++- 3 files changed, 113 insertions(+), 43 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 4474b0ce22df..365521d570bd 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -236,19 +236,19 @@ Please note that the functions listed below only take constant parameters for no - tolower() `Customers | project tolower(FirstName)` +# July 29, 2022 ## Aggregate Functions - - arg_max() - - arg_min() - - avg() - - avgif() - - count() - - countif() - - max() - - maxif() - - min() - - minif() - - sum() - - sumif() - - dcount() - - dcountif() - - bin \ No newline at end of file + - make_list() + `Customers | summarize t = make_list(FirstName) by FirstName` + `Customers | summarize t = make_list(FirstName, 10) by FirstName` + - make_list_if() + `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName` + - make_list_with_nulls() + `Customers | summarize t = make_list_with_nulls(FirstName) by FirstName` + - make_set() + `Customers | summarize t = make_set(FirstName) by FirstName` + `Customers | summarize t = make_set(FirstName, 10) by FirstName` + - make_set_if() + `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 1bfb094518f7..30b33b5933ad 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -20,25 +20,21 @@ namespace DB bool ArgMax::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"argMax"); } bool ArgMin::convertImpl(String 
&out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"argMin"); } bool Avg::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"avg"); } bool AvgIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"avgIf"); } @@ -72,13 +68,11 @@ bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) bool Count::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"count"); } bool CountIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"countIf"); } @@ -125,60 +119,102 @@ bool MakeBagIf::convertImpl(String &out,IParser::Pos &pos) bool MakeList::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name,pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + expr + " IS NOT NULL)"; + } else + out = "groupArrayIf(" + expr + " , " + expr + " IS NOT NULL)"; + return true; } bool MakeListIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name,pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name,pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + predicate+ " )"; + } else + out = "groupArrayIf(" + expr + " , " + 
predicate+ " )"; + return true; } bool MakeListWithNulls::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"groupArray"); //groupArray takes everything including NULLs } bool MakeSet::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name,pos); + out = "groupUniqArray(" + max_size + ")(" + expr + ")"; + } else + out = "groupUniqArray(" + expr + ")"; + return true; } bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name,pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name,pos); + out = "groupUniqArrayIf(" + max_size + ")(" + expr + " , " + predicate+ " )"; + } else + out = "groupUniqArrayIf(" + expr + " , " + predicate+ " )"; + return true; } bool Max::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"max"); } bool MaxIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"maxIf"); } bool Min::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"min"); } bool MinIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"minIf"); } @@ -226,13 +262,11 @@ bool 
StdevIf::convertImpl(String &out,IParser::Pos &pos) bool Sum::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"sum"); } bool SumIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"sumIf"); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index eda19f8ed7f7..5bb3672f0a7d 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -581,5 +581,41 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "print x=1, s=strcat('Hello', ', ', 'World!')", "SELECT\n 1 AS x,\n concat('Hello', ', ', 'World!') AS s" - } + }, + { + "Customers | summarize t = make_list(FirstName) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_with_nulls(FirstName) by FirstName", + "SELECT\n FirstName,\n groupArray(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(10)(FirstName) AS t\nFROM Customers\nGROUP BY 
FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + } }))); From c514e5327fe7416a94482f09fd89484892520e9f Mon Sep 17 00:00:00 2001 From: root Date: Wed, 3 Aug 2022 08:22:17 -0700 Subject: [PATCH 064/342] update release notes and test script --- src/Parsers/Kusto/KQL_ReleaseNote.md | 64 +++++++++++++++++++++------- src/Parsers/tests/gtest_Parser.cpp | 45 ++++++++++--------- 2 files changed, 71 insertions(+), 38 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 365521d570bd..708e7ab94181 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -22,6 +22,39 @@ The config setting to allow modify dialect setting. OR pass dialect setting with '--'. 
For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` +# Augest 1, 2022 +- **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) + `print strcmp('abc','ABC')` + +- **parse_url** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlfunction) + `print Result = parse_url('scheme://username:password@www.google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')` + +- **parse_urlquery** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlqueryfunction) + `print Result = parse_urlquery('k1=v1&k2=v2&k3=v3')` + +- **print operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/printoperator) + `print x=1, s=strcat('Hello', ', ', 'World!')` + +- **The following functions now support arbitrary expressions as their argument:** + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + - [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + +- **Aggregate Functions:** + - [make_list()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelist-aggfunction) + `Customers | summarize t = make_list(FirstName) by FirstName` + `Customers | summarize t = make_list(FirstName, 10) by FirstName` + - [make_list_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelistif-aggfunction) + `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName` + - [make_list_with_nulls()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-list-with-nulls-aggfunction) + `Customers | summarize t = make_list_with_nulls(FirstName) by FirstName` + - 
[make_set()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makeset-aggfunction) + `Customers | summarize t = make_set(FirstName) by FirstName` + `Customers | summarize t = make_set(FirstName, 10) by FirstName` + - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) + `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` # July 17, 2022 @@ -236,19 +269,20 @@ Please note that the functions listed below only take constant parameters for no - tolower() `Customers | project tolower(FirstName)` -# July 29, 2022 ## Aggregate Functions - - make_list() - `Customers | summarize t = make_list(FirstName) by FirstName` - `Customers | summarize t = make_list(FirstName, 10) by FirstName` - - make_list_if() - `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` - `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName` - - make_list_with_nulls() - `Customers | summarize t = make_list_with_nulls(FirstName) by FirstName` - - make_set() - `Customers | summarize t = make_set(FirstName) by FirstName` - `Customers | summarize t = make_set(FirstName, 10) by FirstName` - - make_set_if() - `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` - `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` + - arg_max() + - arg_min() + - avg() + - avgif() + - count() + - countif() + - max() + - maxif() + - min() + - minif() + - sum() + - sumif() + - dcount() + - dcountif() + - bin + \ No newline at end of file diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5bb3672f0a7d..38f017021762 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -479,36 +479,24 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n 
FROM Customers\n WHERE Age < 30\n)" }, { - "Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')", - "SELECT '127.0.0.1' = '127.0.0.1'\nFROM Customers" + "Customers | project ipv4_is_in_range(FirstName, LastName)", + "SELECT isIPAddressInRange(FirstName, concat(LastName, if(position(LastName, '/') > 0, '', '/32')))\nFROM Customers" }, { - "Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')", - "SELECT isIPAddressInRange('192.168.1.6', '192.168.1.1/24')\nFROM Customers" + "Customers | project ipv4_is_private(Occupation)", + "SELECT (((length(splitByChar('/', Occupation) AS tokens) = 1) AND isIPAddressInRange(tokens[1] AS ip, '10.0.0.0/8')) OR ((length(tokens) = 2) AND isIPAddressInRange(IPv4NumToString((IPv4CIDRToRange(toIPv4(ip), if((toUInt8OrNull(tokens[-1]) AS suffix) IS NULL, throwIf(true, 'Unable to parse suffix'), assumeNotNull(suffix))) AS range).1) AS begin, '10.0.0.0/8') AND isIPAddressInRange(IPv4NumToString(range.2) AS end, '10.0.0.0/8'))) OR (((length(tokens) = 1) AND isIPAddressInRange(ip, '172.16.0.0/12')) OR ((length(tokens) = 2) AND isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12'))) OR (((length(tokens) = 1) AND isIPAddressInRange(ip, '192.168.0.0/16')) OR ((length(tokens) = 2) AND isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16')))\nFROM Customers" }, { - "Customers | project ipv4_is_private('192.168.1.6')", - "SELECT isIPAddressInRange('192.168.1.6', '10.0.0.0/8') OR isIPAddressInRange('192.168.1.6', '172.16.0.0/12') OR isIPAddressInRange('192.168.1.6', '192.168.0.0/16')\nFROM Customers" + "Customers | project ipv4_netmask_suffix(Occupation)", + "SELECT if((length(splitByChar('/', Occupation) AS tokens) <= 2) AND isIPv4String(tokens[1]), if(length(tokens) != 2, 32, if(((toInt8OrNull(tokens[-1]) AS suffix) >= 1) AND (suffix <= 32), suffix, throwIf(true, 'Suffix must be between 1 and 32'))), throwIf(true, 'Unable to recognize and IP address 
with or without a suffix'))\nFROM Customers" }, { - "Customers | project ipv4_is_private('192.168.1.6/24')", - "SELECT (isIPAddressInRange(IPv4NumToString((IPv4CIDRToRange(toIPv4('192.168.1.6'), 24) AS range).1) AS begin, '10.0.0.0/8') AND isIPAddressInRange(IPv4NumToString(range.2) AS end, '10.0.0.0/8')) OR (isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12')) OR (isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16'))\nFROM Customers" + "Customers | project parse_ipv4(FirstName)", + "SELECT toIPv4OrNull(FirstName)\nFROM Customers" }, { - "Customers | project ipv4_netmask_suffix('192.168.1.1/24')", - "SELECT if(isIPv4String('192.168.1.1') AND ((24 >= 1) AND (24 <= 32)), 24, NULL)\nFROM Customers" - }, - { - "Customers | project ipv4_netmask_suffix('192.168.1.1')", - "SELECT if(isIPv4String('192.168.1.1') AND ((32 >= 1) AND (32 <= 32)), 32, NULL)\nFROM Customers" - }, - { - "Customers | project parse_ipv4('127.0.0.1')", - "SELECT toIPv4OrNull('127.0.0.1')\nFROM Customers" - }, - { - "Customers | project parse_ipv6('127.0.0.1')", - "SELECT toIPv6OrNull('127.0.0.1')\nFROM Customers" + "Customers | project parse_ipv6(LastName)", + "SELECT toIPv6OrNull(LastName)\nFROM Customers" }, { "Customers|where Occupation has_any ('Skilled','abcd')", @@ -582,7 +570,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "print x=1, s=strcat('Hello', ', ', 'World!')", "SELECT\n 1 AS x,\n concat('Hello', ', ', 'World!') AS s" }, - { + { + "print parse_urlquery('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Query Parameters\":', concat('{\"', replace(replace(if(position('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment', '?') > 0, queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), 'https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), '}')" + 
}, + { + "print strcmp('a','b')", + "SELECT multiIf('a' = 'b', 0, 'a' < 'b', -1, 1)" + }, + { + "print parse_url('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Scheme\":\"', protocol('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Host\":\"', domain('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Port\":\"', toString(port('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')), '\"'), ',', concat('\"Path\":\"', path('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Username\":\"', splitByChar(':', splitByChar('@', netloc('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'))[1])[1], '\"'), ',', concat('\"Password\":\"', splitByChar(':', splitByChar('@', netloc('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'))[1])[2], '\"'), ',', concat('\"Query Parameters\":', concat('{\"', replace(replace(queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), ',', concat('\"Fragment\":\"', fragment('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), '}')" + },{ "Customers | summarize t = make_list(FirstName) by FirstName", "SELECT\n FirstName,\n groupArrayIf(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" }, From 9f9d705e1a2127c5be4e66978ff06654a26b23e3 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 19 Jul 2022 09:52:54 -0700 Subject: [PATCH 065/342] Support expressions as IP function arguments --- src/Parsers/Kusto/KQL_ReleaseNote.md | 9 ++++ .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 51 +++++++------------ 2 files changed, 26 insertions(+), 34 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 
708e7ab94181..8185cc00817a 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -55,6 +55,15 @@ The config setting to allow modify dialect setting. - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` +# July XX, 2022 + +## IP functions + +The following functions now support arbitrary expressions as their argument. + +- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) +- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) +- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) # July 17, 2022 diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index d8de9cc4e9a9..d1c7963b66b6 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -22,21 +22,6 @@ namespace DB::ErrorCodes extern const int SYNTAX_ERROR; } -namespace -{ -String trimQuotes(const String & str) -{ - static constexpr auto QUOTE = '\''; - - const auto first_index = str.find(QUOTE); - const auto last_index = str.rfind(QUOTE); - if (first_index == String::npos || last_index == String::npos) - throw DB::Exception("Syntax error, improper quotation: " + str, DB::ErrorCodes::SYNTAX_ERROR); - - return str.substr(first_index + 1, last_index - first_index - 1); -} -} - namespace DB { @@ -59,8 +44,7 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) ++pos; const auto ip_range = getConvertedArgument(function_name, pos); - const auto slash_index = ip_range.find('/'); - out = std::format(slash_index == String::npos ? 
"{0} = {1}" : "isIPAddressInRange({0}, {1})", ip_address, ip_range); + out = std::format("isIPAddressInRange({0}, concat({1}, if(position({1}, '/') > 0, '', '/32')))", ip_address, ip_range); return true; } @@ -79,8 +63,9 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) if (function_name.empty()) return false; - const auto ip_address = trimQuotes(getConvertedArgument(function_name, pos)); - const auto slash_index = ip_address.find('/'); + ++pos; + + const auto ip_address = getConvertedArgument(function_name, pos); out += "or("; for (int i = 0; i < std::ssize(PRIVATE_SUBNETS); ++i) @@ -88,14 +73,13 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) out += i > 0 ? ", " : ""; const auto & subnet = PRIVATE_SUBNETS[i]; - out += slash_index == String::npos - ? std::format("isIPAddressInRange('{0}', '{1}')", ip_address, subnet) - : std::format( - "and(isIPAddressInRange(IPv4NumToString(tupleElement((IPv4CIDRToRange(toIPv4('{0}'), {1}) as range), 1)) as begin, '{2}'), " - "isIPAddressInRange(IPv4NumToString(tupleElement(range, 2)) as end, '{2}'))", - std::string_view(ip_address.c_str(), slash_index), - std::string_view(ip_address.c_str() + slash_index + 1, ip_address.length() - slash_index - 1), - subnet); + out += std::format( + "or(and(length(splitByChar('/', {0}) as tokens) = 1, isIPAddressInRange(tokens[1] as ip, '{1}')), " + "and(length(tokens) = 2, isIPAddressInRange(IPv4NumToString(tupleElement((IPv4CIDRToRange(toIPv4(ip), " + "if(isNull(toUInt8OrNull(tokens[-1]) as suffix), throwIf(true, 'Unable to parse suffix'), assumeNotNull(suffix))) as range), " + "1)) as begin, '{1}'), isIPAddressInRange(IPv4NumToString(tupleElement(range, 2)) as end, '{1}')))", + ip_address, + subnet); } out += ")"; @@ -104,19 +88,18 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) { - static constexpr auto DEFAULT_NETMASK = 32; - const auto function_name = 
getKQLFunctionName(pos); if (function_name.empty()) return false; ++pos; - const auto ip_range = trimQuotes(getConvertedArgument(function_name, pos)); - const auto slash_index = ip_range.find('/'); - const std::string_view ip_address(ip_range.c_str(), std::min(ip_range.length(), slash_index)); - const auto netmask = slash_index == String::npos ? DEFAULT_NETMASK : std::strtol(ip_range.c_str() + slash_index + 1, nullptr, 10); - out = std::format("if(and(isIPv4String('{0}'), {1} between 1 and 32), {1}, null)", ip_address, netmask); + const auto ip_range = getConvertedArgument(function_name, pos); + out = std::format( + "if(length(splitByChar('/', {0}) as tokens) <= 2 and isIPv4String(tokens[1]), if(length(tokens) != 2, 32, " + "if((toInt8OrNull(tokens[-1]) as suffix) between 1 and 32, suffix, throwIf(true, 'Suffix must be between 1 and 32'))), " + "throwIf(true, 'Unable to recognize and IP address with or without a suffix'))", + ip_range); return true; } From a8a4aba83b89e69dc056d111ee23b30e01324393 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 26 Jul 2022 08:32:28 -0700 Subject: [PATCH 066/342] Provide conformance to the specification --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index d1c7963b66b6..0383292669d8 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -67,22 +67,23 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getConvertedArgument(function_name, pos); - out += "or("; + out += std::format( + "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or isNull(toIPv4OrNull(tokens[1]) as nullable_ip), null, " + "length(tokens) = 2 and isNull(toUInt8OrNull(tokens[-1]) as suffix), null, " + 
"ignore(assumeNotNull(nullable_ip) as ip, " + "IPv4CIDRToRange(ip, assumeNotNull(suffix)) as range, IPv4NumToString(tupleElement(range, 1)) as begin, " + "IPv4NumToString(tupleElement(range, 2)) as end), null, ", + ip_address); for (int i = 0; i < std::ssize(PRIVATE_SUBNETS); ++i) { - out += i > 0 ? ", " : ""; - const auto & subnet = PRIVATE_SUBNETS[i]; out += std::format( - "or(and(length(splitByChar('/', {0}) as tokens) = 1, isIPAddressInRange(tokens[1] as ip, '{1}')), " - "and(length(tokens) = 2, isIPAddressInRange(IPv4NumToString(tupleElement((IPv4CIDRToRange(toIPv4(ip), " - "if(isNull(toUInt8OrNull(tokens[-1]) as suffix), throwIf(true, 'Unable to parse suffix'), assumeNotNull(suffix))) as range), " - "1)) as begin, '{1}'), isIPAddressInRange(IPv4NumToString(tupleElement(range, 2)) as end, '{1}')))", - ip_address, + "length(tokens) = 1 and isIPAddressInRange(IPv4NumToString(ip), '{0}') or " + "isIPAddressInRange(begin, '{0}') and isIPAddressInRange(end, '{0}'), true, ", subnet); } - out += ")"; + out += "false)"; return true; } @@ -96,9 +97,8 @@ bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) const auto ip_range = getConvertedArgument(function_name, pos); out = std::format( - "if(length(splitByChar('/', {0}) as tokens) <= 2 and isIPv4String(tokens[1]), if(length(tokens) != 2, 32, " - "if((toInt8OrNull(tokens[-1]) as suffix) between 1 and 32, suffix, throwIf(true, 'Suffix must be between 1 and 32'))), " - "throwIf(true, 'Unable to recognize and IP address with or without a suffix'))", + "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or not isIPv4String(tokens[1]), null, " + "length(tokens) = 1, 32, isNull(toUInt8OrNull(tokens[-1]) as suffix), null, toUInt8(min2(suffix, 32)))", ip_range); return true; } From 8c77cd307c014d7d0ecaa9e58aa39f9628ac193e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 25 Jul 2022 00:01:19 -0700 Subject: [PATCH 067/342] Kusto-phase2: Added check end of function, and neww string functions --- 
src/Parsers/Kusto/KQL_ReleaseNote.md | 15 +- .../KustoFunctions/IParserKQLFunction.cpp | 23 +- .../Kusto/KustoFunctions/IParserKQLFunction.h | 1 + .../KustoFunctions/KQLStringFunctions.cpp | 245 +++++++----------- src/Parsers/Kusto/ParserKQLOperators.cpp | 19 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 15 +- 6 files changed, 119 insertions(+), 199 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 8185cc00817a..ae3f9e1b6716 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -35,11 +35,6 @@ The config setting to allow modify dialect setting. - **print operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/printoperator) `print x=1, s=strcat('Hello', ', ', 'World!')` -- **The following functions now support arbitrary expressions as their argument:** - - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) - - [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) - - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) - - **Aggregate Functions:** - [make_list()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelist-aggfunction) `Customers | summarize t = make_list(FirstName) by FirstName` @@ -59,12 +54,10 @@ The config setting to allow modify dialect setting. ## IP functions -The following functions now support arbitrary expressions as their argument. 
- -- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) -- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) -- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) - +- **The following functions now support arbitrary expressions as their argument:** + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + - [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) # July 17, 2022 diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index c45ccdd3ab11..825c5eb5f925 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -96,12 +96,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: { if (pos->type == TokenType::BareWord ) { - String converted; - fun = KQLFunctionFactory::get(token); - if ( fun && fun->convert(converted,pos)) - tokens.push_back(converted); - else - tokens.push_back(token); + tokens.push_back(IParserKQLFunction::getExpression(pos)); } else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) { @@ -138,4 +133,20 @@ void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser:: throw Exception("Too many arguments in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } +String IParserKQLFunction::getExpression(IParser::Pos & pos) +{ + String arg = String(pos->begin, pos->end); + if (pos->type == TokenType::BareWord ) + { + String new_arg; + auto fun = KQLFunctionFactory::get(arg); + if (fun && 
fun->convert(new_arg, pos)) + { + validateEndOfFunction(arg, pos); + arg = new_arg; + } + } + return arg; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 6e565eabe9ea..7ed3841583b9 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -31,6 +31,7 @@ class IParserKQLFunction bool convert(String & out,IParser::Pos & pos); virtual const char * getName() const = 0; virtual ~IParserKQLFunction() = default; + static String getExpression(IParser::Pos & pos); protected: virtual bool convertImpl(String & out,IParser::Pos & pos) = 0; static bool directMapping(String &out,IParser::Pos & pos,const String & ch_fn); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 6dd90121168b..c2d1bd251da8 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -51,14 +51,9 @@ bool CountOf::convertImpl(String & out, IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String source = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; - ++pos; const String search = getConvertedArgument(fn_name, pos); @@ -70,16 +65,12 @@ bool CountOf::convertImpl(String & out, IParser::Pos & pos) } assert (kind =="'normal' " || kind =="'regex' "); - if (pos->type == TokenType::ClosingRoundBracket) - { - if (kind == "'normal' " ) - out = "countSubstrings(" + source + ", " + search + ")"; - else - out = "countMatches("+ source + ", " + search + ")"; - return true; - } - pos = begin; - return false; + if (kind == "'normal' " ) + out = "countSubstrings(" + source + ", " + search + ")"; + else + out = "countMatches("+ source + ", " + search + ")"; + return true; + } bool Extract::convertImpl(String & out, IParser::Pos & pos) 
@@ -88,14 +79,9 @@ bool Extract::convertImpl(String & out, IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; String regex = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; - ++pos; size_t capture_group = stoi(getConvertedArgument(fn_name, pos)); @@ -149,48 +135,43 @@ bool Extract::convertImpl(String & out, IParser::Pos & pos) regex = "'" + tmp_regex + "'"; } - if (pos->type == TokenType::ClosingRoundBracket) + out = "extract(" + source + ", " + regex + ")"; + if (!type_literal.empty()) { - out = "extract(" + source + ", " + regex + ")"; - if (!type_literal.empty()) + std::unordered_map type_cast = + { {"bool", "Boolean"}, + {"boolean", "Boolean"}, + {"datetime", "DateTime"}, + {"date", "DateTime"}, + {"dynamic", "Array"}, + {"guid", "UUID"}, + {"int", "Int32"}, + {"long", "Int64"}, + {"real", "Float64"}, + {"double", "Float64"}, + {"string", "String"}, + {"decimal", "Decimal"} + }; + + Tokens token_type(type_literal.c_str(), type_literal.c_str() + type_literal.size()); + IParser::Pos pos_type(token_type, pos.max_depth); + ParserKeyword s_kql("typeof"); + Expected expected; + + if (s_kql.ignore(pos_type, expected)) { - std::unordered_map type_cast = - { {"bool", "Boolean"}, - {"boolean", "Boolean"}, - {"datetime", "DateTime"}, - {"date", "DateTime"}, - {"dynamic", "Array"}, - {"guid", "UUID"}, - {"int", "Int32"}, - {"long", "Int64"}, - {"real", "Float64"}, - {"double", "Float64"}, - {"string", "String"}, - {"decimal", "Decimal"} - }; - - Tokens token_type(type_literal.c_str(), type_literal.c_str() + type_literal.size()); - IParser::Pos pos_type(token_type, pos.max_depth); - ParserKeyword s_kql("typeof"); - Expected expected; - - if (s_kql.ignore(pos_type, expected)) - { - ++pos_type; - auto kql_type= String(pos_type->begin,pos_type->end); - if (type_cast.find(kql_type) == type_cast.end()) - return false; - auto ch_type = type_cast[kql_type]; - out = "CAST(" + out + ", '" + ch_type + "')"; - } 
- else + ++pos_type; + auto kql_type= String(pos_type->begin,pos_type->end); + if (type_cast.find(kql_type) == type_cast.end()) return false; + auto ch_type = type_cast[kql_type]; + out = "CAST(" + out + ", '" + ch_type + "')"; } - return true; + else + return false; } + return true; - pos = begin; - return false; } bool ExtractAll::convertImpl(String & out,IParser::Pos & pos) @@ -199,14 +180,9 @@ bool ExtractAll::convertImpl(String & out,IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String regex = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; - ++pos; const String second_arg = getConvertedArgument(fn_name, pos); @@ -220,13 +196,8 @@ bool ExtractAll::convertImpl(String & out,IParser::Pos & pos) if (!third_arg.empty()) // currently the captureGroups not supported return false; - if (pos->type == TokenType::ClosingRoundBracket) - { - out = "extractAllGroups(" + second_arg + ", " + regex + ")"; - return true; - } - pos = begin; - return false; + out = "extractAllGroups(" + second_arg + ", " + regex + ")"; + return true; } bool ExtractJson::convertImpl(String & out,IParser::Pos & pos) @@ -247,16 +218,12 @@ bool IndexOf::convertImpl(String & out,IParser::Pos & pos) { int start_index = 0, length = -1, occurrence = 1; - String fn_name = getKQLFunctionName(pos); + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String source = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; ++pos; const String lookup = getConvertedArgument(fn_name, pos); @@ -275,7 +242,6 @@ bool IndexOf::convertImpl(String & out,IParser::Pos & pos) { ++pos; occurrence = stoi(getConvertedArgument(fn_name, pos)); - } } } @@ -294,7 +260,6 @@ bool IndexOf::convertImpl(String & out,IParser::Pos & pos) return true; } - pos = begin; return false; } @@ -339,16 +304,41 @@ bool ParseJson::convertImpl(String & 
out,IParser::Pos & pos) bool ParseURL::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String url = getConvertedArgument(fn_name, pos); + + const String scheme = std::format("concat('\"Scheme\":\"', protocol({0}),'\"')",url); + const String host = std::format("concat('\"Host\":\"', domain({0}),'\"')",url); + const String port = std::format("concat('\"Port\":\"', toString(port({0})),'\"')",url); + const String path = std::format("concat('\"Path\":\"', path({0}),'\"')",url); + const String username_pwd = std::format("netloc({0})",url); + const String query_string = std::format("queryString({0})",url); + const String fragment = std::format("concat('\"Fragment\":\"',fragment({0}),'\"')",url); + const String username = std::format("concat('\"Username\":\"', arrayElement(splitByChar(':',arrayElement(splitByChar('@',{0}) ,1)),1),'\"')", username_pwd); + const String password = std::format("concat('\"Password\":\"', arrayElement(splitByChar(':',arrayElement(splitByChar('@',{0}) ,1)),2),'\"')", username_pwd); + const String query_parameters = std::format("concat('\"Query Parameters\":', concat('{{\"', replace(replace({}, '=', '\":\"'),'&','\",\"') ,'\"}}'))", query_string); + + out = std::format("concat('{{',{},',',{},',',{},',',{},',',{},',',{},',',{},',',{},'}}')",scheme, host, port, path, username, password, query_parameters,fragment); + return true; } bool ParseURLQuery::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String query = getConvertedArgument(fn_name, pos); + + const String query_string = std::format("if (position({},'?') > 0, queryString({}), {})", query, query, query); + const String query_parameters = 
std::format("concat('\"Query Parameters\":', concat('{{\"', replace(replace({}, '=', '\":\"'),'&','\",\"') ,'\"}}'))", query_string); + out = std::format("concat('{{',{},'}}')",query_parameters); + return true; } bool ParseVersion::convertImpl(String & out,IParser::Pos & pos) @@ -378,35 +368,25 @@ bool Split::convertImpl(String & out,IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String source = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; ++pos; const String delimiter = getConvertedArgument(fn_name, pos); - int requestedIndex = -1; + int requested_index = -1; if (pos->type == TokenType::Comma) { ++pos; - requestedIndex = std::stoi(getConvertedArgument(fn_name, pos)); + requested_index = std::stoi(getConvertedArgument(fn_name, pos)); } - if (pos->type == TokenType::ClosingRoundBracket) + out = "splitByString(" + delimiter + ", " + source + ")"; + if (requested_index >= 0) { - out = "splitByString(" + delimiter + ", " + source + ")"; - if (requestedIndex >= 0) - { - out = "arrayPushBack([],arrayElement(" + out + ", " + std::to_string(requestedIndex + 1) + "))"; - } - return true; + out = "arrayPushBack([],arrayElement(" + out + ", " + std::to_string(requested_index + 1) + "))"; } - - pos = begin; - return false; + return true; } bool StrCat::convertImpl(String & out,IParser::Pos & pos) @@ -420,12 +400,8 @@ bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String delimiter = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; int arg_count = 0; String args; @@ -445,14 +421,8 @@ bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) if (arg_count < 2 || arg_count > 64) throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); - if (pos->type == TokenType::ClosingRoundBracket) - { - out = std::move(args); - return 
true; - } - - pos = begin; - return false; + out = std::move(args); + return true; } bool StrCmp::convertImpl(String & out,IParser::Pos & pos) @@ -466,8 +436,6 @@ bool StrCmp::convertImpl(String & out,IParser::Pos & pos) ++pos; const String string2 = getConvertedArgument(fn_name, pos); - validateEndOfFunction(fn_name, pos); - out = std::format("multiIf({0} == {1}, 0, {0} < {1}, -1, 1)", string1, string2); return true; } @@ -479,43 +447,28 @@ bool StrLen::convertImpl(String & out,IParser::Pos & pos) bool StrRep::convertImpl(String & out,IParser::Pos & pos) { - String fn_name = getKQLFunctionName(pos); + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; - auto begin = pos; - ++pos; - String value = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; + const String value = getConvertedArgument(fn_name, pos); ++pos; - String multiplier = getConvertedArgument(fn_name, pos); + const String multiplier = getConvertedArgument(fn_name, pos); - String delimiter; if (pos->type == TokenType::Comma) { ++pos; - delimiter = getConvertedArgument(fn_name, pos); - } - - if (pos->type == TokenType::ClosingRoundBracket) - { - if (!delimiter.empty()) - { - String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; - out = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; - } - else - out = "repeat("+ value + ", " + multiplier + ")"; - - return true; + const String delimiter = getConvertedArgument(fn_name, pos); + const String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; + out = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; } + else + out = "repeat("+ value + ", " + multiplier + ")"; - pos = begin; - return false; + return true; } bool SubString::convertImpl(String & out,IParser::Pos & pos) @@ -525,34 +478,22 @@ bool SubString::convertImpl(String & out,IParser::Pos & 
pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; String source = getConvertedArgument(fn_name, pos); - - if (pos->type != TokenType::Comma) - return false; ++pos; String startingIndex = getConvertedArgument(fn_name, pos); - String length; if (pos->type == TokenType::Comma) { ++pos; - length = getConvertedArgument(fn_name, pos); + auto length = getConvertedArgument(fn_name, pos); + out = "substr("+ source + ", " + startingIndex + " + 1, " + length + ")"; } + else + out = "substr("+ source + "," + startingIndex + " + 1)"; - if (pos->type == TokenType::ClosingRoundBracket) - { - if (length.empty()) - out = "substr("+ source + "," + startingIndex +" + 1)"; - else - out = "substr("+ source + ", " + startingIndex +" + 1, " + length + ")"; - return true; - } - pos = begin; - return false; + return true; } bool ToLower::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index fbab8e829d14..758997aa5a9c 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -30,15 +30,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) { - String tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::BareWord ) - { - String new_arg; - auto fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,token_pos)) - tmp_arg = new_arg; - } - + auto tmp_arg = IParserKQLFunction::getExpression(token_pos); if (token_pos->type == TokenType::Comma ) new_expr = new_expr + logic_op; else @@ -140,14 +132,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; else if (!tokens.empty() 
&& ((token_pos)->type == TokenType::BareWord)) { - String tmp_arg = String(token_pos->begin,token_pos->end); - if (token_pos->type == TokenType::BareWord ) - { - String new_arg; - auto fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,token_pos)) - tmp_arg = new_arg; - } + auto tmp_arg = IParserKQLFunction::getExpression(token_pos); new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; } else diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index d2c1e4943bf8..7f00a76fa726 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -36,7 +36,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); - String new_token; + if (token == "=") { ++pos; @@ -49,18 +49,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) } else if (!KQLOperators().convert(tokens,pos)) { - if (pos->type == TokenType::BareWord ) - { - kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)) - token = new_token; - /* else if (!kql_function) - { - if ((++pos)->type == TokenType::OpeningRoundBracket) - throw Exception("Unknown function " + token, ErrorCodes::UNKNOWN_FUNCTION); - --pos; - }*/ - } + token = IParserKQLFunction::getExpression(pos); tokens.push_back(token); } From 906bac9d69a5266cc2bf658f6255b586bb1d3883 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Tue, 26 Jul 2022 20:24:29 -0400 Subject: [PATCH 068/342] Added func tests for string and ip --- .../0_stateless/02366_kql_func_ip.reference | 28 ++ .../queries/0_stateless/02366_kql_func_ip.sql | 36 +++ .../02366_kql_func_string.reference | 262 ++++++++++++++++++ .../0_stateless/02366_kql_func_string.sql | 185 +++++++++++++ 
.../0_stateless/02366_kql_summarize.reference | 2 +- .../0_stateless/02366_kql_summarize.sql | 4 +- .../queries/0_stateless/02366_kql_tabular.sql | 86 ++---- 7 files changed, 536 insertions(+), 67 deletions(-) create mode 100644 tests/queries/0_stateless/02366_kql_func_ip.reference create mode 100644 tests/queries/0_stateless/02366_kql_func_ip.sql create mode 100644 tests/queries/0_stateless/02366_kql_func_string.reference create mode 100644 tests/queries/0_stateless/02366_kql_func_string.sql diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference new file mode 100644 index 000000000000..050096fe2d1b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -0,0 +1,28 @@ +-- ipv4_is_private(\'127.0.0.1\') +false +-- ipv4_is_private(\'10.1.2.3\') +true +-- ipv4_is_private(\'192.168.1.1/24\') +true +ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) +true +-- ipv4_is_private(\'abc\') +\N +-- ipv4_netmask_suffix(\'192.168.1.1/24\') +24 +-- ipv4_netmask_suffix(\'192.168.1.1\') +32 +-- ipv4_netmask_suffix(\'127.0.0.1/16\') +16 +-- ipv4_netmask_suffix(\'abc\') +\N +ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) +16 +-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\') +1 +-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') +0 +ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') +0 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql new file mode 100644 index 000000000000..3c35e7f58cc2 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -0,0 +1,36 @@ +set dialect='kusto'; +print '-- ipv4_is_private(\'127.0.0.1\')'; +print ipv4_is_private('127.0.0.1'); +print '-- ipv4_is_private(\'10.1.2.3\')'; +print ipv4_is_private('10.1.2.3'); +print '-- ipv4_is_private(\'192.168.1.1/24\')'; +print 
ipv4_is_private('192.168.1.1/24'); +print 'ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))'; +print ipv4_is_private(strcat('192.','168.','1.','1','/24')); +print '-- ipv4_is_private(\'abc\')'; +print ipv4_is_private('abc'); -- == null + +print '-- ipv4_netmask_suffix(\'192.168.1.1/24\')'; +print ipv4_netmask_suffix('192.168.1.1/24'); -- == 24 +print '-- ipv4_netmask_suffix(\'192.168.1.1\')'; +print ipv4_netmask_suffix('192.168.1.1'); -- == 32 +print '-- ipv4_netmask_suffix(\'127.0.0.1/16\')'; +print ipv4_netmask_suffix('127.0.0.1/16'); -- == 16 +print '-- ipv4_netmask_suffix(\'abc\')'; +print ipv4_netmask_suffix('abc'); -- == null +print 'ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))'; +print ipv4_netmask_suffix(strcat('127.', '0.', '0.1/16')); -- == 16 + +print '-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_in_range('127.0.0.1', '127.0.0.1'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; +print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; +print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false +print 'ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; +print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false + +-- TODO: +-- print ipv4_is_in_range('abc', '127.0.0.1'); -- == null +-- parse_ipv4() +-- parse_ipv6() \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference new file mode 100644 index 000000000000..255acb486cd9 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -0,0 +1,262 @@ +-- test String Functions -- +-- Customers |where Education contains \'degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'degree\' +\N 
why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers |where Education contains \'Degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'Degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName endswith \'RE\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where ! FirstName endswith \'RE\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +--Customers | where FirstName endswith_cs \'re\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where FirstName !endswith_cs \'re\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation == \'Skilled Manual\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation != \'Skilled Manual\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'Skilled\' +Peter Nara Skilled Manual Graduate Degree 
26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'Skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hasprefix_cs \'Ab\' + +-- Customers | where Occupation !hasprefix_cs \'Ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hasprefix_cs \'ab\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hasprefix_cs \'ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'Ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hassuffix \'Ent\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers |where Education in (\'Bachelors\',\'High School\') +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Education !in (\'Bachelors\',\'High School\') +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where 
FirstName matches regex \'P.*r\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName startswith \'pet\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName !startswith \'pet\' +Latoya Shen Professional Graduate Degree 25 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName startswith_cs \'pet\' + +-- Customers | where FirstName !startswith_cs \'pet\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | project base64_encode_tostring(\'Kusto1\') | take 1 +S3VzdG8x + +-- Customers | project base64_decode_tostring(\'S3VzdG8x\') | take 1 +Kusto1 + +-- Customers | where isempty(LastName) +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnotempty(LastName) +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +\N why Professional Partial College 38 + +-- Customers | where isnotnull(FirstName) +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnull(FirstName) +\N why Professional Partial College 38 + +-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1 +https://www.test.com/hello word + +-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1 +https%3A%2F%2Fwww.test.com%2Fhello%20word + +-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2)) +\N +Lat en +Pet ra +The az +Ste x +App + +-- Customers | 
project name = strcat(FirstName, \' \', LastName) +\N +Latoya Shen +Peter Nara +Theodore Diaz +Stephanie Cox +Apple + +-- Customers | project FirstName, strlen(FirstName) +\N \N +Latoya 6 +Peter 5 +Theodore 8 +Stephanie 9 +Apple 5 + +-- Customers | project strrep(FirstName,2,\'_\') +\N +Latoya_Latoya +Peter_Peter +Theodore_Theodore +Stephanie_Stephanie +Apple_Apple + +-- Customers | project toupper(FirstName) +\N +LATOYA +PETER +THEODORE +STEPHANIE +APPLE + +-- Customers | project tolower(FirstName) +\N +latoya +peter +theodore +stephanie +apple + +-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 + +-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) +3 +3 +1 + +-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +PINEAPPLE ice cream is 20 +PINEAPPLE +20 + +20 + +-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet +[['T','h','e'],['p','ric','e'],['P','INEAPPL','E'],['i','c','e'],['c','rea','m']] + +-- split 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) +['aa','bb'] +['bbb'] +[''] +['a','','b'] +['aa','cc'] + +-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now. +1-2-Ab + +-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet +2 +2 +-1 diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql new file mode 100644 index 000000000000..cdf9b1e4b171 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -0,0 +1,185 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect='kusto'; +print '-- test String Functions --'; + +print '-- Customers |where Education contains \'degree\''; +Customers |where Education contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'degree\''; +Customers |where Education !contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education contains \'Degree\''; +Customers |where Education contains 'Degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'Degree\''; +Customers |where Education !contains 'Degree' | order by LastName; +print ''; +print '-- Customers | where FirstName endswith \'RE\''; +Customers | where FirstName endswith 'RE' | order by LastName; +print ''; +print 
'-- Customers | where ! FirstName endswith \'RE\''; +Customers | where FirstName ! endswith 'RE' | order by LastName; +print ''; +print '--Customers | where FirstName endswith_cs \'re\''; +Customers | where FirstName endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where FirstName !endswith_cs \'re\''; +Customers | where FirstName !endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where Occupation == \'Skilled Manual\''; +Customers | where Occupation == 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation != \'Skilled Manual\''; +Customers | where Occupation != 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'skilled\''; +Customers | where Occupation has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation !has \'skilled\''; +Customers | where Occupation !has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'Skilled\''; +Customers | where Occupation has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation !has \'Skilled\''; +Customers | where Occupation !has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'Ab\''; +Customers | where Occupation hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'Ab\''; +Customers | where Occupation !hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'ab\''; +Customers | where Occupation hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'ab\''; +Customers | where Occupation !hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'Ent\''; +Customers | where Occupation hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation !hassuffix 
\'Ent\''; +Customers | where Occupation !hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers |where Education in (\'Bachelors\',\'High School\')'; +Customers |where Education in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where Education !in (\'Bachelors\',\'High School\')'; +Customers | where Education !in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where FirstName matches regex \'P.*r\''; +Customers | where FirstName matches regex 'P.*r'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith \'pet\''; +Customers | where FirstName startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith \'pet\''; +Customers | where FirstName !startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith_cs \'pet\''; +Customers | where FirstName startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith_cs \'pet\''; +Customers | where FirstName !startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | project base64_encode_tostring(\'Kusto1\') | take 1'; +Customers | project base64_encode_tostring('Kusto1') | take 1; +print ''; +print '-- Customers | project base64_decode_tostring(\'S3VzdG8x\') | take 1'; +Customers | project base64_decode_tostring('S3VzdG8x') | take 1; +print ''; +print '-- Customers | where isempty(LastName)'; +Customers | where isempty(LastName); +print ''; +print '-- Customers | where isnotempty(LastName)'; +Customers | where isnotempty(LastName); +print ''; +print '-- Customers | where isnotnull(FirstName)'; +Customers | where 
isnotnull(FirstName)| order by LastName; +print ''; +print '-- Customers | where isnull(FirstName)'; +Customers | where isnull(FirstName)| order by LastName; +print ''; +print '-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1'; +Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1; +print ''; +print '-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1'; +Customers | project url_encode('https://www.test.com/hello word') | take 1; +print ''; +print '-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2))'; +Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName; +print ''; +print '-- Customers | project name = strcat(FirstName, \' \', LastName)'; +Customers | project name = strcat(FirstName, ' ', LastName)| order by LastName; +print ''; +print '-- Customers | project FirstName, strlen(FirstName)'; +Customers | project FirstName, strlen(FirstName)| order by LastName; +print ''; +print '-- Customers | project strrep(FirstName,2,\'_\')'; +Customers | project strrep(FirstName,2,'_')| order by LastName; +print ''; +print '-- Customers | project toupper(FirstName)'; +Customers | project toupper(FirstName)| order by LastName; +print ''; +print '-- Customers | project tolower(FirstName)'; +Customers | project tolower(FirstName)| order by LastName; +print ''; +print '-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet'; +Customers | where Age in ((Customers|project Age|where Age < 30)) | order by LastName; +-- Customer | where LastName in~ ("diaz", "cox") +print ''; +print '-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet'; +Customers | 
where Occupation has_all ('manual', 'skilled') | order by LastName; +print ''; +print '-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet'; +Customers|where Occupation has_any ('Skilled','abcd'); +print ''; +print '-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction)'; +Customers | project countof('The cat sat on the mat', 'at') | take 1; +Customers | project countof('The cat sat on the mat', 'at', 'normal') | take 1; +Customers | project countof('The cat sat on the mat', '\\s.he', 'regex') | take 1; +print ''; +print '-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction)'; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20') | take 1; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20') | take 1; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20') | take 1; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20') | take 1; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real)) | take 1; +print ''; +print '-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet'; +Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20') | take 1; +print ''; +print '-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction)'; +Customers | project split('aa_bb', '_') | take 1; +Customers | project split('aaa_bbb_ccc', '_', 1) | take 1; +Customers | project split('', '_') | take 1; +Customers | project split('a__b', '_') | take 1; +Customers | project split('aabbcc', 'bb') | take 1; +print ''; +print '-- strcat_delim 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now.'; +Customers | project strcat_delim('-', '1', '2', strcat('A','b')) | take 1; +-- Customers | project strcat_delim('-', '1', '2', 'A' , 1s); +print ''; +print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet'; +Customers | project indexof('abcdefg','cde') | take 1; +Customers | project indexof('abcdefg','cde',2) | take 1; +Customers | project indexof('abcdefg','cde',6) | take 1; + diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference index d73f75b03c2d..ef5ff544f63a 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.reference +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -20,6 +20,6 @@ Professional 117 Management abcd defg 33 4 2 -40 2 20 6 30 4 +40 2 diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql index 8eba49f92f05..88d7641f3b95 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -30,9 +30,9 @@ Customers | summarize MyMax = maxif(Age, Age<40) by Occupation; Customers | summarize MyMin = minif(Age, Age<40) by Occupation; Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation; Customers | summarize MySum = sumif(Age, Age<40) by Occupation; -Customers | summarize dcount(Education, Occupation=='Professional'); +Customers | summarize dcount(Education); Customers | summarize dcountif(Education, Occupation=='Professional'); -Customers | summarize count() by bin(Age, 10) +Customers | summarize count() by bin(Age, 10) | order by count() ASC; -- The following does not work -- arg_max() diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql index 
6a0a3417f421..e7f715eaedb1 100644 --- a/tests/queries/0_stateless/02366_kql_tabular.sql +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -10,115 +10,73 @@ CREATE TABLE Customers INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); -set dialect='clickhouse'; -Select '-- test Query only has table name: --'; set dialect='kusto'; +print '-- test Query only has table name: --'; Customers; -set dialect='clickhouse'; -Select '-- Query has Column Selection --'; -set dialect='kusto'; +print '-- Query has Column Selection --'; Customers | project FirstName,LastName,Occupation; -set dialect='clickhouse'; -Select '-- Query has limit --'; -set dialect='kusto'; +print '-- Query has limit --'; Customers | project FirstName,LastName,Occupation | take 5; Customers | project FirstName,LastName,Occupation | limit 5; -set dialect='clickhouse'; -Select '-- Query has second limit with bigger value --'; -set dialect='kusto'; +print '-- Query has second limit with bigger value --'; Customers | project FirstName,LastName,Occupation | take 5 | take 7; -set dialect='clickhouse'; -Select '-- Query has second limit with smaller value --'; -set dialect='kusto'; +print '-- Query has second limit with smaller value --'; Customers | project FirstName,LastName,Occupation | take 5 | take 3; -set dialect='clickhouse'; -Select '-- Query has second Column selection --'; -set dialect='kusto'; +print '-- Query has second Column selection --'; Customers | project 
FirstName,LastName,Occupation | take 3 | project FirstName,LastName; -set dialect='clickhouse'; -Select '-- Query has second Column selection with extra column --'; -set dialect='kusto'; +print '-- Query has second Column selection with extra column --'; Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education; -set dialect='clickhouse'; -Select '-- Query with desc sort --'; -set dialect='kusto'; +print '-- Query with desc sort --'; Customers | project FirstName | take 5 | sort by FirstName desc; Customers | project Occupation | take 5 | order by Occupation desc; -set dialect='clickhouse'; -Select '-- Query with asc sort --'; -set dialect='kusto'; +print '-- Query with asc sort --'; Customers | project Occupation | take 5 | sort by Occupation asc; -set dialect='clickhouse'; -Select '-- Query with sort (without keyword asc desc) --'; -set dialect='kusto'; +print '-- Query with sort (without keyword asc desc) --'; Customers | project FirstName | take 5 | sort by FirstName; Customers | project Occupation | take 5 | order by Occupation; -set dialect='clickhouse'; -Select '-- Query with sort 2 Columns with different direction --'; -set dialect='kusto'; +print '-- Query with sort 2 Columns with different direction --'; Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; -set dialect='clickhouse'; -Select '-- Query with second sort --'; -set dialect='kusto'; +print '-- Query with second sort --'; Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; -set dialect='clickhouse'; -Select '-- Test String Equals (==) --'; -set dialect='kusto'; +print '-- Test String Equals (==) --'; Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; -set dialect='clickhouse'; -Select '-- Test String Not equals (!=) --'; -set dialect='kusto'; +print '-- Test String Not equals (!=) --'; Customers | 
project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; -set dialect='clickhouse'; -Select '-- Test Filter using a list (in) --'; -set dialect='kusto'; +print '-- Test Filter using a list (in) --'; Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); -set dialect='clickhouse'; -Select '-- Test Filter using a list (!in) --'; +print '-- Test Filter using a list (!in) --'; set dialect='kusto'; Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); -set dialect='clickhouse'; -Select '-- Test Filter using common string operations (contains_cs) --'; -set dialect='kusto'; +print '-- Test Filter using common string operations (contains_cs) --'; Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; -set dialect='clickhouse'; -Select '-- Test Filter using common string operations (startswith_cs) --'; -set dialect='kusto'; +print '-- Test Filter using common string operations (startswith_cs) --'; Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; -set dialect='clickhouse'; -Select '-- Test Filter using common string operations (endswith_cs) --'; -set dialect='kusto'; +print '-- Test Filter using common string operations (endswith_cs) --'; Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; -set dialect='clickhouse'; -Select '-- Test Filter using numerical equal (==) --'; -set dialect='kusto'; +print '-- Test Filter using numerical equal (==) --'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; -set dialect='clickhouse'; -Select '-- Test Filter using numerical great and less (> , <) --'; -set dialect='kusto'; +print '-- Test Filter using numerical great and less (> , <) --'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; -set 
dialect='clickhouse'; -Select '-- Test Filter using multi where --'; -set dialect='kusto'; +print '-- Test Filter using multi where --'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; From 47a01efae9371b47482a9e3fa7bfc10da163432f Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 27 Jul 2022 12:44:08 -0700 Subject: [PATCH 069/342] Improve conformance to the specifications --- .../KustoFunctions/IParserKQLFunction.cpp | 2 +- .../Kusto/KustoFunctions/IParserKQLFunction.h | 4 +- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 94 ++++++++++++++----- 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 825c5eb5f925..56dc9e1b114b 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -76,7 +76,7 @@ bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const Str return false; } -String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) +String getConvertedArgument(const String & fn_name, IParser::Pos & pos) { String converted_arg; std::vector tokens; diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 7ed3841583b9..492d721f7ead 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -2,6 +2,7 @@ #include #include + namespace DB { class IParserKQLFunction @@ -35,10 +36,9 @@ class IParserKQLFunction protected: virtual bool convertImpl(String & out,IParser::Pos & pos) = 0; static bool directMapping(String &out,IParser::Pos & pos,const String & ch_fn); - static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); static void 
validateEndOfFunction(const String & fn_name, IParser::Pos & pos); static String getKQLFunctionName(IParser::Pos & pos); }; +String getConvertedArgument(const String & fn_name, IParser::Pos & pos); } - diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 0383292669d8..ddbd25d1fd47 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -22,9 +22,41 @@ namespace DB::ErrorCodes extern const int SYNTAX_ERROR; } -namespace DB +namespace +{ +std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos) +{ + std::optional argument; + if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) + return {}; + + ++pos; + return getConvertedArgument(function_name, pos); +} + +String getArgument(const String & function_name, DB::IParser::Pos & pos) +{ + return getOptionalArgument(function_name, pos).value(); +} + +String kqlCallToExpression( + const String & function_name, std::initializer_list> params, const uint32_t max_depth) { + const auto params_str = std::accumulate( + std::cbegin(params), + std::cend(params), + String(), + [](auto acc, const auto & param) { return (acc.empty() ? 
"" : ", ") + std::move(acc) + param.get(); }); + + const auto kql_call = std::format("{}({})", function_name, params_str); + DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); + DB::IParser::Pos tokens_pos(call_tokens, max_depth); + return DB::IParserKQLFunction::getExpression(tokens_pos); +} +} +namespace DB +{ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) { String res = String(pos->begin, pos->end); @@ -38,13 +70,15 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) if (function_name.empty()) return false; - ++pos; - - const auto ip_address = getConvertedArgument(function_name, pos); - ++pos; - - const auto ip_range = getConvertedArgument(function_name, pos); - out = std::format("isIPAddressInRange({0}, concat({1}, if(position({1}, '/') > 0, '', '/32')))", ip_address, ip_range); + const auto ip_address = getArgument(function_name, pos); + const auto ip_range = getArgument(function_name, pos); + out = std::format( + "if(isNull(IPv4StringToNumOrNull({0}) as ip) or isNull({2} as calculated_mask) or " + "isNull(toIPv4OrNull(tokens[1]) as range_prefix_ip), null, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), " + "concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))", + ip_address, + ip_range, + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth)); return true; } @@ -57,26 +91,24 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) { - static const std::array PRIVATE_SUBNETS{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; + static const std::array s_private_subnets{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; const auto function_name = getKQLFunctionName(pos); if (function_name.empty()) return false; - ++pos; - - const auto ip_address = getConvertedArgument(function_name, pos); + const auto ip_address = getArgument(function_name, pos); out += 
std::format( "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or isNull(toIPv4OrNull(tokens[1]) as nullable_ip), null, " - "length(tokens) = 2 and isNull(toUInt8OrNull(tokens[-1]) as suffix), null, " + "length(tokens) = 2 and isNull(toUInt8OrNull(tokens[-1]) as mask), null, " "ignore(assumeNotNull(nullable_ip) as ip, " - "IPv4CIDRToRange(ip, assumeNotNull(suffix)) as range, IPv4NumToString(tupleElement(range, 1)) as begin, " + "IPv4CIDRToRange(ip, assumeNotNull(mask)) as range, IPv4NumToString(tupleElement(range, 1)) as begin, " "IPv4NumToString(tupleElement(range, 2)) as end), null, ", ip_address); - for (int i = 0; i < std::ssize(PRIVATE_SUBNETS); ++i) + for (int i = 0; i < std::ssize(s_private_subnets); ++i) { - const auto & subnet = PRIVATE_SUBNETS[i]; + const auto & subnet = s_private_subnets[i]; out += std::format( "length(tokens) = 1 and isIPAddressInRange(IPv4NumToString(ip), '{0}') or " "isIPAddressInRange(begin, '{0}') and isIPAddressInRange(end, '{0}'), true, ", @@ -93,19 +125,27 @@ bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) if (function_name.empty()) return false; - ++pos; - - const auto ip_range = getConvertedArgument(function_name, pos); + const auto ip_range = getArgument(function_name, pos); out = std::format( "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or not isIPv4String(tokens[1]), null, " - "length(tokens) = 1, 32, isNull(toUInt8OrNull(tokens[-1]) as suffix), null, toUInt8(min2(suffix, 32)))", + "length(tokens) = 1, 32, isNull(toUInt8OrNull(tokens[-1]) as mask), null, toUInt8(min2(mask, 32)))", ip_range); return true; } bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) { - return directMapping(out, pos, "toIPv4OrNull"); + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens) = 1, IPv4StringToNumOrNull(tokens[1]) as ip, " 
+ "length(tokens) = 2 and isNotNull(ip) and isNotNull(toUInt8OrNull(tokens[-1]) as mask), " + "tupleElement(IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)), 1), null)", + ip_address); + return true; } bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) @@ -131,7 +171,17 @@ bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) { - return directMapping(out, pos, "toIPv6OrNull"); + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "if(isNull(ifNull(if(isNull({1} as ipv4), null, IPv4ToIPv6(ipv4)), IPv6StringToNumOrNull({0})) as ipv6), null, " + "arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6))), '([\\da-f]{{4}})')), ':'))", + ip_address, + kqlCallToExpression("parse_ipv4", {ip_address}, pos.max_depth)); + return true; } bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) From f5c3cda793c447c820fe250675024b5d5bae71d7 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 28 Jul 2022 07:24:45 -0700 Subject: [PATCH 070/342] Extract common functions --- .../KustoFunctions/IParserKQLFunction.cpp | 37 ++++++++++++++++-- .../Kusto/KustoFunctions/IParserKQLFunction.h | 23 ++++++++--- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 38 ------------------- 3 files changed, 50 insertions(+), 48 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 56dc9e1b114b..a1dc9132b1b9 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -17,16 +17,16 @@ #include #include +#include + namespace DB { - namespace ErrorCodes { extern const int SYNTAX_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } - bool 
IParserKQLFunction::convert(String & out,IParser::Pos & pos) { return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] @@ -76,7 +76,12 @@ bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const Str return false; } -String getConvertedArgument(const String & fn_name, IParser::Pos & pos) +String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos) +{ + return getOptionalArgument(function_name, pos).value(); +} + +String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) { String converted_arg; std::vector tokens; @@ -115,6 +120,16 @@ String getConvertedArgument(const String & fn_name, IParser::Pos & pos) return converted_arg; } +std::optional IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos) +{ + std::optional argument; + if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) + return {}; + + ++pos; + return getConvertedArgument(function_name, pos); +} + String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) { String fn_name = String(pos->begin, pos->end); @@ -127,6 +142,21 @@ String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) return fn_name; } +String IParserKQLFunction::kqlCallToExpression( + const String & function_name, std::initializer_list> params, const uint32_t max_depth) +{ + const auto params_str = std::accumulate( + std::cbegin(params), + std::cend(params), + String(), + [](auto acc, const auto & param) { return (acc.empty() ? 
"" : ", ") + std::move(acc) + param.get(); }); + + const auto kql_call = std::format("{}({})", function_name, params_str); + DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); + DB::IParser::Pos tokens_pos(call_tokens, max_depth); + return DB::IParserKQLFunction::getExpression(tokens_pos); +} + void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) { if (pos->type != TokenType:: ClosingRoundBracket) @@ -148,5 +178,4 @@ String IParserKQLFunction::getExpression(IParser::Pos & pos) } return arg; } - } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 492d721f7ead..3613cb71facb 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -17,7 +17,11 @@ class IParserKQLFunction pos = begin; return res; } - struct IncreaseDepthTag {}; + + struct IncreaseDepthTag + { + }; + template ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, IncreaseDepthTag, const F & func) { @@ -29,16 +33,23 @@ class IParserKQLFunction pos = begin; return res; } - bool convert(String & out,IParser::Pos & pos); + + bool convert(String & out, IParser::Pos & pos); virtual const char * getName() const = 0; virtual ~IParserKQLFunction() = default; + static String getExpression(IParser::Pos & pos); + protected: - virtual bool convertImpl(String & out,IParser::Pos & pos) = 0; - static bool directMapping(String &out,IParser::Pos & pos,const String & ch_fn); + virtual bool convertImpl(String & out, IParser::Pos & pos) = 0; + + static bool directMapping(String & out, IParser::Pos & pos, const String & ch_fn); + static String getArgument(const String & function_name, DB::IParser::Pos & pos); + static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); + static std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos); + static String 
kqlCallToExpression( + const String & function_name, std::initializer_list> params, uint32_t max_depth); static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); static String getKQLFunctionName(IParser::Pos & pos); }; - -String getConvertedArgument(const String & fn_name, IParser::Pos & pos); } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index ddbd25d1fd47..63a4ade7ca91 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -17,44 +17,6 @@ #include -namespace DB::ErrorCodes -{ -extern const int SYNTAX_ERROR; -} - -namespace -{ -std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos) -{ - std::optional argument; - if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) - return {}; - - ++pos; - return getConvertedArgument(function_name, pos); -} - -String getArgument(const String & function_name, DB::IParser::Pos & pos) -{ - return getOptionalArgument(function_name, pos).value(); -} - -String kqlCallToExpression( - const String & function_name, std::initializer_list> params, const uint32_t max_depth) -{ - const auto params_str = std::accumulate( - std::cbegin(params), - std::cend(params), - String(), - [](auto acc, const auto & param) { return (acc.empty() ? 
"" : ", ") + std::move(acc) + param.get(); }); - - const auto kql_call = std::format("{}({})", function_name, params_str); - DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); - DB::IParser::Pos tokens_pos(call_tokens, max_depth); - return DB::IParserKQLFunction::getExpression(tokens_pos); -} -} - namespace DB { bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) From 6f19d7890b09532a48b0ee3a0d4b450a1dbc04a7 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Wed, 3 Aug 2022 14:06:02 -0700 Subject: [PATCH 071/342] Fix bug in clickhouse-client for non-interactive mode --- src/Client/ClientBase.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 022e3c221346..47b1bf10f8b5 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -341,9 +341,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); else if (dialect == Dialect::kusto_auto) { - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); - if (!res) { pos = begin; @@ -365,13 +363,15 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); else if (dialect == Dialect::kusto_auto) { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); - - if (!res) - { - pos = begin; - res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); - } + try + { + res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } + 
catch(...) + { + pos = begin; + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } } else res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); From 96458fe6ede10e9441b05d694f7a3620806e594a Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 3 Aug 2022 16:49:36 -0700 Subject: [PATCH 072/342] Fix some IP function unit tests --- src/Parsers/tests/gtest_Parser.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 38f017021762..0b2292cce84c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -478,25 +478,25 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | where Age in ((Customers|project Age|where Age < 30))", "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" }, - { +{ "Customers | project ipv4_is_in_range(FirstName, LastName)", - "SELECT isIPAddressInRange(FirstName, concat(LastName, if(position(LastName, '/') > 0, '', '/32')))\nFROM Customers" + "SELECT if(((IPv4StringToNumOrNull(FirstName) AS ip) IS NULL) OR ((multiIf((length(splitByChar('/', LastName) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32))) AS calculated_mask) IS NULL) OR ((toIPv4OrNull(tokens[1]) AS range_prefix_ip) IS NULL), NULL, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))\nFROM Customers" }, { "Customers | project ipv4_is_private(Occupation)", - "SELECT (((length(splitByChar('/', Occupation) AS tokens) = 1) AND isIPAddressInRange(tokens[1] AS ip, '10.0.0.0/8')) OR ((length(tokens) = 2) AND 
isIPAddressInRange(IPv4NumToString((IPv4CIDRToRange(toIPv4(ip), if((toUInt8OrNull(tokens[-1]) AS suffix) IS NULL, throwIf(true, 'Unable to parse suffix'), assumeNotNull(suffix))) AS range).1) AS begin, '10.0.0.0/8') AND isIPAddressInRange(IPv4NumToString(range.2) AS end, '10.0.0.0/8'))) OR (((length(tokens) = 1) AND isIPAddressInRange(ip, '172.16.0.0/12')) OR ((length(tokens) = 2) AND isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12'))) OR (((length(tokens) = 1) AND isIPAddressInRange(ip, '192.168.0.0/16')) OR ((length(tokens) = 2) AND isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16')))\nFROM Customers" + "SELECT multiIf((length(splitByChar('/', Occupation) AS tokens) > 2) OR ((toIPv4OrNull(tokens[1]) AS nullable_ip) IS NULL), NULL, (length(tokens) = 2) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NULL), NULL, ignore(assumeNotNull(nullable_ip) AS ip, IPv4CIDRToRange(ip, assumeNotNull(mask)) AS range, IPv4NumToString(range.1) AS begin, IPv4NumToString(range.2) AS end), NULL, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '10.0.0.0/8')) OR (isIPAddressInRange(begin, '10.0.0.0/8') AND isIPAddressInRange(end, '10.0.0.0/8')), true, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '172.16.0.0/12')) OR (isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12')), true, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '192.168.0.0/16')) OR (isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16')), true, false)\nFROM Customers" }, { "Customers | project ipv4_netmask_suffix(Occupation)", - "SELECT if((length(splitByChar('/', Occupation) AS tokens) <= 2) AND isIPv4String(tokens[1]), if(length(tokens) != 2, 32, if(((toInt8OrNull(tokens[-1]) AS suffix) >= 1) AND (suffix <= 32), suffix, throwIf(true, 'Suffix must be between 1 and 32'))), throwIf(true, 'Unable to recognize and IP address 
with or without a suffix'))\nFROM Customers" + "SELECT multiIf((length(splitByChar('/', Occupation) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32)))\nFROM Customers" }, { "Customers | project parse_ipv4(FirstName)", - "SELECT toIPv4OrNull(FirstName)\nFROM Customers" + "SELECT multiIf(length(splitByChar('/', FirstName) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)\nFROM Customers" }, { "Customers | project parse_ipv6(LastName)", - "SELECT toIPv6OrNull(LastName)\nFROM Customers" + "SELECT if((ifNull(if((multiIf(length(splitByChar('/', LastName) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL) AS ipv4) IS NULL, NULL, IPv4ToIPv6(ipv4)), IPv6StringToNumOrNull(LastName)) AS ipv6) IS NULL, NULL, arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6))), '([\\\\da-f]{4})')), ':'))\nFROM Customers" }, { "Customers|where Occupation has_any ('Skilled','abcd')", From 6e7ba2b7a0b7f7be151248b43fa5aad0344809fa Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Thu, 4 Aug 2022 20:55:33 -0400 Subject: [PATCH 073/342] Fix rebase conflicts. 
--- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 038850f981de..9796ae10c07c 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,7 +2,7 @@ #include #include - +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index f1fc13d2c488..fadf5305e897 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include namespace DB { From 21572e8a0f42456532c222d6dc3b0a2f396f4cc4 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Thu, 4 Aug 2022 22:33:08 -0400 Subject: [PATCH 074/342] Add make list set and more ip tests --- .../0_stateless/02366_kql_func_ip.reference | 10 ++++- .../queries/0_stateless/02366_kql_func_ip.sql | 18 ++++++-- .../0_stateless/02366_kql_summarize.reference | 24 ++++++++++ .../0_stateless/02366_kql_summarize.sql | 44 +++++++++++++------ 4 files changed, 78 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference index 050096fe2d1b..b6074a33b55b 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.reference +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -24,5 +24,13 @@ ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) 1 -- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') 0 -ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') +-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') 0 +-- ipv4_is_in_range(\'abc\', \'127.0.0.1\') +\N +-- parse_ipv6(127.0.0.1) +0000:0000:0000:0000:0000:ffff:7f00:0001 +-- parse_ipv6(fe80::85d:e82c:9446:7994) +fe80:0000:0000:0000:085d:e82c:9446:7994 +-- parse_ipv4(\'127.0.0.1\') 
+2130706433 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql index 3c35e7f58cc2..a625c0bf4707 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.sql +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -27,10 +27,20 @@ print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false -print 'ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; +print '-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false +print '-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')'; -- == null +print ipv4_is_in_range('abc', '127.0.0.1'); +print '-- parse_ipv6(127.0.0.1)'; +print parse_ipv6('127.0.0.1'); +print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; +print parse_ipv6('fe80::85d:e82c:9446:7994'); +print '-- parse_ipv4(\'127.0.0.1\')'; +print parse_ipv4('127.0.0.1'); -- TODO: --- print ipv4_is_in_range('abc', '127.0.0.1'); -- == null --- parse_ipv4() --- parse_ipv6() \ No newline at end of file +-- print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); -- == true + + + + diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference index ef5ff544f63a..dce19393a443 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.reference +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -23,3 +23,27 @@ Management abcd defg 33 20 6 30 4 40 2 +Skilled Manual ['Bachelors','Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College','Partial College','Partial College','Partial College','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual 
['Bachelors','Graduate Degree'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Edward','Christine'] +Professional ['Dalton','Angel'] +Management abcd defg ['Stephanie'] +Skilled Manual ['Edward'] +Professional ['Dalton'] +Management abcd defg ['Stephanie'] +Skilled Manual ['Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Graduate Degree','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Partial College','High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql index 88d7641f3b95..048bdc9e7123 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -8,18 +8,21 @@ CREATE TABLE Customers Age Nullable(UInt8) ) ENGINE = Memory; -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); -INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); -INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); -INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); -INSERT INTO Customers VALUES ('Joshua','Lee','Professional','Partial College',26); -INSERT INTO Customers VALUES ('Edward','Hernandez','Skilled Manual','High School',36); -INSERT INTO Customers VALUES ('Dalton','Wood','Professional','Partial College',42); -INSERT INTO Customers VALUES ('Christine','Nara','Skilled Manual','Partial College',33); -INSERT INTO Customers VALUES ('Cameron','Rodriguez','Professional','Partial College',28); -INSERT INTO Customers VALUES 
('Angel','Stewart','Professional','Partial College',46); -INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); -INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); +-- INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); +-- INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); +-- INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); +-- INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); +-- INSERT INTO Customers VALUES ('Joshua','Lee','Professional','Partial College',26); +-- INSERT INTO Customers VALUES ('Edward','Hernandez','Skilled Manual','High School',36); +-- INSERT INTO Customers VALUES ('Dalton','Wood','Professional','Partial College',42); +-- INSERT INTO Customers VALUES ('Christine','Nara','Skilled Manual','Partial College',33); +-- INSERT INTO Customers VALUES ('Cameron','Rodriguez','Professional','Partial College',28); +-- INSERT INTO Customers VALUES ('Angel','Stewart','Professional','Partial College',46); +-- INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); +-- INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Joshua','Lee','Professional','Partial College',26),('Edward','Hernandez','Skilled Manual','High School',36),('Dalton','Wood','Professional','Partial College',42),('Christine','Nara','Skilled Manual','Partial College',33),('Cameron','Rodriguez','Professional','Partial College',28),('Angel','Stewart','Professional','Partial College',46),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + Select '-- test summarize --' ; set 
dialect='kusto'; @@ -34,6 +37,21 @@ Customers | summarize dcount(Education); Customers | summarize dcountif(Education, Occupation=='Professional'); Customers | summarize count() by bin(Age, 10) | order by count() ASC; --- The following does not work +-- make_list() +Customers | summarize f_list = make_list(Education) by Occupation; +Customers | summarize f_list = make_list(Education, 2) by Occupation; +-- make_list_if() +Customers | summarize f_list = make_list_if(FirstName, Age>30) by Occupation; +Customers | summarize f_list = make_list_if(FirstName, Age>30, 1) by Occupation; +-- make_set() +Customers | summarize f_list = make_set(Education) by Occupation; +Customers | summarize f_list = make_set(Education, 2) by Occupation; +-- make_set_if() +Customers | summarize f_list = make_set_if(Education, Age>30) by Occupation; +Customers | summarize f_list = make_set_if(Education, Age>30, 1) by Occupation; + +-- TODO: -- arg_max() -- arg_min() +-- make_list_with_nulls() +-- Customers | sort by FirstName | summarize count(Education) by Occupation; \ No newline at end of file From f4b2e9e295b4cc7e37ee6bdf703e372ace724a68 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 2 Aug 2022 07:35:21 -0700 Subject: [PATCH 075/342] Implement KQL functions handling IPv4 --- .../KustoFunctions/IParserKQLFunction.cpp | 84 +++++++++++-------- .../Kusto/KustoFunctions/IParserKQLFunction.h | 7 +- .../KustoFunctions/KQLCastingFunctions.cpp | 36 ++++---- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 82 ++++++++++++++---- 4 files changed, 139 insertions(+), 70 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index a1dc9132b1b9..bf46364f1f09 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -1,21 +1,21 @@ -#include -#include #include #include -#include -#include +#include #include 
-#include -#include -#include -#include #include -#include -#include #include -#include +#include +#include +#include #include +#include +#include +#include +#include #include +#include +#include +#include #include @@ -27,22 +27,25 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -bool IParserKQLFunction::convert(String & out,IParser::Pos & pos) +bool IParserKQLFunction::convert(String & out, IParser::Pos & pos) { - return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); + return wrapConvertImpl( + pos, + IncreaseDepthTag{}, + [&] + { + bool res = convertImpl(out, pos); + if (!res) + out = ""; + return res; + }); } -bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const String & ch_fn) +bool IParserKQLFunction::directMapping(String & out, IParser::Pos & pos, const String & ch_fn) { std::vector arguments; - String fn_name = getKQLFunctionName(pos); + String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -52,17 +55,17 @@ bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const Str ++pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - String argument = getConvertedArgument(fn_name,pos); + String argument = getConvertedArgument(fn_name, pos); arguments.push_back(argument); if (pos->type == TokenType::ClosingRoundBracket) { - for (auto arg : arguments) + for (auto arg : arguments) { if (res.empty()) res = ch_fn + "(" + arg; else - res = res + ", "+ arg; + res = res + ", " + arg; } res += ")"; @@ -78,7 +81,10 @@ bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const Str String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos) { - return getOptionalArgument(function_name, pos).value(); + if (auto optionalArgument = getOptionalArgument(function_name, pos)) + return 
std::move(*optionalArgument); + + throw Exception(std::format("Required argument was not provided in {}", function_name), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) @@ -95,11 +101,11 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - String token = String(pos->begin,pos->end); + String token = String(pos->begin, pos->end); String new_token; - if (!KQLOperators().convert(tokens,pos)) + if (!KQLOperators().convert(tokens, pos)) { - if (pos->type == TokenType::BareWord ) + if (pos->type == TokenType::BareWord) { tokens.push_back(IParserKQLFunction::getExpression(pos)); } @@ -114,15 +120,14 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) break; } - for (auto token : tokens) - converted_arg = converted_arg + token +" "; + for (auto token : tokens) + converted_arg = converted_arg + token + " "; return converted_arg; } std::optional IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos) { - std::optional argument; if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) return {}; @@ -139,17 +144,24 @@ String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) --pos; return ""; } - return fn_name; + return fn_name; } String IParserKQLFunction::kqlCallToExpression( - const String & function_name, std::initializer_list> params, const uint32_t max_depth) + const String & function_name, std::initializer_list params, const uint32_t max_depth) { const auto params_str = std::accumulate( std::cbegin(params), std::cend(params), String(), - [](auto acc, const auto & param) { return (acc.empty() ? 
"" : ", ") + std::move(acc) + param.get(); }); + [](String acc, const std::string_view param) + { + if (!acc.empty()) + acc.append(", "); + + acc.append(param.data(), param.length()); + return acc; + }); const auto kql_call = std::format("{}({})", function_name, params_str); DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); @@ -159,14 +171,14 @@ String IParserKQLFunction::kqlCallToExpression( void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) { - if (pos->type != TokenType:: ClosingRoundBracket) + if (pos->type != TokenType::ClosingRoundBracket) throw Exception("Too many arguments in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } String IParserKQLFunction::getExpression(IParser::Pos & pos) { String arg = String(pos->begin, pos->end); - if (pos->type == TokenType::BareWord ) + if (pos->type == TokenType::BareWord) { String new_arg; auto fun = KQLFunctionFactory::get(arg); diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 3613cb71facb..b7f8427043cc 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -17,7 +17,7 @@ class IParserKQLFunction pos = begin; return res; } - + struct IncreaseDepthTag { }; @@ -33,7 +33,7 @@ class IParserKQLFunction pos = begin; return res; } - + bool convert(String & out, IParser::Pos & pos); virtual const char * getName() const = 0; virtual ~IParserKQLFunction() = default; @@ -47,8 +47,7 @@ class IParserKQLFunction static String getArgument(const String & function_name, DB::IParser::Pos & pos); static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); static std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos); - static String kqlCallToExpression( - const String & function_name, std::initializer_list> params, uint32_t max_depth); + static 
String kqlCallToExpression(const String & function_name, std::initializer_list params, uint32_t max_depth); static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); static String getKQLFunctionName(IParser::Pos & pos); }; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index 9129d82aa780..acbb7468d204 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -1,48 +1,54 @@ #include #include -#include #include +#include + +#include namespace DB { -bool ToBool::convertImpl(String &out,IParser::Pos &pos) +bool ToBool::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ToDouble::convertImpl(String &out,IParser::Pos &pos) +bool ToDouble::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ToInt::convertImpl(String &out,IParser::Pos &pos) +bool ToInt::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ToString::convertImpl(String &out,IParser::Pos &pos) +bool ToString::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("ifNull(toString({0}), '')", param); + return 
true; } -bool ToTimeSpan::convertImpl(String &out,IParser::Pos &pos) +bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 63a4ade7ca91..c8d16b8b9184 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -21,9 +21,25 @@ namespace DB { bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + + out = std::format( + "multiIf(length(splitByChar('/', {0}) as lhs) > 2 or length(splitByChar('/', {1}) as rhs) > 2, null, " + "isNull(toIPv4OrNull(lhs[1]) as lhs_ip) or length(lhs) = 2 and isNull(toUInt8OrNull(lhs[-1]) as lhs_mask) or " + "isNull(toIPv4OrNull(rhs[1]) as rhs_ip) or length(rhs) = 2 and isNull(toUInt8OrNull(rhs[-1]) as rhs_mask), null, " + "ignore(toUInt8(min2(32, min2({2}, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) as mask), null, " + "sign(toInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask), 1))" + " - toInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask), 1))))", + lhs, + rhs, + mask ? 
*mask : "32"); + return true; } bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) @@ -46,9 +62,16 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + + out = std::format("{} = 0", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? *mask : "32"}, pos.max_depth)); + return true; } bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) @@ -112,9 +135,18 @@ bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getArgument(function_name, pos); + out = std::format( + "if(isNull(toIPv4OrNull({0}) as ip) or isNull(toUInt8OrNull(toString({1})) as mask), null, " + "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip), toUInt8(max2(0, min2(32, assumeNotNull(mask))))), 1)))", + ip_address, + mask); + return true; } bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) @@ -155,15 +187,35 @@ bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = 
getOptionalArgument(function_name, pos); + out = std::format( + "ifNull(multiIf(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32) and toTypeName({0}) = 'String' or {1} < 0, null, " + "isNull(ifNull(param_as_uint32, {2}) as ip_as_number), null, " + "IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - {1}) - 1))))), '')", + ip_address, + mask ? *mask : "32", + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth)); + return true; } bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + const auto calculated_mask = mask ? *mask : "32"; + out = std::format( + "if(empty({1} as formatted_ip) or not {0} between 0 and 32, '', concat(formatted_ip, '/', toString({0})))", + calculated_mask, + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth)); + return true; } } From 224a9c0a28b94ef7eec0181c70dfd2d4bb174904 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 2 Aug 2022 16:28:50 -0700 Subject: [PATCH 076/342] Add unit tests and release notes --- src/Parsers/Kusto/KQL_ReleaseNote.md | 29 +++++++++++++++++++-- src/Parsers/tests/gtest_Parser.cpp | 38 +++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index ae3f9e1b6716..b37b991a2a26 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,4 +1,5 @@ -# KQL implemented features. +# August XX, 2022 +## KQL implemented features The config setting to allow modify dialect setting. 
- Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. @@ -22,7 +23,31 @@ The config setting to allow modify dialect setting. OR pass dialect setting with '--'. For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` -# Augest 1, 2022 + +## IP functions +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) + `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` + `print format_ipv4(3232236031, 24) == '192.168.1.0'` +- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) + `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` + `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) + `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` + `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) + `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` + `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) + `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` + `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` + `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` + `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` + +# August 1, 2022 - **strcmp** 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) `print strcmp('abc','ABC')` diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 0b2292cce84c..36615f735db9 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -478,7 +478,43 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | where Age in ((Customers|project Age|where Age < 30))", "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" }, -{ + { + "print format_ipv4(ip)", + "SELECT ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (32 < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - 32) - 1))))), '')" + }, + { + "print format_ipv4(ip, mask)", + "SELECT ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (mask < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - mask) - 1))))), '')" + }, + { + "print format_ipv4_mask(ip)", + "SELECT if(empty(ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (32 < 0), NULL, (ifNull(param_as_uint32, 
multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - 32) - 1))))), '') AS formatted_ip) OR (NOT ((32 >= 0) AND (32 <= 32))), '', concat(formatted_ip, '/', toString(32)))" + }, + { + "print format_ipv4_mask(ip, mask)", + "SELECT if(empty(ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (mask < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - mask) - 1))))), '') AS formatted_ip) OR (NOT ((mask >= 0) AND (mask <= 32))), '', concat(formatted_ip, '/', toString(mask)))" + }, + { + "print ipv4_compare(ip1, ip2)", + "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(32, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1)))" + }, + { + "print ipv4_compare(ip1, ip2, mask)", + "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), 
NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(mask, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1)))" + }, + { + "print ipv4_is_match(ip1, ip2)", + "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(32, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1))) = 0" + }, + { + "print ipv4_is_match(ip1, ip2, mask)", + "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(mask, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1))) = 0" + }, + { + "print parse_ipv4_mask(ip, mask)", + "SELECT if(((toIPv4OrNull(ip) AS ip) IS NULL) OR ((toUInt8OrNull(toString(mask)) AS mask) IS NULL), NULL, toUInt32(IPv4CIDRToRange(assumeNotNull(ip), toUInt8(max2(0, min2(32, assumeNotNull(mask))))).1))" + }, + 
{ "Customers | project ipv4_is_in_range(FirstName, LastName)", "SELECT if(((IPv4StringToNumOrNull(FirstName) AS ip) IS NULL) OR ((multiIf((length(splitByChar('/', LastName) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32))) AS calculated_mask) IS NULL) OR ((toIPv4OrNull(tokens[1]) AS range_prefix_ip) IS NULL), NULL, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))\nFROM Customers" }, From 4d2682d400a8d54bde3cd4b910bb82fcdc574af3 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 4 Aug 2022 10:16:23 -0700 Subject: [PATCH 077/342] Make aliases unique --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 99 +++++++++++-------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index c8d16b8b9184..ddd9b9ab0abf 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -15,8 +15,19 @@ #include #include +#include + #include +namespace +{ +String generateUniqueIdentifier() +{ + static pcg32_unique unique_random_generator; + return std::to_string(unique_random_generator()); +} +} + namespace DB { bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) @@ -30,15 +41,17 @@ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) const auto mask = getOptionalArgument(function_name, pos); out = std::format( - "multiIf(length(splitByChar('/', {0}) as lhs) > 2 or length(splitByChar('/', {1}) as rhs) > 2, null, " - "isNull(toIPv4OrNull(lhs[1]) as lhs_ip) or length(lhs) = 2 and isNull(toUInt8OrNull(lhs[-1]) as lhs_mask) or " - "isNull(toIPv4OrNull(rhs[1]) as rhs_ip) or length(rhs) = 2 and isNull(toUInt8OrNull(rhs[-1]) as rhs_mask), null, 
" - "ignore(toUInt8(min2(32, min2({2}, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) as mask), null, " - "sign(toInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask), 1))" - " - toInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask), 1))))", - lhs, - rhs, - mask ? *mask : "32"); + "multiIf(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}), null, " + "isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " + "ignore(toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), null, " + "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), mask_{5}), 1))" + " - toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1))))", + kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth), + mask ? 
*mask : "32", + generateUniqueIdentifier()); return true; } @@ -51,12 +64,13 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto ip_range = getArgument(function_name, pos); out = std::format( - "if(isNull(IPv4StringToNumOrNull({0}) as ip) or isNull({2} as calculated_mask) or " - "isNull(toIPv4OrNull(tokens[1]) as range_prefix_ip), null, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), " - "concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))", + "multiIf(isNull(IPv4StringToNumOrNull({0}) as ip_{3}), null, " + "isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " + "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(32 - range_mask_{3}) - 1)))) = 0)", ip_address, - ip_range, - kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth)); + kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth), + generateUniqueIdentifier()); return true; } @@ -85,19 +99,21 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); out += std::format( - "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or isNull(toIPv4OrNull(tokens[1]) as nullable_ip), null, " - "length(tokens) = 2 and isNull(toUInt8OrNull(tokens[-1]) as mask), null, " - "ignore(assumeNotNull(nullable_ip) as ip, " - "IPv4CIDRToRange(ip, assumeNotNull(mask)) as range, IPv4NumToString(tupleElement(range, 1)) as begin, " - "IPv4NumToString(tupleElement(range, 2)) as end), null, ", - ip_address); + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}), null, " + "length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "ignore(assumeNotNull(nullable_ip_{1}) as ip_{1}, " + "IPv4CIDRToRange(ip_{1}, 
assumeNotNull(mask_{1})) as range_{1}, IPv4NumToString(tupleElement(range_{1}, 1)) as begin_{1}, " + "IPv4NumToString(tupleElement(range_{1}, 2)) as end_{1}), null, ", + ip_address, + generateUniqueIdentifier()); for (int i = 0; i < std::ssize(s_private_subnets); ++i) { const auto & subnet = s_private_subnets[i]; out += std::format( - "length(tokens) = 1 and isIPAddressInRange(IPv4NumToString(ip), '{0}') or " - "isIPAddressInRange(begin, '{0}') and isIPAddressInRange(end, '{0}'), true, ", - subnet); + "length(tokens_{1}) = 1 and isIPAddressInRange(IPv4NumToString(ip_{1}), '{0}') or " + "isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}'), true, ", + subnet, + generateUniqueIdentifier()); } out += "false)"; @@ -112,9 +128,10 @@ bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) const auto ip_range = getArgument(function_name, pos); out = std::format( - "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or not isIPv4String(tokens[1]), null, " - "length(tokens) = 1, 32, isNull(toUInt8OrNull(tokens[-1]) as mask), null, toUInt8(min2(mask, 32)))", - ip_range); + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or not isIPv4String(tokens_{1}[1]), null, " + "length(tokens_{1}) = 1, 32, isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, toUInt8(min2(mask_{1}, 32)))", + ip_range, + generateUniqueIdentifier()); return true; } @@ -126,10 +143,11 @@ bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); out = std::format( - "multiIf(length(splitByChar('/', {0}) as tokens) = 1, IPv4StringToNumOrNull(tokens[1]) as ip, " - "length(tokens) = 2 and isNotNull(ip) and isNotNull(toUInt8OrNull(tokens[-1]) as mask), " - "tupleElement(IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)), 1), null)", - ip_address); + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) = 1, IPv4StringToNumOrNull(tokens_{1}[1]) as ip_{1}, " + "length(tokens_{1}) = 2 and 
isNotNull(ip_{1}) and isNotNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), " + "tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{1}), assumeNotNull(mask_{1})), 1), null)", + ip_address, + generateUniqueIdentifier()); return true; } @@ -142,10 +160,11 @@ bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto mask = getArgument(function_name, pos); out = std::format( - "if(isNull(toIPv4OrNull({0}) as ip) or isNull(toUInt8OrNull(toString({1})) as mask), null, " - "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip), toUInt8(max2(0, min2(32, assumeNotNull(mask))))), 1)))", + "if(isNull(toIPv4OrNull({0}) as ip_{2}) or isNull(toUInt8OrNull(toString({1})) as mask_{2}), null, " + "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{2}), toUInt8(max2(0, min2(32, assumeNotNull(mask_{2}))))), 1)))", ip_address, - mask); + mask, + generateUniqueIdentifier()); return true; } @@ -194,12 +213,13 @@ bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); out = std::format( - "ifNull(multiIf(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32) and toTypeName({0}) = 'String' or {1} < 0, null, " - "isNull(ifNull(param_as_uint32, {2}) as ip_as_number), null, " - "IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - {1}) - 1))))), '')", + "ifNull(multiIf(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or {1} < 0, null, " + "isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " + "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(32 - {1}) - 1))))), '')", ip_address, mask ? 
*mask : "32", - kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth)); + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth), + generateUniqueIdentifier()); return true; } @@ -213,9 +233,10 @@ bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) const auto mask = getOptionalArgument(function_name, pos); const auto calculated_mask = mask ? *mask : "32"; out = std::format( - "if(empty({1} as formatted_ip) or not {0} between 0 and 32, '', concat(formatted_ip, '/', toString({0})))", + "if(empty({1} as formatted_ip_{2}) or not {0} between 0 and 32, '', concat(formatted_ip_{2}, '/', toString({0})))", calculated_mask, - kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth)); + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth), + generateUniqueIdentifier()); return true; } } From 3aab34ec68d7d049ff786b737dc600ba8b7328e1 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 5 Aug 2022 13:58:32 -0700 Subject: [PATCH 078/342] Implement unit tests for IP functions --- contrib/googletest-cmake/CMakeLists.txt | 25 ++++-- src/CMakeLists.txt | 1 + .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 7 +- src/Parsers/tests/gtest_KQL.cpp | 86 +++++++++++++++++++ src/Parsers/tests/gtest_Parser.cpp | 65 +------------- src/Parsers/tests/gtest_common.h | 7 ++ 6 files changed, 120 insertions(+), 71 deletions(-) create mode 100644 src/Parsers/tests/gtest_KQL.cpp create mode 100644 src/Parsers/tests/gtest_common.h diff --git a/contrib/googletest-cmake/CMakeLists.txt b/contrib/googletest-cmake/CMakeLists.txt index 90fdde0c1859..94c35656987d 100644 --- a/contrib/googletest-cmake/CMakeLists.txt +++ b/contrib/googletest-cmake/CMakeLists.txt @@ -1,15 +1,30 @@ -set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest") +set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest") -add_library(_gtest 
"${SRC_DIR}/src/gtest-all.cc") +add_library(_gtest "${SRC_DIR}/googletest/src/gtest-all.cc") set_target_properties(_gtest PROPERTIES VERSION "1.0.0") target_compile_definitions (_gtest PUBLIC GTEST_HAS_POSIX_RE=0) -target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/include") -target_include_directories(_gtest PRIVATE "${SRC_DIR}") +target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/googletest/include") +target_include_directories(_gtest PRIVATE "${SRC_DIR}/googletest") -add_library(_gtest_main "${SRC_DIR}/src/gtest_main.cc") +add_library(_gtest_main "${SRC_DIR}/googletest/src/gtest_main.cc") set_target_properties(_gtest_main PROPERTIES VERSION "1.0.0") target_link_libraries(_gtest_main PUBLIC _gtest) add_library(_gtest_all INTERFACE) target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main) add_library(ch_contrib::gtest_all ALIAS _gtest_all) + +add_library(_gmock "${SRC_DIR}/googlemock/src/gmock-all.cc") +set_target_properties(_gmock PROPERTIES VERSION "1.0.0") +target_compile_definitions (_gmock PUBLIC GTEST_HAS_POSIX_RE=0) +target_include_directories(_gmock SYSTEM PUBLIC "${SRC_DIR}/googlemock/include") +target_include_directories(_gmock PRIVATE "${SRC_DIR}/googlemock") +target_link_libraries(_gmock PUBLIC _gtest) + +add_library(_gmock_main "${SRC_DIR}/googlemock/src/gmock_main.cc") +set_target_properties(_gmock_main PROPERTIES VERSION "1.0.0") +target_link_libraries(_gmock_main PUBLIC _gmock) + +add_library(_gmock_all INTERFACE) +target_link_libraries(_gmock_all INTERFACE _gmock _gmock_main) +add_library(ch_contrib::gmock_all ALIAS _gmock_all) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bd8b221e2ba9..77a7b8d7352d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -611,6 +611,7 @@ if (ENABLE_TESTS) ) target_link_libraries(unit_tests_dbms PRIVATE + ch_contrib::gmock_all ch_contrib::gtest_all clickhouse_functions clickhouse_aggregate_functions diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp 
b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index ddd9b9ab0abf..2ee483fa8431 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -190,10 +190,11 @@ bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); out = std::format( - "if(isNull(ifNull(if(isNull({1} as ipv4), null, IPv4ToIPv6(ipv4)), IPv6StringToNumOrNull({0})) as ipv6), null, " - "arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6))), '([\\da-f]{{4}})')), ':'))", + "if(isNull(ifNull(if(isNull({1} as ipv4_{2}), null, IPv4ToIPv6(ipv4_{2})), IPv6StringToNumOrNull({0})) as ipv6_{2}), null, " + "arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6_{2}))), '([\\da-f]{{4}})')), ':'))", ip_address, - kqlCallToExpression("parse_ipv4", {ip_address}, pos.max_depth)); + kqlCallToExpression("parse_ipv4", {ip_address}, pos.max_depth), + generateUniqueIdentifier()); return true; } diff --git a/src/Parsers/tests/gtest_KQL.cpp b/src/Parsers/tests/gtest_KQL.cpp new file mode 100644 index 000000000000..5cd4e39e2780 --- /dev/null +++ b/src/Parsers/tests/gtest_KQL.cpp @@ -0,0 +1,86 @@ +#include "gtest_common.h" + +#include +#include +#include + +#include +#include + +class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserRegexTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_TRUE(parser); + ASSERT_TRUE(expected_ast); + + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print 
format_ipv4(A)", + "SELECT ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)" + }, + { + "print format_ipv4(A, B)", + "SELECT ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\)" + }, + { + "print format_ipv4_mask(A)", + "SELECT if\\(empty\\(ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, 
\\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(32 >= 0\\) AND \\(32 <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(32\\)\\)\\)" + }, + { + "print format_ipv4_mask(A, B)", + "SELECT if\\(empty\\(ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(B >= 0\\) AND \\(B <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(B\\)\\)\\)" + }, + { + "print ipv4_compare(A, B)", + "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) 
> 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" + }, + { + "print ipv4_compare(A, B, C)", + "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 
32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" + }, + { + "print ipv4_is_match(A, B)", + "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, 
\\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" + }, + { + "print ipv4_is_match(A, B, C)", + "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), 
assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" + }, + { + "print parse_ipv4_mask(A, B)", + "SELECT if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\)" + }, + { + "print ipv4_is_in_range(A, B)", + "SELECT multiIf\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL, NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 
0\\)" + }, + { + "print ipv4_is_private(A)", + "SELECT multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(toIPv4OrNull\\(tokens_\\d+\\[1\\]\\) AS nullable_ip_\\d+\\) IS NULL\\), NULL, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\), NULL, ignore\\(assumeNotNull\\(nullable_ip_\\d+\\) AS ip_\\d+, IPv4CIDRToRange\\(ip_\\d+, assumeNotNull\\(mask_\\d+\\)\\) AS range_\\d+, IPv4NumToString\\(range_\\d+.1\\) AS begin_\\d+, IPv4NumToString\\(range_\\d+.2\\) AS end_\\d+\\), NULL, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '10.0.0.0/8'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '10.0.0.0/8'\\) AND isIPAddressInRange\\(end_\\d+, '10.0.0.0/8'\\)\\), true, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '172.16.0.0/12'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '172.16.0.0/12'\\) AND isIPAddressInRange\\(end_\\d+, '172.16.0.0/12'\\)\\), true, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '192.168.0.0/16'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '192.168.0.0/16'\\) AND isIPAddressInRange\\(end_\\d+, '192.168.0.0/16'\\)\\), true, false\\)" + }, + { + "print ipv4_netmask_suffix(A)", + "SELECT multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\)" + }, + { + "print parse_ipv4(A)", + "SELECT multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, 
NULL\\)" + }, + { + "print parse_ipv6(A)", + "SELECT if\\(\\(ifNull\\(if\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS ipv4_\\d+\\) IS NULL, NULL, IPv4ToIPv6\\(ipv4_\\d+\\)\\), IPv6StringToNumOrNull\\(A\\)\\) AS ipv6_\\d+\\) IS NULL, NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(assumeNotNull\\(ipv6_\\d+\\)\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)" + } +}))); diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 36615f735db9..1e563a45cfa4 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,3 +1,5 @@ +#include "gtest_common.h" + #include #include #include @@ -23,13 +25,6 @@ using namespace DB; using namespace std::literals; } - -struct ParserTestCase -{ - const std::string_view input_text; - const char * expected_ast = nullptr; -}; - std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr parser) { return ostr << "Parser: " << parser->getName(); @@ -478,62 +473,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | where Age in ((Customers|project Age|where Age < 30))", "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" }, - { - "print format_ipv4(ip)", - "SELECT ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (32 < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), 
assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - 32) - 1))))), '')" - }, - { - "print format_ipv4(ip, mask)", - "SELECT ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (mask < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - mask) - 1))))), '')" - }, - { - "print format_ipv4_mask(ip)", - "SELECT if(empty(ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (32 < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - 32) - 1))))), '') AS formatted_ip) OR (NOT ((32 >= 0) AND (32 <= 32))), '', concat(formatted_ip, '/', toString(32)))" - }, - { - "print format_ipv4_mask(ip, mask)", - "SELECT if(empty(ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (mask < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, 
IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - mask) - 1))))), '') AS formatted_ip) OR (NOT ((mask >= 0) AND (mask <= 32))), '', concat(formatted_ip, '/', toString(mask)))" - }, - { - "print ipv4_compare(ip1, ip2)", - "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(32, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1)))" - }, - { - "print ipv4_compare(ip1, ip2, mask)", - "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(mask, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1)))" - }, - { - "print ipv4_is_match(ip1, ip2)", - "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(32, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, 
sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1))) = 0" - }, - { - "print ipv4_is_match(ip1, ip2, mask)", - "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(mask, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1))) = 0" - }, - { - "print parse_ipv4_mask(ip, mask)", - "SELECT if(((toIPv4OrNull(ip) AS ip) IS NULL) OR ((toUInt8OrNull(toString(mask)) AS mask) IS NULL), NULL, toUInt32(IPv4CIDRToRange(assumeNotNull(ip), toUInt8(max2(0, min2(32, assumeNotNull(mask))))).1))" - }, - { - "Customers | project ipv4_is_in_range(FirstName, LastName)", - "SELECT if(((IPv4StringToNumOrNull(FirstName) AS ip) IS NULL) OR ((multiIf((length(splitByChar('/', LastName) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32))) AS calculated_mask) IS NULL) OR ((toIPv4OrNull(tokens[1]) AS range_prefix_ip) IS NULL), NULL, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))\nFROM Customers" - }, - { - "Customers | project ipv4_is_private(Occupation)", - "SELECT multiIf((length(splitByChar('/', Occupation) AS tokens) > 2) OR ((toIPv4OrNull(tokens[1]) AS nullable_ip) IS NULL), NULL, (length(tokens) = 2) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NULL), NULL, ignore(assumeNotNull(nullable_ip) AS ip, IPv4CIDRToRange(ip, assumeNotNull(mask)) AS range, 
IPv4NumToString(range.1) AS begin, IPv4NumToString(range.2) AS end), NULL, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '10.0.0.0/8')) OR (isIPAddressInRange(begin, '10.0.0.0/8') AND isIPAddressInRange(end, '10.0.0.0/8')), true, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '172.16.0.0/12')) OR (isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12')), true, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '192.168.0.0/16')) OR (isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16')), true, false)\nFROM Customers" - }, - { - "Customers | project ipv4_netmask_suffix(Occupation)", - "SELECT multiIf((length(splitByChar('/', Occupation) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32)))\nFROM Customers" - }, - { - "Customers | project parse_ipv4(FirstName)", - "SELECT multiIf(length(splitByChar('/', FirstName) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)\nFROM Customers" - }, - { - "Customers | project parse_ipv6(LastName)", - "SELECT if((ifNull(if((multiIf(length(splitByChar('/', LastName) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL) AS ipv4) IS NULL, NULL, IPv4ToIPv6(ipv4)), IPv6StringToNumOrNull(LastName)) AS ipv6) IS NULL, NULL, arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6))), '([\\\\da-f]{4})')), ':'))\nFROM Customers" - }, { "Customers|where Occupation has_any ('Skilled','abcd')", "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Occupation, 'Skilled') OR 
hasTokenCaseInsensitive(Occupation, 'abcd')" diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h new file mode 100644 index 000000000000..abbc1a0cb0eb --- /dev/null +++ b/src/Parsers/tests/gtest_common.h @@ -0,0 +1,7 @@ +#include + +struct ParserTestCase +{ + const std::string_view input_text; + const char * expected_ast = nullptr; +}; From 268a7c98092e5651042823d032477287d2ce7307 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 8 Aug 2022 07:27:00 -0700 Subject: [PATCH 079/342] Move KQL tests into their own folder --- src/Parsers/tests/{gtest_KQL.cpp => KQL/gtest_KQL_IP.cpp} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/Parsers/tests/{gtest_KQL.cpp => KQL/gtest_KQL_IP.cpp} (99%) diff --git a/src/Parsers/tests/gtest_KQL.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp similarity index 99% rename from src/Parsers/tests/gtest_KQL.cpp rename to src/Parsers/tests/KQL/gtest_KQL_IP.cpp index 5cd4e39e2780..cf1d08381760 100644 --- a/src/Parsers/tests/gtest_KQL.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -1,4 +1,4 @@ -#include "gtest_common.h" +#include #include #include From 0b8a2e93c97a502e653e8bca694ce833873f6263 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 8 Aug 2022 07:38:51 -0700 Subject: [PATCH 080/342] Improve performance of IPv4 functions --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 2ee483fa8431..47d01e42ae3e 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -39,12 +39,11 @@ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos); const auto rhs = getArgument(function_name, pos); const auto mask = 
getOptionalArgument(function_name, pos); - out = std::format( - "multiIf(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}), null, " - "isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " - "ignore(toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), null, " - "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), mask_{5}), 1))" + "if(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}) " + "or isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " + "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), " + "toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), 1))" " - toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1))))", kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth), @@ -64,8 +63,8 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto ip_range = getArgument(function_name, pos); out = std::format( - "multiIf(isNull(IPv4StringToNumOrNull({0}) as ip_{3}), null, " - "isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " + "if(isNull(IPv4StringToNumOrNull({0}) as ip_{3}) " + "or isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(32 - range_mask_{3}) - 1)))) = 0)", ip_address, kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), @@ -83,7 +82,6 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos); const auto rhs = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); - out = std::format("{} = 0", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? 
*mask : "32"}, pos.max_depth)); return true; } @@ -97,26 +95,30 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) return false; const auto ip_address = getArgument(function_name, pos); + const auto unique_identifier = generateUniqueIdentifier(); out += std::format( - "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}), null, " - "length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " "ignore(assumeNotNull(nullable_ip_{1}) as ip_{1}, " "IPv4CIDRToRange(ip_{1}, assumeNotNull(mask_{1})) as range_{1}, IPv4NumToString(tupleElement(range_{1}, 1)) as begin_{1}, " "IPv4NumToString(tupleElement(range_{1}, 2)) as end_{1}), null, ", ip_address, - generateUniqueIdentifier()); + unique_identifier); for (int i = 0; i < std::ssize(s_private_subnets); ++i) { + if (i > 0) + out += " or"; + const auto & subnet = s_private_subnets[i]; out += std::format( "length(tokens_{1}) = 1 and isIPAddressInRange(IPv4NumToString(ip_{1}), '{0}') or " - "isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}'), true, ", + "length(tokens_{1}) = 2 and isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}')", subnet, - generateUniqueIdentifier()); + unique_identifier); } - out += "false)"; + out += ")"; return true; } @@ -214,8 +216,8 @@ bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); out = std::format( - "ifNull(multiIf(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or {1} < 0, null, " - "isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " + 
"ifNull(if(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or {1} < 0 " + "or isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(32 - {1}) - 1))))), '')", ip_address, mask ? *mask : "32", From cc4632837c246660ed0e096091c14af901b7fb41 Mon Sep 17 00:00:00 2001 From: kashwy Date: Tue, 9 Aug 2022 06:11:39 -0700 Subject: [PATCH 081/342] Kusto-phase2: Add kusto data types --- .../KustoFunctions/IParserKQLFunction.cpp | 10 + .../KustoFunctions/KQLDataTypeFunctions.cpp | 141 +++++++++ .../KustoFunctions/KQLDataTypeFunctions.h | 78 +++++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 21 +- .../KustoFunctions/KQLDateTimeFunctions.h | 4 +- .../KustoFunctions/KQLFunctionFactory.cpp | 59 +++- .../Kusto/KustoFunctions/KQLFunctionFactory.h | 16 +- .../KustoFunctions/KQLGeneralFunctions.cpp | 38 +++ .../KustoFunctions/KQLGeneralFunctions.h | 7 + .../KustoFunctions/KQLStringFunctions.cpp | 66 +++- .../Kusto/ParserKQLDateTypeTimespan.cpp | 121 ++++++++ src/Parsers/Kusto/ParserKQLDateTypeTimespan.h | 36 +++ src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 286 ++++++++++++++++++ src/Parsers/Kusto/ParserKQLMakeSeries.h | 45 +++ src/Parsers/Kusto/ParserKQLQuery.cpp | 8 + src/Parsers/Kusto/ParserKQLQuery.h | 2 +- 16 files changed, 909 insertions(+), 29 deletions(-) create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h create mode 100644 src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp create mode 100644 src/Parsers/Kusto/ParserKQLDateTypeTimespan.h create mode 100644 src/Parsers/Kusto/ParserKQLMakeSeries.cpp create mode 100644 src/Parsers/Kusto/ParserKQLMakeSeries.h diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index bf46364f1f09..0b7eb403a226 100644 --- 
a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -187,6 +188,15 @@ String IParserKQLFunction::getExpression(IParser::Pos & pos) validateEndOfFunction(arg, pos); arg = new_arg; } + else + { + ParserKQLDateTypeTimespan time_span; + ASTPtr node; + Expected expected; + + if (time_span.parse(pos, node, expected)) + arg = std::to_string(time_span.toSeconds()); + } } return arg; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp new file mode 100644 index 000000000000..2a59ab8b72a3 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include +#include +/* +#include +#include +#include +#include +#include +#include +#include +#include +#include +*/ +#include +#include + +namespace DB +{ + +bool DatatypeBool::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String datetime_str; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier) + datetime_str = std::format("'{}'", String(pos->begin+1, pos->end -1)); + else if (pos->type == TokenType::StringLiteral) + datetime_str = String(pos->begin, pos->end); + else + { auto start = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + if (pos->type == TokenType::ClosingRoundBracket) + break; + } + --pos; + datetime_str = std::format("'{}'",String(start->begin,pos->end)); + } + out = std::format("toDateTime64({},9,'UTC')", datetime_str); + ++pos; + return true; +} + +bool DatatypeDynamic::convertImpl(String &out,IParser::Pos 
&pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String guid_str; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier) + guid_str = std::format("'{}'", String(pos->begin+1, pos->end -1)); + else if (pos->type == TokenType::StringLiteral) + guid_str = String(pos->begin, pos->end); + else + { auto start = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + if (pos->type == TokenType::ClosingRoundBracket) + break; + } + --pos; + guid_str = std::format("'{}'",String(start->begin,pos->end)); + } + out = guid_str; + ++pos; + return true; +} + +bool DatatypeInt::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeLong::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeReal::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeTimespan::convertImpl(String &out,IParser::Pos &pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + + out = getConvertedArgument(fn_name, pos); + return true; +} + +bool DatatypeDecimal::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h new file mode 100644 index 000000000000..325fb3457ffa --- /dev/null +++ 
b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h @@ -0,0 +1,78 @@ +#pragma once + +#include +#include +namespace DB +{ +class DatatypeBool : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bool(),boolean()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDatetime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime(),date()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "guid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeInt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "int()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeLong : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "long()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeReal : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "real(),double()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "string()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeTimespan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "timespan(), time()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDecimal : public IParserKQLFunction +{ +protected: + const char * getName() const 
override { return "decimal()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 0f098cbebda3..3b00ccbceb8f 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -24,13 +25,13 @@ bool TimeSpan::convertImpl(String &out,IParser::Pos &pos) out = res; return false; } - +/* bool DateTime::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; return false; -} +}*/ bool Ago::convertImpl(String &out,IParser::Pos &pos) { @@ -153,9 +154,19 @@ bool MakeDateTime::convertImpl(String &out,IParser::Pos &pos) bool Now::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + { + const auto offset = getConvertedArgument(fn_name, pos); + out = std::format("now('UTC') + {}", offset); + } + else + out = "now('UTC')"; + return true; } bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index 7627465ab5bc..ee87be15eda7 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -11,13 +11,13 @@ class TimeSpan : public IParserKQLFunction const char * getName() const override { return "timespan()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; - +/* class DateTime : public IParserKQLFunction { protected: const char * getName() const override { return "datetime()"; } bool convertImpl(String 
&out,IParser::Pos &pos) override; -}; +};*/ class Ago : public IParserKQLFunction { diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 25e0c2af2f91..c66bfd606473 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -15,12 +15,12 @@ #include #include #include +#include namespace DB { std::unordered_map KQLFunctionFactory::kql_functions = { - {"datetime", KQLFunctionValue::datetime}, {"ago", KQLFunctionValue::ago}, {"datetime_add", KQLFunctionValue::datetime_add}, {"datetime_part", KQLFunctionValue::datetime_part}, @@ -202,7 +202,24 @@ namespace DB {"binary_shift_right", KQLFunctionValue::binary_shift_right}, {"binary_xor", KQLFunctionValue::binary_xor}, {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, - {"bin", KQLFunctionValue::bin} + + {"bin", KQLFunctionValue::bin}, + {"bin_at", KQLFunctionValue::bin_at}, + + {"bool", KQLFunctionValue::datatype_bool}, + {"boolean", KQLFunctionValue::datatype_bool}, + {"datetime", KQLFunctionValue::datatype_datetime}, + {"date", KQLFunctionValue::datatype_datetime}, + {"dynamic", KQLFunctionValue::datatype_dynamic}, + {"guid", KQLFunctionValue::datatype_guid}, + {"int", KQLFunctionValue::datatype_int}, + {"long", KQLFunctionValue::datatype_long}, + {"real", KQLFunctionValue::datatype_real}, + {"double", KQLFunctionValue::datatype_real}, + {"string", KQLFunctionValue::datatype_string}, + {"timespan", KQLFunctionValue::datatype_timespan}, + {"time", KQLFunctionValue::datatype_timespan}, + {"decimal", KQLFunctionValue::datatype_decimal} }; @@ -220,8 +237,8 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::timespan: return std::make_unique(); - case KQLFunctionValue::datetime: - return std::make_unique(); + // case KQLFunctionValue::datetime: + // return std::make_unique(); case KQLFunctionValue::ago: return std::make_unique(); 
@@ -732,6 +749,40 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::bin: return std::make_unique(); + + case KQLFunctionValue::bin_at: + return std::make_unique(); + + case KQLFunctionValue::datatype_bool: + return std::make_unique(); + + case KQLFunctionValue::datatype_datetime: + return std::make_unique(); + + case KQLFunctionValue::datatype_dynamic: + return std::make_unique(); + + case KQLFunctionValue::datatype_guid: + return std::make_unique(); + + case KQLFunctionValue::datatype_int: + return std::make_unique(); + + case KQLFunctionValue::datatype_long: + return std::make_unique(); + + case KQLFunctionValue::datatype_real: + return std::make_unique(); + + case KQLFunctionValue::datatype_string: + return std::make_unique(); + + case KQLFunctionValue::datatype_timespan: + return std::make_unique(); + + case KQLFunctionValue::datatype_decimal: + return std::make_unique(); + } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 7c5f0d547345..7cbb0877c909 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -8,7 +8,7 @@ namespace DB enum class KQLFunctionValue : uint16_t { none, timespan, - datetime, + // datetime, ago, datetime_add, datetime_part, @@ -186,7 +186,19 @@ namespace DB binary_xor, bitset_count_ones, - bin + bin, + bin_at, + + datatype_bool, + datatype_datetime, + datatype_dynamic, + datatype_guid, + datatype_int, + datatype_long, + datatype_real, + datatype_string, + datatype_timespan, + datatype_decimal }; class KQLFunctionFactory { diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index 253292a7d9df..714265633d5e 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -14,6 +14,8 @@ #include #include 
#include +#include +#include namespace DB { @@ -25,4 +27,40 @@ bool Bin::convertImpl(String &out,IParser::Pos &pos) return false; } +bool BinAt::convertImpl(String & out,IParser::Pos & pos) +{ + ParserKQLDateTypeTimespan time_span; + double bin_size; + + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String expression_str = getConvertedArgument(fn_name, pos); + ++pos; + String bin_size_str = getConvertedArgument(fn_name, pos); + ++pos; + String fixed_point_str = getConvertedArgument(fn_name, pos); + + bin_size_str = bin_size_str.substr(0, bin_size_str.size()-1); + + auto t1 = std::format("toFloat64({})", fixed_point_str); + auto t2 = std::format("toFloat64({})", expression_str); + int dir = t2 >= t1 ? 0 : -1; + + if (time_span.parseConstKQLTimespan(bin_size_str)) + { + bin_size = time_span.toSeconds(); + + out = std::format("toDateTime64({} + toInt64(({} -{}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); + } + else + { + bin_size = std::stod(bin_size_str); + out = std::format("{} + toInt64(({} -{}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); + } + return true; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h index 802fd152333f..76ead441dfcd 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -11,5 +11,12 @@ class Bin : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; +class BinAt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin_at()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index c2d1bd251da8..767075987885 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ 
b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -21,9 +21,16 @@ bool Base64EncodeToString::convertImpl(String & out,IParser::Pos & pos) bool Base64EncodeFromGuid::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String guid = getConvertedArgument(fn_name, pos); + + out = std::format("base64Encode({})", guid); + + return true; } bool Base64DecodeToString::convertImpl(String & out,IParser::Pos & pos) @@ -33,16 +40,21 @@ bool Base64DecodeToString::convertImpl(String & out,IParser::Pos & pos) bool Base64DecodeToArray::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String str = getConvertedArgument(fn_name, pos); + + out = std::format("arrayMap(x -> (reinterpretAsUInt8(x)), splitByRegexp ('',base64Decode({})))", str); + + return true; } bool Base64DecodeToGuid::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Decode"); } bool CountOf::convertImpl(String & out, IParser::Pos & pos) @@ -209,9 +221,19 @@ bool ExtractJson::convertImpl(String & out,IParser::Pos & pos) bool HasAnyIndex::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + ++pos; + const String lookup = getConvertedArgument(fn_name, pos); + String src_array = std::format("splitByChar(' ',{})", source); + out = std::format("if (empty({1}), -1, indexOf(arrayMap ( x -> (x in {0}), if (empty({1}),[''], 
arrayMap(x->(toString(x)),{1}))),1) - 1)", + src_array, lookup); + return true; } bool IndexOf::convertImpl(String & out,IParser::Pos & pos) @@ -508,9 +530,23 @@ bool ToUpper::convertImpl(String & out,IParser::Pos & pos) bool Translate::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String from = getConvertedArgument(fn_name, pos); + ++pos; + String to = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + + String len_diff = std::format("length({}) - length({})", from, to); + String to_str = std::format("multiIf(length({1}) = 0, {0}, {2} > 0, concat({1},repeat(substr({1},length({1}),1),toUInt16({2}))),{2} < 0 , substr({1},1,length({0})),{1})", + from, to, len_diff); + out = std::format("if (length({3}) = 0,'',translate({0},{1},{2}))", source, from, to_str, to); + return true; } bool Trim::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp new file mode 100644 index 000000000000..d83ef4e2f537 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLDateTypeTimespan :: parseImpl(Pos & pos, [[maybe_unused]] ASTPtr & node, Expected & expected) +{ + const String token(pos->begin,pos->end); + const char * current_word = pos->begin; + expected.add(pos, current_word); + + if (!parseConstKQLTimespan(token)) + return false; + + return true; +} + +double ParserKQLDateTypeTimespan :: toSeconds() +{ + switch (time_span_unit) + { + case KQLTimespanUint::day: + return time_span * 24 * 60 * 60; + case KQLTimespanUint::hour: + return time_span * 60 * 60; + case KQLTimespanUint::minute: + return time_span * 60; + case 
KQLTimespanUint::second: + return time_span ; + case KQLTimespanUint::millisec: + return time_span / 1000.0; + case KQLTimespanUint::microsec: + return time_span / 1000000.0; + case KQLTimespanUint::nanosec: + return time_span / 1000000000.0; + case KQLTimespanUint::tick: + return time_span / 10000000000.0; + } +} + +bool ParserKQLDateTypeTimespan :: parseConstKQLTimespan(const String & text) +{ + std::unordered_map TimespanSuffixes = + { + {"d", KQLTimespanUint::day}, + {"day", KQLTimespanUint::day}, + {"days", KQLTimespanUint::day}, + {"h", KQLTimespanUint::hour}, + {"hr", KQLTimespanUint::hour}, + {"hrs", KQLTimespanUint::hour}, + {"hour", KQLTimespanUint::hour}, + {"hours", KQLTimespanUint::hour}, + {"m", KQLTimespanUint::minute}, + {"min", KQLTimespanUint::minute}, + {"minute", KQLTimespanUint::minute}, + {"minutes", KQLTimespanUint::minute}, + {"s", KQLTimespanUint::second}, + {"sec", KQLTimespanUint::second}, + {"second", KQLTimespanUint::second}, + {"seconds", KQLTimespanUint::second}, + {"ms", KQLTimespanUint::millisec}, + {"milli", KQLTimespanUint::millisec}, + {"millis", KQLTimespanUint::millisec}, + {"millisec", KQLTimespanUint::millisec}, + {"millisecond", KQLTimespanUint::millisec}, + {"milliseconds", KQLTimespanUint::millisec}, + {"micro", KQLTimespanUint::microsec}, + {"micros", KQLTimespanUint::microsec}, + {"microsec", KQLTimespanUint::microsec}, + {"microsecond", KQLTimespanUint::microsec}, + {"microseconds", KQLTimespanUint::microsec}, + {"nano", KQLTimespanUint::nanosec}, + {"nanos", KQLTimespanUint::nanosec}, + {"nanosec", KQLTimespanUint::nanosec}, + {"nanosecond", KQLTimespanUint::nanosec}, + {"nanoseconds", KQLTimespanUint::nanosec}, + {"tick", KQLTimespanUint::tick}, + {"ticks", KQLTimespanUint::tick} + }; + + + const char * ptr = text.c_str(); + + auto scanDigit = [&](const char *start) + { + auto index = start; + while (isdigit(*index)) + ++index; + return index > start ? 
index - start : -1; + }; + + int number_len = scanDigit(ptr); + if (number_len <= 0) + return false; + + if (*(ptr + number_len) == '.') + { + auto fractionLen = scanDigit(ptr + number_len + 1); + if (fractionLen >= 0) + { + number_len += fractionLen + 1; + } + } + + String timespan_suffix(ptr + number_len, ptr+text.size()); + if (TimespanSuffixes.find(timespan_suffix) == TimespanSuffixes.end()) + return false; + + time_span = std::stod(String(ptr, ptr + number_len)); + time_span_unit =TimespanSuffixes[timespan_suffix] ; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.h b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.h new file mode 100644 index 000000000000..11c74ddedadd --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLDateTypeTimespan : public ParserKQLBase +{ +public: + enum class KQLTimespanUint: uint8_t + { + day, + hour, + minute, + second, + millisec, + microsec, + nanosec, + tick + }; + bool parseConstKQLTimespan(const String &text); + double toSeconds(); + +protected: + const char * getName() const override { return "KQLDateTypeTimespan"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + double time_span; + KQLTimespanUint time_span_unit; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp new file mode 100644 index 000000000000..0c658b0ba7f1 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -0,0 +1,286 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos) +{ + std::unordered_set allowed_aggregation + ({ + "avg", + "avgif", + "count", + "countif", + "dcount", + "dcountif", + "max", + "maxif", + "min", + "minif", + "percentile", + "take_any", + 
"stdev", + "sum", + "sumif", + "variance" + }); + + Expected expected; + ParserKeyword s_default("default"); + ParserToken equals(TokenType::Equals); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserToken close_bracket(TokenType::ClosingRoundBracket); + ParserToken comma(TokenType::Comma); + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String alias; + String aggregation_fun; + String column; + double default_value = 0; + + String first_token(pos->begin,pos->end); + + ++pos; + if (equals.ignore(pos, expected)) + { + alias = std::move(first_token); + aggregation_fun = String(pos->begin,pos->end); + } + else + aggregation_fun = std::move(first_token); + + if (allowed_aggregation.find(aggregation_fun) == allowed_aggregation.end()) + return false; + + ++pos; + if (open_bracket.ignore(pos, expected)) + column = String(pos->begin,pos->end); + else + return false; + + ++pos; + if (!close_bracket.ignore(pos, expected)) + return false; + + if (s_default.ignore(pos, expected)) + { + if (!equals.ignore(pos, expected)) + return false; + + default_value = std::stod(String(pos->begin,pos->end)); + ++pos; + } + if (alias.empty()) + alias = std::format("{}_{}", aggregation_fun, column); + aggregation_columns.push_back(AggregationColumn(alias, aggregation_fun, column, default_value)); + + if (!comma.ignore(pos, expected)) + break; + } + return true; +} + +bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos) +{ + auto begin = pos; + auto from_pos = begin; + auto to_pos = begin; + auto step_pos = begin; + auto end_pos = begin; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if ( String(pos->begin, pos->end) == "from") + from_pos = pos; + if ( String(pos->begin, pos->end) == "to") + to_pos = pos; + if ( String(pos->begin, pos->end) == "step") + step_pos = pos; + if ( String(pos->begin, pos->end) == "by") + { + 
end_pos = pos; + break; + } + ++pos; + } + + if (end_pos == begin) + end_pos = pos; + + if (step_pos == begin) + return false; + + if (String(from_pos->begin, from_pos->end) == "from") + { + ++from_pos; + auto end_from_pos = (to_pos != begin) ? to_pos : step_pos; + --end_from_pos; + from_to_step.from = String(from_pos->begin, end_from_pos->end); + } + + if (to_pos != begin) + { ++to_pos; + --step_pos; + from_to_step.to = String(to_pos->begin, step_pos->end); + ++step_pos; + ++step_pos; + } + --end_pos; + from_to_step.step = String(step_pos->begin, end_pos->end); + return true; +} + + +bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + + pos = op_pos.back(); + + String axis_column; + String group_expression; + + ParserKeyword s_on("on"); + ParserKeyword s_by("by"); + + ParserToken equals(TokenType::Equals); + ParserToken comma(TokenType::Comma); + + AggregationColumns aggregation_columns; + FromToStepClause from_to_step; + + ParserKQLDateTypeTimespan time_span; + + //const auto make_series_parameters = getMakeSeriesParameters(pos); + + if (!parseAggregationColumns(aggregation_columns, pos)) + return false; + + if (!s_on.ignore(pos, expected)) + return false; + + axis_column = String(pos->begin, pos->end); + ++pos; + + if (!parseFromToStepClause(from_to_step, pos)) + return false; + + // 'on' statement parameter, expecting scalar value of type 'int', 'long', 'real', 'datetime' or 'timespan'. 
+ + if (s_by.ignore(pos, expected)) + { + group_expression = getExprFromToken(pos); + if (group_expression.empty()) + return false; + } + + String subquery_columns; + + for (auto agg_column : aggregation_columns) + { + String column_str = std::format("{}({}) AS {}_ali", agg_column.aggregation_fun, agg_column.column, agg_column.alias); + if (subquery_columns.empty()) + subquery_columns = column_str; + else + subquery_columns += ", "+ column_str; + } + + ASTPtr sub_qurery_table; + double step; + String sub_query ; + String main_query ; + String group_by; + + String start_str = getExprFromToken(from_to_step.from, pos.max_depth); + String end_str = getExprFromToken(from_to_step.to, pos.max_depth); + String step_str = from_to_step.step; + + if (time_span.parseConstKQLTimespan(step_str)) + { + step = time_span.toSeconds(); + + auto bin_str = std::format(" toUInt64(toFloat64(toDateTime64({},6,'UTC')) / {}) * {} AS {}_ali ", axis_column, step,step, axis_column); + auto sub_sub_query = std::format(" (Select {},{}, {} FROM {} GROUP BY {},{}_ali ORDER BY {}_ali) ", group_expression, subquery_columns, bin_str, table_name, group_expression, axis_column, axis_column); + + auto start = std::format("toUInt64(toDateTime64({},6,'UTC'))", start_str); + auto end = std::format("toUInt64(toDateTime64({},6,'UTC'))", end_str); + auto range = std::format("range({},{}, toUInt64({}))", start, end, step); + auto range_len = std::format("length({})", range); + main_query = std::format("{} ", group_expression); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}_ali >= {} and {}_ali <= {}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 
0 : {} - length(ga)),1) )) as {}", + agg_column.alias, axis_column, start, axis_column, end, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query +=", " + agg_group_column; + + axis_and_agg_alias_list +=", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + } + auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf(toDateTime64({}_ali,6,'UTC'),{}_ali >= {} and {}_ali <= {}), arrayMap( x->(toDateTime64(x,6,'UTC')), {}) )) as {}", + axis_column, axis_column, start, axis_column, end, range, axis_column); + + main_query += ", " + axis_str; + auto sub_group_by = std::format("{}", group_expression); + + sub_query = std::format("( SELECT min({}_ali) AS low,max({}_ali) AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", + axis_column, axis_column,axis_and_agg_alias_list,main_query,sub_sub_query, sub_group_by); + + main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); + + } + else + { + step = stod(step_str); + + sub_query = std::format("kql( {} | summarize {}, {} = toint({} / {}) * {} by {},{} )", + table_name, subquery_columns, axis_column, axis_column, step, subquery_columns, axis_column); + } + + Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) + return false; + tables = std::move(sub_qurery_table); + + String converted_columns = main_query; + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) + return false; + + if (!group_by.empty()) + { + String converted_groupby = group_by; + + Tokens 
token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; + +} + +} diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h new file mode 100644 index 000000000000..b30155b1bd81 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -0,0 +1,45 @@ + +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLMakeSeries : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; + ASTPtr tables; + void setTableName(String table_name_) {table_name = table_name_;} + +protected: + struct AggregationColumn { + String alias; + String aggregation_fun; + String column; + double default_value; + AggregationColumn(String alias_, String aggregation_fun_, String column_, double default_value_ ) + :alias(alias_), aggregation_fun(aggregation_fun_), column(column_), default_value(default_value_){} + }; + using AggregationColumns = std::vector; + + struct FromToStepClause { + String from; + String to; + String step; + }; + + bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); + bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +private: + String table_name; +}; + +} + + + diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 7f00a76fa726..bfa52368c168 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -26,6 +26,14 @@ bool ParserKQLBase :: parsePrepare(Pos & pos) return true; } +String ParserKQLBase :: getExprFromToken(const String & text, const 
uint32_t & max_depth) +{ + Tokens tokens(text.c_str(), text.c_str() + text.size()); + IParser::Pos pos(tokens, max_depth); + + return getExprFromToken(pos); +} + String ParserKQLBase :: getExprFromToken(Pos &pos) { String res; diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 2cfec703fd4f..ac8715ae894b 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -9,7 +9,7 @@ class ParserKQLBase : public IParserBase public: virtual bool parsePrepare(Pos & pos); virtual String getExprFromToken(Pos &pos); - + virtual String getExprFromToken(const String & text, const uint32_t & max_depth); std::vector op_pos; }; From 34fe5840f234fed45b9fcbb8a92a5bc8e0f35963 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 9 Aug 2022 10:02:09 -0700 Subject: [PATCH 082/342] Update unit tests for IP functions --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 4 ++-- src/Parsers/tests/KQL/gtest_KQL_IP.cpp | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 47d01e42ae3e..765912ff9361 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -108,7 +108,7 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) for (int i = 0; i < std::ssize(s_private_subnets); ++i) { if (i > 0) - out += " or"; + out += " or "; const auto & subnet = s_private_subnets[i]; out += std::format( @@ -118,7 +118,7 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) unique_identifier); } - out += ")"; + out.push_back(')'); return true; } diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp index cf1d08381760..e6338ab52184 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ 
-29,35 +29,35 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserRegexTest, ::testing::ValuesIn(std::initializer_list{ { "print format_ipv4(A)", - "SELECT ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)" + "SELECT ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)" }, { "print format_ipv4(A, B)", - "SELECT ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 
1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\)" + "SELECT ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\)" }, { "print format_ipv4_mask(A)", - "SELECT if\\(empty\\(ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 
1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(32 >= 0\\) AND \\(32 <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(32\\)\\)\\)" + "SELECT if\\(empty\\(ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(32 >= 0\\) AND \\(32 <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(32\\)\\)\\)" }, { "print format_ipv4_mask(A, B)", - "SELECT if\\(empty\\(ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(B >= 0\\) AND \\(B <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', 
toString\\(B\\)\\)\\)" + "SELECT if\\(empty\\(ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(B >= 0\\) AND \\(B <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(B\\)\\)\\)" }, { "print ipv4_compare(A, B)", - "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), 
IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" + "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), 
NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" }, { "print ipv4_compare(A, B, C)", - "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(C, 
min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" + "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" }, { 
"print ipv4_is_match(A, B)", - "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" + "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) 
AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" }, { "print ipv4_is_match(A, B, C)", - "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR 
\\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" + "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, 
toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" }, { "print parse_ipv4_mask(A, B)", @@ -65,11 +65,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserRegexTest, }, { "print ipv4_is_in_range(A, B)", - "SELECT multiIf\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL, NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS 
range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" + "SELECT if\\(\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" }, { "print ipv4_is_private(A)", - "SELECT multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(toIPv4OrNull\\(tokens_\\d+\\[1\\]\\) AS nullable_ip_\\d+\\) IS NULL\\), NULL, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\), NULL, ignore\\(assumeNotNull\\(nullable_ip_\\d+\\) AS ip_\\d+, IPv4CIDRToRange\\(ip_\\d+, assumeNotNull\\(mask_\\d+\\)\\) AS range_\\d+, IPv4NumToString\\(range_\\d+.1\\) AS begin_\\d+, IPv4NumToString\\(range_\\d+.2\\) AS end_\\d+\\), NULL, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '10.0.0.0/8'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '10.0.0.0/8'\\) AND isIPAddressInRange\\(end_\\d+, '10.0.0.0/8'\\)\\), true, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '172.16.0.0/12'\\)\\) OR 
\\(isIPAddressInRange\\(begin_\\d+, '172.16.0.0/12'\\) AND isIPAddressInRange\\(end_\\d+, '172.16.0.0/12'\\)\\), true, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '192.168.0.0/16'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '192.168.0.0/16'\\) AND isIPAddressInRange\\(end_\\d+, '192.168.0.0/16'\\)\\), true, false\\)" + "SELECT multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(toIPv4OrNull\\(tokens_\\d+\\[1\\]\\) AS nullable_ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, ignore\\(assumeNotNull\\(nullable_ip_\\d+\\) AS ip_\\d+, IPv4CIDRToRange\\(ip_\\d+, assumeNotNull\\(mask_\\d+\\)\\) AS range_\\d+, IPv4NumToString\\(range_\\d+.1\\) AS begin_\\d+, IPv4NumToString\\(range_\\d+.2\\) AS end_\\d+\\), NULL, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '10.0.0.0/8'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND isIPAddressInRange\\(begin_\\d+, '10.0.0.0/8'\\) AND isIPAddressInRange\\(end_\\d+, '10.0.0.0/8'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '172.16.0.0/12'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND isIPAddressInRange\\(begin_\\d+, '172.16.0.0/12'\\) AND isIPAddressInRange\\(end_\\d+, '172.16.0.0/12'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '192.168.0.0/16'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND isIPAddressInRange\\(begin_\\d+, '192.168.0.0/16'\\) AND isIPAddressInRange\\(end_\\d+, '192.168.0.0/16'\\)\\)\\)" }, { "print ipv4_netmask_suffix(A)", From 62da88c1e4044fded0ef278668574f9ba0070f92 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 9 Aug 2022 09:40:35 -0700 Subject: [PATCH 083/342] Date_Time functions PART 1 --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 138 +++++++++++++----- 
.../KustoFunctions/KQLDateTimeFunctions.h | 11 +- .../KustoFunctions/KQLFunctionFactory.cpp | 13 +- .../Kusto/KustoFunctions/KQLFunctionFactory.h | 5 +- 4 files changed, 120 insertions(+), 47 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 3b00ccbceb8f..aea1bc3127af 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -63,23 +63,23 @@ bool DatetimeDiff::convertImpl(String &out,IParser::Pos &pos) bool DayOfMonth::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toDayOfMonth"); } bool DayOfWeek::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + + out = std::format("toDayOfWeek() + %7"); + return true; } bool DayOfYear::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toDayOfYear"); } bool EndOfDay::convertImpl(String &out,IParser::Pos &pos) @@ -119,23 +119,17 @@ bool FormatTimeSpan::convertImpl(String &out,IParser::Pos &pos) bool GetMonth::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toMonth"); } bool GetYear::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toYear"); } bool HoursOfDay::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toHour"); } bool MakeTimeSpan::convertImpl(String &out,IParser::Pos &pos) @@ 
-162,39 +156,91 @@ bool Now::convertImpl(String &out,IParser::Pos &pos) if (pos->type != TokenType::ClosingRoundBracket) { const auto offset = getConvertedArgument(fn_name, pos); - out = std::format("now('UTC') + {}", offset); + out = std::format("now64(9,'UTC') + {}", offset); } else - out = "now('UTC')"; + out = "now64(9,'UTC')"; return true; } bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + + } + out = std::format("date_add(DAY,{}, toDateTime64((toStartOfDay({})) , 9 , 'UTC')) ", offset, datetime_str); + return true; } bool StartOfMonth::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + + } + out = std::format("date_add(MONTH,{}, toDateTime64((toStartOfMonth({})) , 9 , 'UTC')) ", offset, datetime_str); + return true; } bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + + } + out = std::format("date_add(Week,{}, toDateTime64((toStartOfWeek({})) , 9 , 'UTC')) ", offset, datetime_str); + 
return true; } bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset ; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + out = std::format("date_add(YEAR,{}, toDateTime64((toStartOfYear({}, 'UTC')) , 9 , 'UTC'))", offset, datetime_str); + return true; } bool UnixTimeMicrosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) @@ -220,16 +266,32 @@ bool UnixTimeNanosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String value = getConvertedArgument(fn_name, pos); + out = std::format("toDateTime64({},9,'UTC')", value); + return true; } bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + const String time_str = getConvertedArgument(fn_name, pos); + out = std::format("toWeek({},3,'UTC')", time_str); + return true; +} + +bool MonthOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + + return directMapping(out, pos, "toMonth"); } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index ee87be15eda7..adf95a39a64a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -120,7 +120,7 @@ class GetYear : public IParserKQLFunction class HoursOfDay : 
public IParserKQLFunction { protected: - const char * getName() const override { return "hoursofday()"; } + const char * getName() const override { return "hourofday()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; @@ -204,7 +204,14 @@ class UnixTimeSecondsToDateTime : public IParserKQLFunction class WeekOfYear : public IParserKQLFunction { protected: - const char * getName() const override { return "weekofyear()"; } + const char * getName() const override { return "week_of_year()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MonthOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "monthofyear()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index c66bfd606473..075d56d9608a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -35,7 +35,7 @@ namespace DB {"format_timespan", KQLFunctionValue::format_timespan}, {"getmonth", KQLFunctionValue::getmonth}, {"getyear", KQLFunctionValue::getyear}, - {"hoursofday", KQLFunctionValue::hoursofday}, + {"hourofday", KQLFunctionValue::hourofday}, {"make_timespan", KQLFunctionValue::make_timespan}, {"make_datetime", KQLFunctionValue::make_datetime}, {"now", KQLFunctionValue::now}, @@ -49,8 +49,8 @@ namespace DB {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, - {"weekofyear", KQLFunctionValue::weekofyear}, - + {"week_of_year", KQLFunctionValue::week_of_year}, + {"monthofyear", KQLFunctionValue::monthofyear}, {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, {"base64_encode_fromguid", 
KQLFunctionValue::base64_encode_fromguid}, {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, @@ -269,6 +269,9 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::endofyear: return std::make_unique(); + + case KQLFunctionValue::monthofyear: + return std::make_unique(); case KQLFunctionValue::format_datetime: return std::make_unique(); @@ -282,7 +285,7 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::getyear: return std::make_unique(); - case KQLFunctionValue::hoursofday: + case KQLFunctionValue::hourofday: return std::make_unique(); case KQLFunctionValue::make_timespan: @@ -318,7 +321,7 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::unixtime_seconds_todatetime: return std::make_unique(); - case KQLFunctionValue::weekofyear: + case KQLFunctionValue::week_of_year: return std::make_unique(); case KQLFunctionValue::base64_encode_tostring: diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 7cbb0877c909..ed7479641756 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -19,11 +19,12 @@ namespace DB endofday, endofweek, endofyear, + monthofyear, format_datetime, format_timespan, getmonth, getyear, - hoursofday, + hourofday, make_timespan, make_datetime, now, @@ -37,7 +38,7 @@ namespace DB unixtime_milliseconds_todatetime, unixtime_nanoseconds_todatetime, unixtime_seconds_todatetime, - weekofyear, + week_of_year, base64_encode_tostring, base64_encode_fromguid, From fa34279881fba6925668e2ba1eccc9dfac13158e Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Wed, 10 Aug 2022 13:04:07 -0700 Subject: [PATCH 084/342] Added test and review comments --- src/Parsers/Kusto/KQL_ReleaseNote.md | 60 +++++++++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 101 +++++++------- 
.../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 126 ++++++++++++++++++ 3 files changed, 237 insertions(+), 50 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index b37b991a2a26..0f475654d5f7 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,4 +1,64 @@ # August XX, 2022 +- **DateTimeFunctions** +- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) + `print startofyear(datetime(2017-01-01 10:10:17), -1)` + `print startofyear(datetime(2017-01-01 10:10:17), 0)` + `print startofyear(datetime(2017-01-01 10:10:17), 1)` +- [weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) + `print week_of_year(datetime(2020-12-31))` + `print week_of_year(datetime(2020-06-15))` + `print week_of_year(datetime(1970-01-01))` + `print week_of_year(datetime(2000-01-01))` + +- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) + `print startofweek(datetime(2017-01-01 10:10:17), -1)` + `print startofweek(datetime(2017-01-01 10:10:17), 0)` + `print startofweek(datetime(2017-01-01 10:10:17), 1)` + +- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) + `print startofmonth(datetime(2017-01-01 10:10:17), -1)` + `print startofmonth(datetime(2017-01-01 10:10:17), 0)` + `print startofmonth(datetime(2017-01-01 10:10:17), 1)` + +- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) + `print startofday(datetime(2017-01-01 10:10:17), -1)` + `print startofday(datetime(2017-01-01 10:10:17), 0)` + `print startofday(datetime(2017-01-01 10:10:17), 1)` + +- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) + `print monthofyear(datetime("2015-12-14"))` + 
+- [hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) + `print hourofday(datetime(2015-12-14 18:54:00))` + +- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) + `print getyear(datetime(2015-10-12))` + +- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) + `print getmonth(datetime(2015-10-12))` + +- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) + `print dayofyear(datetime(2015-12-14))` + +- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) + `print (datetime(2015-12-14))` + +- [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) + `print unixtime_seconds_todatetime(1546300800)` + +- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) + `print dayofweek(datetime(2015-12-20))` + +- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) + `print now()` + `print now(2d)` + `print now(-2h)` + `print now(5 microseconds)` + `print now(5 seconds)` + `print now(6minutes)` + `print now(-2d) ` + `print now(time(1d))` + ## KQL implemented features The config setting to allow modify dialect setting. - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. 
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index aea1bc3127af..58d8536fb49b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -19,134 +19,136 @@ namespace DB { -bool TimeSpan::convertImpl(String &out,IParser::Pos &pos) +bool TimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } /* -bool DateTime::convertImpl(String &out,IParser::Pos &pos) +bool DateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; }*/ -bool Ago::convertImpl(String &out,IParser::Pos &pos) +bool Ago::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool DatetimeAdd::convertImpl(String &out,IParser::Pos &pos) +bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; }; -bool DatetimePart::convertImpl(String &out,IParser::Pos &pos) +bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool DatetimeDiff::convertImpl(String &out,IParser::Pos &pos) +bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool DayOfMonth::convertImpl(String &out,IParser::Pos &pos) +bool DayOfMonth::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toDayOfMonth"); } -bool 
DayOfWeek::convertImpl(String &out,IParser::Pos &pos) +bool DayOfWeek::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; ++pos; - out = std::format("toDayOfWeek() + %7"); + const String datetime_str = getConvertedArgument(fn_name, pos); + + out = std::format("toDayOfWeek({})%7",datetime_str); return true; } -bool DayOfYear::convertImpl(String &out,IParser::Pos &pos) +bool DayOfYear::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toDayOfYear"); } -bool EndOfDay::convertImpl(String &out,IParser::Pos &pos) +bool EndOfDay::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool EndOfWeek::convertImpl(String &out,IParser::Pos &pos) +bool EndOfWeek::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool EndOfYear::convertImpl(String &out,IParser::Pos &pos) +bool EndOfYear::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool FormatDateTime::convertImpl(String &out,IParser::Pos &pos) +bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool FormatTimeSpan::convertImpl(String &out,IParser::Pos &pos) +bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool GetMonth::convertImpl(String &out,IParser::Pos &pos) +bool GetMonth::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toMonth"); } -bool GetYear::convertImpl(String 
&out,IParser::Pos &pos) +bool GetYear::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toYear"); } -bool HoursOfDay::convertImpl(String &out,IParser::Pos &pos) +bool HoursOfDay::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toHour"); } -bool MakeTimeSpan::convertImpl(String &out,IParser::Pos &pos) +bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool MakeDateTime::convertImpl(String &out,IParser::Pos &pos) +bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Now::convertImpl(String &out,IParser::Pos &pos) +bool Now::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) @@ -163,7 +165,7 @@ bool Now::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) +bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); @@ -184,7 +186,7 @@ bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StartOfMonth::convertImpl(String &out,IParser::Pos &pos) +bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) @@ -204,7 +206,7 @@ bool StartOfMonth::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) +bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) @@ -224,9 +226,9 @@ bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) +bool 
StartOfYear::convertImpl(String & out, IParser::Pos & pos) { - const String fn_name = getKQLFunctionName(pos); + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -243,30 +245,30 @@ bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) return true; } -bool UnixTimeMicrosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool UnixTimeMicrosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool UnixTimeMillisecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool UnixTimeMillisecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool UnixTimeNanosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool UnixTimeNanosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool UnixTimeSecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - const String fn_name = getKQLFunctionName(pos); + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -276,9 +278,8 @@ bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) return true; } -bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) +bool WeekOfYear::convertImpl(String & out, IParser::Pos & pos) { - const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -288,7 +289,7 @@ bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) return true; } -bool MonthOfYear::convertImpl(String &out,IParser::Pos &pos) +bool MonthOfYear::convertImpl(String & out, IParser::Pos & pos) { return 
directMapping(out, pos, "toMonth"); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp new file mode 100644 index 000000000000..77ad97147354 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +using namespace DB; +using namespace std::literals; +} +class ParserDateTimeFuncTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserDateTimeFuncTest, ParseQuery) +{ const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + ASSERT_NE(nullptr, parser); + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print 
week_of_year(datetime(2020-12-31))", + "SELECT toWeek(toDateTime64('2020-12-31', 9, 'UTC'), 3, 'UTC')" + }, + { + "print startofweek(datetime(2017-01-01 10:10:17), -1)", + "SELECT toDateTime64(toStartOfWeek(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(-1)" + }, + { + "print startofmonth(datetime(2017-01-01 10:10:17), -1)", + "SELECT toDateTime64(toStartOfMonth(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(-1)" + }, + { + "print startofday(datetime(2017-01-01 10:10:17), -1)", + "SELECT toDateTime64(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1)" + + }, + { + "print monthofyear(datetime(2015-12-14))", + "SELECT toMonth(toDateTime64('2015-12-14', 9, 'UTC'))" + }, + { + "print hourofday(datetime(2015-12-14 10:54:00))", + "SELECT toHour(toDateTime64('2015-12-14 10:54:00', 9, 'UTC'))" + }, + { + "print getyear(datetime(2015-10-12))", + "SELECT toYear(toDateTime64('2015-10-12', 9, 'UTC'))" + }, + { + "print getmonth(datetime(2015-10-12))", + "SELECT toMonth(toDateTime64('2015-10-12', 9, 'UTC'))" + }, + { + "print dayofyear(datetime(2015-10-12))", + "SELECT toDayOfYear(toDateTime64('2015-10-12', 9, 'UTC'))" + }, + { + "print dayofmonth(datetime(2015-10-12))", + "SELECT toDayOfMonth(toDateTime64('2015-10-12', 9, 'UTC'))" + }, + { + "print unixtime_seconds_todatetime(1546300899)", + "SELECT toDateTime64(1546300899, 9, 'UTC')" + }, + { + "print dayofweek(datetime(2015-12-20))", + "SELECT toDayOfWeek(toDateTime64('2015-12-20', 9, 'UTC')) % 7" + }, + { + "print now()", + "SELECT now64(9, 'UTC')" + } + +}))); From 615804be7c9c4fb952f60f9633ff10d02eef1022 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 10 Aug 2022 07:46:29 -0700 Subject: [PATCH 085/342] Implement KQL binary functions --- .../KustoFunctions/KQLBinaryFunctions.cpp | 105 +++++++++++------- src/Parsers/tests/KQL/gtest_KQL_Binary.cpp | 39 +++++++ 
src/Parsers/tests/KQL/gtest_KQL_IP.cpp | 2 +- src/Parsers/tests/gtest_Parser.cpp | 4 - src/Parsers/tests/gtest_common.h | 7 ++ 5 files changed, 114 insertions(+), 43 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_Binary.cpp diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp index 2a06c4e715be..f8765b116d4e 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -1,70 +1,99 @@ -#include -#include #include #include -#include -#include +#include #include -#include -#include -#include -#include #include -#include -#include #include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include + +#include namespace DB { -bool BinaryAnd::convertImpl(String &out,IParser::Pos &pos) +bool BinaryAnd::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitAnd(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; } -bool BinaryNot::convertImpl(String &out,IParser::Pos &pos) +bool BinaryNot::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + out = std::format("bitNot(cast({0}, 'Int64'))", value); + return true; } -bool BinaryOr::convertImpl(String &out,IParser::Pos &pos) +bool BinaryOr::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = 
getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitOr(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; } -bool BinaryShiftLeft::convertImpl(String &out,IParser::Pos &pos) +bool BinaryShiftLeft::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format("if({1} < 0, null, bitShiftLeft(cast({0}, 'Int64'), {1}))", value, count); + return true; } -bool BinaryShiftRight::convertImpl(String &out,IParser::Pos &pos) +bool BinaryShiftRight::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format("if({1} < 0, null, bitShiftRight(cast({0}, 'Int64'), {1}))", value, count); + return true; } -bool BinaryXor::convertImpl(String &out,IParser::Pos &pos) +bool BinaryXor::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitXor(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; } -bool BitsetCountOnes::convertImpl(String &out,IParser::Pos &pos) +bool BitsetCountOnes::convertImpl(String & out, IParser::Pos & pos) { - String res = 
String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "bitCount"); } } diff --git a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp new file mode 100644 index 000000000000..600965dcef60 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp @@ -0,0 +1,39 @@ +#include + +#include +#include +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Binary, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print binary_and(A, B)", + "SELECT bitAnd(CAST(A, 'Int64'), CAST(B, 'Int64'))" + }, + { + "print binary_not(A)", + "SELECT bitNot(CAST(A, 'Int64'))" + }, + { + "print binary_or(A, B)", + "SELECT bitOr(CAST(A, 'Int64'), CAST(B, 'Int64'))" + }, + { + "print binary_shift_left(A, B)", + "SELECT if(B < 0, NULL, bitShiftLeft(CAST(A, 'Int64'), B))" + }, + { + "print binary_shift_right(A, B)", + "SELECT if(B < 0, NULL, bitShiftRight(CAST(A, 'Int64'), B))" + }, + { + "print binary_xor(A, B)", + "SELECT bitXor(CAST(A, 'Int64'), CAST(B, 'Int64'))" + }, + { + "print bitset_count_ones(A)", + "SELECT bitCount(A)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp index e6338ab52184..c2257d055009 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -23,7 +23,7 @@ TEST_P(ParserRegexTest, parseQuery) EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); } -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserRegexTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1e563a45cfa4..960873d23428 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -17,7 +17,6 @@ 
#include #include #include -#include namespace { @@ -35,9 +34,6 @@ std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case) return ostr << "ParserTestCase input: " << test_case.input_text; } -class ParserTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - TEST_P(ParserTest, parseQuery) { const auto & parser = std::get<0>(GetParam()); diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h index abbc1a0cb0eb..aac3dddb117c 100644 --- a/src/Parsers/tests/gtest_common.h +++ b/src/Parsers/tests/gtest_common.h @@ -1,3 +1,7 @@ +#include + +#include + #include struct ParserTestCase @@ -5,3 +9,6 @@ struct ParserTestCase const std::string_view input_text; const char * expected_ast = nullptr; }; + +class ParserTest : public ::testing::TestWithParam, ParserTestCase>> +{}; From 1ec4ec9109a5dd6aa2f9ab7d3749f2818e151254 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 10 Aug 2022 07:58:07 -0700 Subject: [PATCH 086/342] Update release notes --- src/Parsers/Kusto/KQL_ReleaseNote.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 0f475654d5f7..ed64110487ed 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -84,6 +84,26 @@ The config setting to allow modify dialect setting. pass dialect setting with '--'. 
For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` +## Binary functions +- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) + `print binary_and(15, 3) == 3` + `print binary_and(1, 2) == 0` +- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) + `print binary_not(1) == -2` +- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) + `print binary_or(3, 8) == 11` + `print binary_or(1, 2) == 3` +- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) + `print binary_shift_left(1, 1) == 2` + `print binary_shift_left(1, 64) == 1` +- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) + `print binary_shift_right(1, 1) == 0` + `print binary_shift_right(1, 64) == 1` +- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) + `print binary_xor(1, 3) == 2` +- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) + `print bitset_count_ones(42) == 3` + ## IP functions - [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` From 9f80508133954a3531fb3488b0071fbeb5ab38f8 Mon Sep 17 00:00:00 2001 From: kashwy Date: Thu, 11 Aug 2022 12:38:49 -0700 Subject: [PATCH 087/342] Kusto-pahse2: fixed toimspan issue and other functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 275 +++++++++++------- .../KustoFunctions/IParserKQLFunction.cpp | 9 +- .../KustoFunctions/KQLDataTypeFunctions.cpp | 76 ++--- .../KustoFunctions/KQLStringFunctions.cpp | 47 ++- .../Kusto/ParserKQLDateTypeTimespan.cpp | 71 ++++- src/Parsers/Lexer.cpp | 2 +- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 169 +++++++++++ 
.../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 4 + 8 files changed, 493 insertions(+), 160 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index ed64110487ed..fa0a4c1240b2 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,66 +1,171 @@ -# August XX, 2022 -- **DateTimeFunctions** -- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) - `print startofyear(datetime(2017-01-01 10:10:17), -1)` - `print startofyear(datetime(2017-01-01 10:10:17), 0)` - `print startofyear(datetime(2017-01-01 10:10:17), 1)` -- [weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) - `print week_of_year(datetime(2020-12-31))` - `print week_of_year(datetime(2020-06-15))` - `print week_of_year(datetime(1970-01-01))` - `print week_of_year(datetime(2000-01-01))` - -- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) - `print startofweek(datetime(2017-01-01 10:10:17), -1)` - `print startofweek(datetime(2017-01-01 10:10:17), 0)` - `print startofweek(datetime(2017-01-01 10:10:17), 1)` - -- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) - `print startofmonth(datetime(2017-01-01 10:10:17), -1)` - `print startofmonth(datetime(2017-01-01 10:10:17), 0)` - `print startofmonth(datetime(2017-01-01 10:10:17), 1)` - -- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) - `print startofday(datetime(2017-01-01 10:10:17), -1)` - `print startofday(datetime(2017-01-01 10:10:17), 0)` - `print startofday(datetime(2017-01-01 10:10:17), 1)` - -- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) - `print monthofyear(datetime("2015-12-14"))` - -- 
[hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) - `print hourofday(datetime(2015-12-14 18:54:00))` - -- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) - `print getyear(datetime(2015-10-12))` - -- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) - `print getmonth(datetime(2015-10-12))` - -- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) - `print dayofyear(datetime(2015-12-14))` - -- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) - `print (datetime(2015-12-14))` - -- [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) - `print unixtime_seconds_todatetime(1546300800)` - -- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) - `print dayofweek(datetime(2015-12-20))` - -- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) - `print now()` - `print now(2d)` - `print now(-2h)` - `print now(5 microseconds)` - `print now(5 seconds)` - `print now(6minutes)` - `print now(-2d) ` - `print now(time(1d))` - ## KQL implemented features -The config setting to allow modify dialect setting. 
+ +# August 15, 2022 + +## DateTpye +- [bool,boolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/bool) + `print bool(1)` + `print boolean(0)` + +- [datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/datetime) + `print datetime(2015-12-31 23:59:59.9)` + `print datetime('2015-12-31 23:59:59.9')` + `print datetime("2015-12-31:)` + +- [guid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/guid) + `print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)` + `print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')` + `print guid('74be27de1e4e49d9b579fe0b331d3642')` + +- [int](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/int) + `print int(1)` + +- [long](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/long) + `print long(16)` + +- [real](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/real) + `print real(1)` + +- [timespan ,time](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/timespan) + **Note** the timespan is used for calculating datatime, so the output is in seconds. e.g. 
time(1h) = 3600 + `print 1d` + `print 30m` + `print time('0.12:34:56.7')` + `print time(2h)` + `print timespan(2h)` + + +## StringFunctions + +- [base64_encode_fromguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-encode-fromguid-function) +`print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')` +- [base64_decode_toarray](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64_decode_toarrayfunction) +`print base64_decode_toarray('S3VzdG8=')` +- [base64_decode_toguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-decode-toguid-function) +`print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')` +- [replace_regex](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/replace-regex-function) +`print replace_regex('Hello, World!', '.', '\\0\\0')` +- [has_any_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-index-function) +`print idx = has_any_index('this is an example', dynamic(['this', 'example']))` +- [translate](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/translatefunction) +`print translate('krasp', 'otsku', 'spark')` +- [trim](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimfunction) +`print trim('--', '--https://bing.com--')` +- [trim_end](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimendfunction) +`print trim_end('.com', 'bing.com')` +- [trim_start](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimstartfunction) +`print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))` + + + +## DateTimeFunctions +- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) + `print startofyear(datetime(2017-01-01 10:10:17), -1)` + `print startofyear(datetime(2017-01-01 10:10:17), 0)` + `print startofyear(datetime(2017-01-01 10:10:17), 1)` +- 
[weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) + `print week_of_year(datetime(2020-12-31))` + `print week_of_year(datetime(2020-06-15))` + `print week_of_year(datetime(1970-01-01))` + `print week_of_year(datetime(2000-01-01))` + +- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) + `print startofweek(datetime(2017-01-01 10:10:17), -1)` + `print startofweek(datetime(2017-01-01 10:10:17), 0)` + `print startofweek(datetime(2017-01-01 10:10:17), 1)` + +- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) + `print startofmonth(datetime(2017-01-01 10:10:17), -1)` + `print startofmonth(datetime(2017-01-01 10:10:17), 0)` + `print startofmonth(datetime(2017-01-01 10:10:17), 1)` + +- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) + `print startofday(datetime(2017-01-01 10:10:17), -1)` + `print startofday(datetime(2017-01-01 10:10:17), 0)` + `print startofday(datetime(2017-01-01 10:10:17), 1)` + +- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) + `print monthofyear(datetime("2015-12-14"))` + +- [hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) + `print hourofday(datetime(2015-12-14 18:54:00))` + +- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) + `print getyear(datetime(2015-10-12))` + +- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) + `print getmonth(datetime(2015-10-12))` + +- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) + `print dayofyear(datetime(2015-12-14))` + +- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) + `print (datetime(2015-12-14))` + +- 
[unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) + `print unixtime_seconds_todatetime(1546300800)` + +- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) + `print dayofweek(datetime(2015-12-20))` + +- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) + `print now()` + `print now(2d)` + `print now(-2h)` + `print now(5microseconds)` + `print now(5seconds)` + `print now(6minutes)` + `print now(-2d) ` + `print now(time(1d))` + + +## Binary functions +- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) + `print binary_and(15, 3) == 3` + `print binary_and(1, 2) == 0` +- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) + `print binary_not(1) == -2` +- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) + `print binary_or(3, 8) == 11` + `print binary_or(1, 2) == 3` +- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) + `print binary_shift_left(1, 1) == 2` + `print binary_shift_left(1, 64) == 1` +- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) + `print binary_shift_right(1, 1) == 0` + `print binary_shift_right(1, 64) == 1` +- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) + `print binary_xor(1, 3) == 2` +- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) + `print bitset_count_ones(42) == 3` + +## IP functions +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) + `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` + `print format_ipv4(3232236031, 24) == '192.168.1.0'` +- 
[format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) + `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` + `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) + `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` + `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) + `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` + `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) + `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` + `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` + `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` + `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` + + +# August 1, 2022 + +**The config setting to allow modify dialect setting**. - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. For example: @@ -83,51 +188,6 @@ The config setting to allow modify dialect setting. OR pass dialect setting with '--'. 
For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` - -## Binary functions -- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) - `print binary_and(15, 3) == 3` - `print binary_and(1, 2) == 0` -- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) - `print binary_not(1) == -2` -- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) - `print binary_or(3, 8) == 11` - `print binary_or(1, 2) == 3` -- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) - `print binary_shift_left(1, 1) == 2` - `print binary_shift_left(1, 64) == 1` -- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) - `print binary_shift_right(1, 1) == 0` - `print binary_shift_right(1, 64) == 1` -- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) - `print binary_xor(1, 3) == 2` -- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) - `print bitset_count_ones(42) == 3` - -## IP functions -- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) - `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` - `print format_ipv4(3232236031, 24) == '192.168.1.0'` -- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) - `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` - `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` -- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) - `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` - `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` - `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` 
- `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` -- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) - `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` - `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` - `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` - `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` -- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) - `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` - `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` - `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` - `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` - -# August 1, 2022 - **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) `print strcmp('abc','ABC')` @@ -155,7 +215,6 @@ The config setting to allow modify dialect setting. - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` -# July XX, 2022 ## IP functions diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 0b7eb403a226..243b67b73082 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -102,7 +102,6 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - String token = String(pos->begin, pos->end); String new_token; if (!KQLOperators().convert(tokens, pos)) { @@ -115,7 +114,15 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: break; } else + { 
+ String token; + if (pos->type == TokenType::QuotedIdentifier) + token = "'" + String(pos->begin + 1,pos->end - 1) + "'"; + else + token = String(pos->begin, pos->end); + tokens.push_back(token); + } } ++pos; if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 2a59ab8b72a3..0f60bf6d3266 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -5,28 +5,21 @@ #include #include #include -/* -#include -#include -#include -#include -#include -#include -#include -#include -#include -*/ #include +#include #include namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + bool DatatypeBool::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toBool"); } bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) @@ -59,9 +52,24 @@ bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) bool DatatypeDynamic::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String res = String(pos->begin, pos->end); + String array; + ++pos; //go pass "dynamic" string + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + array += String(pos->begin, pos->end); + } + ++pos; + } + if (pos->type == TokenType::ClosingRoundBracket) + array += String(pos->begin, pos->end); + else + return false; + + out = "array" + array; + return true; } bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) @@ -72,10 +80,8 @@ bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) String guid_str; ++pos; - if (pos->type == 
TokenType::QuotedIdentifier) - guid_str = std::format("'{}'", String(pos->begin+1, pos->end -1)); - else if (pos->type == TokenType::StringLiteral) - guid_str = String(pos->begin, pos->end); + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + guid_str = String(pos->begin+1, pos->end -1); else { auto start = pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) @@ -85,32 +91,26 @@ bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) break; } --pos; - guid_str = std::format("'{}'",String(start->begin,pos->end)); + guid_str = String(start->begin,pos->end); } - out = guid_str; + out = std::format("toUUID('{}')", guid_str); ++pos; return true; } bool DatatypeInt::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toInt32"); } bool DatatypeLong::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toInt64"); } bool DatatypeReal::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toFloat64"); } bool DatatypeString::convertImpl(String &out,IParser::Pos &pos) @@ -122,12 +122,22 @@ bool DatatypeString::convertImpl(String &out,IParser::Pos &pos) bool DatatypeTimespan::convertImpl(String &out,IParser::Pos &pos) { + ParserKQLDateTypeTimespan time_span; + ASTPtr node; + Expected expected; + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; ++pos; - out = getConvertedArgument(fn_name, pos); + if (time_span.parse(pos, node, expected)) + { + out = std::to_string(time_span.toSeconds()); + ++pos; + } + else + throw Exception("Not a correct timespan expression: " + fn_name, ErrorCodes::BAD_ARGUMENTS); return true; } diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 767075987885..2a88a56b8442 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -372,9 +372,7 @@ bool ParseVersion::convertImpl(String & out,IParser::Pos & pos) bool ReplaceRegex::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "replaceRegexpAll"); } bool Reverse::convertImpl(String & out,IParser::Pos & pos) @@ -551,23 +549,48 @@ bool Translate::convertImpl(String & out,IParser::Pos & pos) bool Trim::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + String ltrim = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as srcl, concat('random_str', {1}),'') as dstl) = srcl, {0}, dstl)", source, regex); + out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as srcr, concat('random_str', reverse({1})),'') as dstr) = srcr, {0}, reverse(dstr))", ltrim, regex); + + return true; } bool TrimEnd::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as src, concat('random_str', reverse({1})),'') as dst) = src, {0}, reverse(dst))", source, regex); + + return true; } bool TrimStart::convertImpl(String & 
out,IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + out = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as src, concat('random_str', {1}),'') as dst) = src, {0}, dst)", source, regex); + + return true; } bool URLDecode::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp index d83ef4e2f537..af3c4e458753 100644 --- a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -11,10 +12,15 @@ namespace DB bool ParserKQLDateTypeTimespan :: parseImpl(Pos & pos, [[maybe_unused]] ASTPtr & node, Expected & expected) { - const String token(pos->begin,pos->end); + String token; const char * current_word = pos->begin; expected.add(pos, current_word); + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral ) + token = String(pos->begin + 1, pos->end -1); + else + token = String(pos->begin, pos->end); + if (!parseConstKQLTimespan(token)) return false; @@ -84,6 +90,7 @@ bool ParserKQLDateTypeTimespan :: parseConstKQLTimespan(const String & text) {"ticks", KQLTimespanUint::tick} }; + uint16_t days = 0, hours = 0, minutes = 0, seconds = 0, milliseconds = 0; const char * ptr = text.c_str(); @@ -99,21 +106,75 @@ bool ParserKQLDateTypeTimespan :: parseConstKQLTimespan(const String & text) if (number_len <= 0) return false; + days = std::stoi(String(ptr, ptr + number_len)); + if (*(ptr + number_len) == '.') { auto fractionLen = scanDigit(ptr + number_len + 1); if (fractionLen >= 0) { + hours = std::stoi(String(ptr + number_len + 1, ptr + number_len + 
1 + fractionLen)); number_len += fractionLen + 1; } + else + { + hours = days; + days = 0; + } } - String timespan_suffix(ptr + number_len, ptr+text.size()); - if (TimespanSuffixes.find(timespan_suffix) == TimespanSuffixes.end()) + if (hours > 23) return false; - time_span = std::stod(String(ptr, ptr + number_len)); - time_span_unit =TimespanSuffixes[timespan_suffix] ; + if (*(ptr + number_len) != ':') + { + String timespan_suffix(ptr + number_len, ptr + text.size()); + + trim(timespan_suffix); + if (TimespanSuffixes.find(timespan_suffix) == TimespanSuffixes.end()) + return false; + + time_span = std::stod(String(ptr, ptr + number_len)); + time_span_unit = TimespanSuffixes[timespan_suffix] ; + + return true; + } + + auto min_len = scanDigit(ptr + number_len + 1); + if (min_len < 0) + return false; + + minutes = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + min_len)); + if (minutes > 59) + return false; + + number_len += min_len + 1; + if (*(ptr + number_len) == ':') + { + auto sec_len = scanDigit(ptr + number_len + 1); + if (sec_len > 0) + { + seconds = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + sec_len)); + if (seconds > 59) + return false; + + number_len += sec_len + 1; + if (*(ptr + number_len) == '.') + { + auto milli_len = scanDigit(ptr + number_len + 1); + if (milli_len > 0) + { + milliseconds = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + milli_len)); + + if (milliseconds > 1000) + return false; + } + } + } + } + + time_span = days * 86400 + hours * 3600 + minutes * 60 + seconds + milliseconds / 1000; + time_span_unit = KQLTimespanUint::second; return true; } diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 6bd27ee62aea..40e358304176 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -182,7 +182,7 @@ Token Lexer::nextTokenImpl() for (const char * iterator = token_begin; iterator < pos; ++iterator) { - if (!isWordCharASCII(*iterator) && *iterator != '$') + if 
(!isWordCharASCII(*iterator) && *iterator != '$' && *iterator != '.') return Token(TokenType::ErrorWrongNumber, token_begin, pos); } diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp new file mode 100644 index 000000000000..f2994464e14a --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +using namespace DB; +using namespace std::literals; +} +class ParserStringFuncTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserStringFuncTest, ParseQuery) +{ const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + ASSERT_NE(nullptr, parser); + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), 
+ ::testing::ValuesIn(std::initializer_list{ + { + "print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')", + "SELECT base64Encode('ae3133f2-6e22-49ae-b06a-16e6a9b212eb') AS Quine" + }, + { + "print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')", + "SELECT base64Decode('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')" + }, + { + "print base64_decode_toarray('S3VzdG8=')", + "SELECT arrayMap(x -> reinterpretAsUInt8(x), splitByRegexp('', base64Decode('S3VzdG8=')))" + }, + { + "print replace_regex('Hello, World!', '.', '\\0\\0')", + "SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0')" + }, + { + "print idx = has_any_index('this is an example', dynamic(['this', 'example'])) ", + "SELECT if(empty(['this', 'example']), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty(['this', 'example']), [''], arrayMap(x -> toString(x), ['this', 'example']))), 1) - 1) AS idx" + }, + { + "print idx = has_any_index('this is an example', dynamic([]))", + "SELECT if(empty([]), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty([]), [''], arrayMap(x -> toString(x), []))), 1) - 1) AS idx" + }, + { + "print translate('krasp', 'otsku', 'spark')", + "SELECT if(length('otsku') = 0, '', translate('spark', 'krasp', multiIf(length('otsku') = 0, 'krasp', (length('krasp') - length('otsku')) > 0, concat('otsku', repeat(substr('otsku', length('otsku'), 1), toUInt16(length('krasp') - length('otsku')))), (length('krasp') - length('otsku')) < 0, substr('otsku', 1, length('krasp')), 'otsku')))" + }, + { + "print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))", + "SELECT if((replaceRegexpOne(concat('random_str', concat('- ', 'Te st1', '// $')) AS src, concat('random_str', '[^\\\\w]+'), '') AS dst) = src, concat('- ', 'Te st1', '// $'), dst)" + }, + { + "print trim_end('.com', 'bing.com')", + "SELECT if((replaceRegexpOne(concat('random_str', reverse('bing.com')) AS src, 
concat('random_str', reverse('.com')), '') AS dst) = src, 'bing.com', reverse(dst))" + }, + { + "print trim('--', '--https://bing.com--')", + "SELECT if((replaceRegexpOne(concat('random_str', reverse(if((replaceRegexpOne(concat('random_str', '--https://bing.com--') AS srcl, concat('random_str', '--'), '') AS dstl) = srcl, '--https://bing.com--', dstl))) AS srcr, concat('random_str', reverse('--')), '') AS dstr) = srcr, if(dstl = srcl, '--https://bing.com--', dstl), reverse(dstr))" + }, + { + "print bool(1)", + "SELECT toBool(1)" + }, + { + "print datetime(2015-12-31 23:59:59.9)", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print datetime(\"2015-12-31 23:59:59.9\")", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print datetime('2015-12-31 23:59:59.9')", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)", + "SELECT toUUID('74be27de-1e4e-49d9-b579-fe0b331d3642')" + }, + { + "print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')", + "SELECT toUUID('74be27de-1e4e-49d9-b579-fe0b331d3642')" + }, + { + "print guid('74be27de1e4e49d9b579fe0b331d3642')", + "SELECT toUUID('74be27de1e4e49d9b579fe0b331d3642')" + }, + { + "print int(32.5)", + "SELECT toInt32(32.5)" + }, + { + "print long(32.5)", + "SELECT toInt64(32.5)" + }, + { + "print real(32.5)", + "SELECT toFloat64(32.5)" + }, + { + "print time('1.22:34:8.128')", + "SELECT 167648." + }, + { + "print time('1d')", + "SELECT 86400." + }, + { + "print time('1.5d')", + "SELECT 129600." + }, + { + "print timespan('1.5d')", + "SELECT 129600." 
+ } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 77ad97147354..74d13c60d050 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -121,6 +121,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, { "print now()", "SELECT now64(9, 'UTC')" + }, + { + "print now(1d)", + "SELECT now64(9, 'UTC') + 86400." } }))); From 41b7c80c71fbe5ae3a79a9101ec94493e3203d50 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 17 Aug 2022 18:42:46 -0700 Subject: [PATCH 088/342] Aggregate functions added --- src/Parsers/Kusto/KQL_ReleaseNote.md | 33 +++ .../KQLAggregationFunctions.cpp | 209 +++++++++++++++--- .../KQL/gtest_KQL_AggregateFunctions.cpp | 113 ++++++++++ 3 files changed, 328 insertions(+), 27 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index fa0a4c1240b2..7206c3cec898 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,5 +1,38 @@ ## KQL implemented features +# August XX, 2022 + +## Aggregate Functions +- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) + `Customers | summarize t = stdev(Age) by FirstName` + +- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) + `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` + +- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) + `Customers | summarize t = binary_all_and(Age) by FirstName` + +- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) + `Customers | summarize t = binary_all_or(Age) by FirstName` + +- 
[binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) + `Customers | summarize t = binary_all_xor(Age) by FirstName` + +- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` + +- [percentiles_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + **do not support `range()` now** + `Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName` + `Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName` + +- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` + +- [percentilesw_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + **do not support `range()` now** + `DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))` + # August 15, 2022 ## DateTpye diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 30b33b5933ad..a16c4f6ea22d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -40,23 +40,17 @@ bool AvgIf::convertImpl(String &out,IParser::Pos &pos) bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"groupBitAnd"); } bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"groupBitOr"); } bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos) { - String res = 
String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"groupBitXor"); } bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) @@ -220,44 +214,205 @@ bool MinIf::convertImpl(String &out,IParser::Pos &pos) bool Percentiles::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name,pos); + column_name.pop_back(); + String expr = ""; + String value; + String value_in_column; + while(pos->type != TokenType::ClosingRoundBracket) + { + if(pos->type != TokenType::Comma){ + value = String(pos->begin, pos->end); + value_in_column = ""; + + for(size_t i = 0; i < value.size(); i++) + { + if(value[i] == '.') + value_in_column += '_'; + else + value_in_column += value[i]; + } + expr = expr + "quantile( " + value + "/100)(" + column_name + ") AS percentile_" + column_name + "_" + value_in_column; + ++pos; + if(pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + out = expr; + return true; } bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name,pos); + column_name.pop_back(); + String expr = "quantiles("; + String value; + while(pos->type != TokenType::ClosingRoundBracket) + { + if(pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" + && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket){ + + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if(pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != 
TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; + } + + } + ++pos; + if(pos->type != TokenType::ClosingRoundBracket) + --pos; + + expr.pop_back(); + expr.pop_back(); + expr = expr + ")(" + column_name + ")"; + out = expr; + return true; } bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name,pos); + bucket_column.pop_back(); + + ++pos; + String frequency_column = getConvertedArgument(fn_name,pos); + frequency_column.pop_back(); + + String expr = ""; + String value; + String value_in_column; + + while(pos->type != TokenType::ClosingRoundBracket) + { + if(pos->type != TokenType::Comma){ + value = String(pos->begin, pos->end); + value_in_column = ""; + + for(size_t i = 0; i < value.size(); i++) + { + if(value[i] == '.') + value_in_column += '_'; + else + value_in_column += value[i]; + } + + expr = expr + "quantileExactWeighted( " + value + "/100)(" + bucket_column + ","+frequency_column + ") AS percentile_" + bucket_column + "_" + value_in_column; + ++pos; + if(pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + out = expr; + return true; } bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name,pos); + bucket_column.pop_back(); + + ++pos; + String frequency_column = getConvertedArgument(fn_name,pos); + frequency_column.pop_back(); + + String expr = "quantilesExactWeighted("; + String value; + while(pos->type != TokenType::ClosingRoundBracket) + { + if(pos->type != TokenType::Comma && 
String(pos->begin, pos->end) != "dynamic" + && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket){ + + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if(pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; + } + + } + ++pos; + if(pos->type != TokenType::ClosingRoundBracket) + --pos; + + expr.pop_back(); + expr.pop_back(); + expr = expr + ")(" + bucket_column + ","+frequency_column + ")"; + out = expr; + return true; } bool Stdev::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + out = "sqrt(varSamp(" + expr + "))"; + return true; } bool StdevIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const auto predicate = getConvertedArgument(fn_name,pos); + out = "sqrt(varSampIf(" + expr + ", " + predicate + "))"; + return true; } bool Sum::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp new file mode 100644 index 000000000000..83bec1d53335 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +namespace +{ +using namespace DB; +using namespace std::literals; +} +class ParserAggregateFuncTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserAggregateFuncTest, ParseQuery) +{ const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + ASSERT_NE(nullptr, parser); + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | summarize t = stdev(Age) by FirstName", + "SELECT\n FirstName,\n sqrt(varSamp(Age)) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = stdevif(Age, Age < 10) by FirstName", + "SELECT\n FirstName,\n sqrt(varSampIf(Age, Age < 10)) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = binary_all_and(Age) by FirstName", + "SELECT\n FirstName,\n groupBitAnd(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, 
+ { + "Customers | summarize t = binary_all_or(Age) by FirstName", + "SELECT\n FirstName,\n groupBitOr(Age) AS t\nFROM Customers\nGROUP BY FirstName" + + }, + { + "Customers | summarize t = binary_all_xor(Age) by FirstName", + "SELECT\n FirstName,\n groupBitXor(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName", + "SELECT\n FirstName,\n quantile(30 / 100)(Age) AS percentile_Age_30,\n quantile(40 / 100)(Age) AS percentile_Age_40,\n quantile(50 / 100)(Age) AS percentile_Age_50,\n quantile(60 / 100)(Age) AS percentile_Age_60,\n quantile(70 / 100)(Age) AS percentile_Age_70\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName", + "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName", + "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)", + "SELECT\n quantileExactWeighted(50 / 100)(Bucket, Frequency) AS percentile_Bucket_50,\n quantileExactWeighted(75 / 100)(Bucket, Frequency) AS percentile_Bucket_75,\n quantileExactWeighted(99.9 / 100)(Bucket, Frequency) AS percentile_Bucket_99_9\nFROM DataTable" + }, + { + "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))", + "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable" + } +}))); From 5fd3c554d6d6677be1bccb41ad2e2b8d95b722dc Mon Sep 17 00:00:00 2001 From: root Date: Thu, 18 Aug 2022 06:53:29 -0700 Subject: [PATCH 089/342] applied changes asked by Yong --- .../Kusto/KustoFunctions/KQLAggregationFunctions.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff 
--git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index a16c4f6ea22d..b0410ed4cfb5 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -221,8 +222,8 @@ bool Percentiles::convertImpl(String &out,IParser::Pos &pos) ++pos; String column_name = getConvertedArgument(fn_name,pos); - column_name.pop_back(); - String expr = ""; + trim(column_name); + String expr; String value; String value_in_column; while(pos->type != TokenType::ClosingRoundBracket) @@ -259,7 +260,7 @@ bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) ++pos; String column_name = getConvertedArgument(fn_name,pos); - column_name.pop_back(); + trim(column_name); String expr = "quantiles("; String value; while(pos->type != TokenType::ClosingRoundBracket) @@ -308,7 +309,7 @@ bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) String frequency_column = getConvertedArgument(fn_name,pos); frequency_column.pop_back(); - String expr = ""; + String expr; String value; String value_in_column; From c85cd9bc285b65d3f49289abb373c7bbc20b918c Mon Sep 17 00:00:00 2001 From: root Date: Fri, 19 Aug 2022 07:10:00 -0700 Subject: [PATCH 090/342] added percentile() and percentilew() --- src/Parsers/Kusto/KQL_ReleaseNote.md | 6 +++ .../KQLAggregationFunctions.cpp | 53 +++++++++++++++++-- .../KustoFunctions/KQLAggregationFunctions.h | 14 +++++ .../KustoFunctions/KQLFunctionFactory.cpp | 8 +++ .../Kusto/KustoFunctions/KQLFunctionFactory.h | 2 + .../KQL/gtest_KQL_AggregateFunctions.cpp | 8 +++ 6 files changed, 87 insertions(+), 4 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 7206c3cec898..d077d199dd93 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -33,6 
+33,12 @@ **do not support `range()` now** `DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))` +- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize t = percentile(Age, 50) by FirstName` + +- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` + # August 15, 2022 ## DateTpye diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index b0410ed4cfb5..54ac82a1fccb 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -213,6 +213,51 @@ bool MinIf::convertImpl(String &out,IParser::Pos &pos) return directMapping(out,pos,"minIf"); } +bool Percentile::convertImpl(String &out,IParser::Pos &pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name,pos); + trim(column_name); + + if(pos->type != TokenType::Comma) + return false; + ++pos; + String value = getConvertedArgument(fn_name,pos); + trim(value); + + out = "quantile(" + value + "/100)(" + column_name + ")"; + std::cout << "Mallik: " << out << std::endl; + return true; +} + +bool Percentilew::convertImpl(String &out,IParser::Pos &pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name,pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name,pos); + trim(frequency_column); + + ++pos; + String value = getConvertedArgument(fn_name,pos); + trim(value); + + out = "quantileExactWeighted( " + value + "/100)(" + bucket_column + ","+frequency_column + ")"; + return true; +} + bool 
Percentiles::convertImpl(String &out,IParser::Pos &pos) { String fn_name = getKQLFunctionName(pos); @@ -303,11 +348,11 @@ bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) ++pos; String bucket_column = getConvertedArgument(fn_name,pos); - bucket_column.pop_back(); + trim(bucket_column); ++pos; String frequency_column = getConvertedArgument(fn_name,pos); - frequency_column.pop_back(); + trim(frequency_column); String expr; String value; @@ -348,11 +393,11 @@ bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) ++pos; String bucket_column = getConvertedArgument(fn_name,pos); - bucket_column.pop_back(); + trim(bucket_column); ++pos; String frequency_column = getConvertedArgument(fn_name,pos); - frequency_column.pop_back(); + trim(frequency_column); String expr = "quantilesExactWeighted("; String value; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h index 6e7130420f4c..86d948599227 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -165,6 +165,20 @@ class MinIf : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; +class Percentile : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentile()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentilew : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilew()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + class Percentiles : public IParserKQLFunction { protected: diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 075d56d9608a..6cfd67514a24 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ 
b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -152,6 +152,8 @@ namespace DB {"maxif", KQLFunctionValue::maxif}, {"min", KQLFunctionValue::min}, {"minif", KQLFunctionValue::minif}, + {"percentile", KQLFunctionValue::percentile}, + {"percentilew", KQLFunctionValue::percentilew}, {"percentiles", KQLFunctionValue::percentiles}, {"percentiles_array", KQLFunctionValue::percentiles_array}, {"percentilesw", KQLFunctionValue::percentilesw}, @@ -609,6 +611,12 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::minif: return std::make_unique(); + case KQLFunctionValue::percentile: + return std::make_unique(); + + case KQLFunctionValue::percentilew: + return std::make_unique(); + case KQLFunctionValue::percentiles: return std::make_unique(); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index ed7479641756..38bac6d641a0 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -136,6 +136,8 @@ namespace DB maxif, min, minif, + percentile, + percentilew, percentiles, percentiles_array, percentilesw, diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp index 83bec1d53335..aaa980ddccef 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp @@ -109,5 +109,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest, { "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))", "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable" + }, + { + "Customers | summarize t = percentile(Age, 50) by FirstName", + "SELECT\n FirstName,\n quantile(50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "DataTable | summarize t = percentilew(Bucket, Frequency, 
50)", + "SELECT quantileExactWeighted(50 / 100)(Bucket, Frequency) AS t\nFROM DataTable" } }))); From 638777540f0e962bd5b277fca3dba596cb70e308 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 23 Aug 2022 07:40:55 -0700 Subject: [PATCH 091/342] addressed change requests by Yong --- .../KQLAggregationFunctions.cpp | 93 +++++++++---------- 1 file changed, 45 insertions(+), 48 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 54ac82a1fccb..5a2fa0c984b3 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -19,59 +19,59 @@ namespace DB { -bool ArgMax::convertImpl(String &out,IParser::Pos &pos) +bool ArgMax::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"argMax"); } -bool ArgMin::convertImpl(String &out,IParser::Pos &pos) +bool ArgMin::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"argMin"); } -bool Avg::convertImpl(String &out,IParser::Pos &pos) +bool Avg::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"avg"); } -bool AvgIf::convertImpl(String &out,IParser::Pos &pos) +bool AvgIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"avgIf"); } -bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos) +bool BinaryAllAnd::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"groupBitAnd"); } -bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos) +bool BinaryAllOr::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"groupBitOr"); } -bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos) +bool BinaryAllXor::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"groupBitXor"); } -bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) +bool BuildSchema::convertImpl(String & 
out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Count::convertImpl(String &out,IParser::Pos &pos) +bool Count::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"count"); } -bool CountIf::convertImpl(String &out,IParser::Pos &pos) +bool CountIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"countIf"); } -bool DCount::convertImpl(String &out,IParser::Pos &pos) +bool DCount::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -84,7 +84,7 @@ bool DCount::convertImpl(String &out,IParser::Pos &pos) return true; } -bool DCountIf::convertImpl(String &out,IParser::Pos &pos) +bool DCountIf::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -98,21 +98,21 @@ bool DCountIf::convertImpl(String &out,IParser::Pos &pos) return true; } -bool MakeBag::convertImpl(String &out,IParser::Pos &pos) +bool MakeBag::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool MakeBagIf::convertImpl(String &out,IParser::Pos &pos) +bool MakeBagIf::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool MakeList::convertImpl(String &out,IParser::Pos &pos) +bool MakeList::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -130,7 +130,7 @@ bool MakeList::convertImpl(String &out,IParser::Pos &pos) return true; } -bool MakeListIf::convertImpl(String &out,IParser::Pos &pos) +bool MakeListIf::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -150,12 +150,12 @@ bool MakeListIf::convertImpl(String &out,IParser::Pos &pos) return true; } -bool MakeListWithNulls::convertImpl(String &out,IParser::Pos &pos) +bool MakeListWithNulls::convertImpl(String & out,IParser::Pos & pos) { - return 
directMapping(out,pos,"groupArray"); //groupArray takes everything including NULLs + return directMapping(out,pos,"groupArray"); } -bool MakeSet::convertImpl(String &out,IParser::Pos &pos) +bool MakeSet::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -169,11 +169,11 @@ bool MakeSet::convertImpl(String &out,IParser::Pos &pos) const auto max_size = getConvertedArgument(fn_name,pos); out = "groupUniqArray(" + max_size + ")(" + expr + ")"; } else - out = "groupUniqArray(" + expr + ")"; + out = "groupUniqArray(" + expr + ")"; return true; } -bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) +bool MakeSetIf::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -193,27 +193,27 @@ bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Max::convertImpl(String &out,IParser::Pos &pos) +bool Max::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"max"); } -bool MaxIf::convertImpl(String &out,IParser::Pos &pos) +bool MaxIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"maxIf"); } -bool Min::convertImpl(String &out,IParser::Pos &pos) +bool Min::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"min"); } -bool MinIf::convertImpl(String &out,IParser::Pos &pos) +bool MinIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"minIf"); } -bool Percentile::convertImpl(String &out,IParser::Pos &pos) +bool Percentile::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -224,18 +224,15 @@ bool Percentile::convertImpl(String &out,IParser::Pos &pos) String column_name = getConvertedArgument(fn_name,pos); trim(column_name); - if(pos->type != TokenType::Comma) - return false; ++pos; String value = getConvertedArgument(fn_name,pos); trim(value); out = "quantile(" + value + "/100)(" + column_name + ")"; - std::cout << "Mallik: " << 
out << std::endl; return true; } -bool Percentilew::convertImpl(String &out,IParser::Pos &pos) +bool Percentilew::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -258,7 +255,7 @@ bool Percentilew::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Percentiles::convertImpl(String &out,IParser::Pos &pos) +bool Percentiles::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -296,7 +293,7 @@ bool Percentiles::convertImpl(String &out,IParser::Pos &pos) return true; } -bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) +bool PercentilesArray::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -310,10 +307,10 @@ bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) String value; while(pos->type != TokenType::ClosingRoundBracket) { - if(pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" + if(pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket){ - + value = String(pos->begin, pos->end); expr = expr + value + "/100"; @@ -339,7 +336,7 @@ bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) +bool Percentilesw::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -384,7 +381,7 @@ bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) return true; } -bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) +bool PercentileswArray::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -403,10 +400,11 @@ bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) String value; while(pos->type != TokenType::ClosingRoundBracket) { - if(pos->type 
!= TokenType::Comma && String(pos->begin, pos->end) != "dynamic" + if(pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket - && pos->type != TokenType::ClosingSquareBracket){ - + && pos->type != TokenType::ClosingSquareBracket) + { + value = String(pos->begin, pos->end); expr = expr + value + "/100"; @@ -432,7 +430,7 @@ bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Stdev::convertImpl(String &out,IParser::Pos &pos) +bool Stdev::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -444,7 +442,7 @@ bool Stdev::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StdevIf::convertImpl(String &out,IParser::Pos &pos) +bool StdevIf::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -461,42 +459,41 @@ bool StdevIf::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Sum::convertImpl(String &out,IParser::Pos &pos) +bool Sum::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"sum"); } -bool SumIf::convertImpl(String &out,IParser::Pos &pos) +bool SumIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"sumIf"); } -bool TakeAny::convertImpl(String &out,IParser::Pos &pos) +bool TakeAny::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool TakeAnyIf::convertImpl(String &out,IParser::Pos &pos) +bool TakeAnyIf::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Variance::convertImpl(String &out,IParser::Pos &pos) +bool Variance::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool VarianceIf::convertImpl(String &out,IParser::Pos &pos) +bool 
VarianceIf::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } - } From 8e7ff798ab63830400dfefd168c93697ae1acf0d Mon Sep 17 00:00:00 2001 From: root Date: Wed, 24 Aug 2022 06:42:21 -0700 Subject: [PATCH 092/342] updated release note + test file --- src/Parsers/Kusto/KQL_ReleaseNote.md | 9 --- .../KQL/gtest_KQL_AggregateFunctions.cpp | 66 +------------------ 2 files changed, 1 insertion(+), 74 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index d077d199dd93..0a9a6d87df12 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -21,18 +21,9 @@ - [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` -- [percentiles_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - **do not support `range()` now** - `Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName` - `Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName` - - [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` -- [percentilesw_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - **do not support `range()` now** - `DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))` - - [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) `Customers | summarize t = percentile(Age, 50) by FirstName` diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp index aaa980ddccef..1a532f27ac08 100644 --- 
a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp @@ -1,71 +1,7 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -namespace -{ -using namespace DB; -using namespace std::literals; -} -class ParserAggregateFuncTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserAggregateFuncTest, ParseQuery) -{ const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - ASSERT_NE(nullptr, parser); - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} - -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ From 5bcaa849cf685b71b4c9f86bccf6e70602d2958a Mon Sep 17 00:00:00 2001 From: root Date: Wed, 24 Aug 2022 06:47:39 -0700 Subject: [PATCH 093/342] updated release 
notes to resolve conflicts --- src/Parsers/Kusto/KQL_ReleaseNote.md | 46 +++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 0a9a6d87df12..c3c038eb90d8 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,6 +1,50 @@ ## KQL implemented features -# August XX, 2022 +# August 29, 2022 + +## Dynamic functions +- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) + `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` + +- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` + `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + +- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) + `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` + `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` + `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` + +- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) + `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` + `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` + +## DateTimeFunctions + +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) + `print 
ago(2h)` + +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) + `print endofday(datetime(2017-01-01 10:10:17), -1)` + `print endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + `print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + `print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` ## Aggregate Functions - [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) From 8218df2c103dafd2aa646fba665ad3d30170e9ae Mon Sep 17 00:00:00 2001 From: root Date: Wed, 24 Aug 2022 06:57:54 -0700 Subject: [PATCH 094/342] updated release notes to resolve conflicts --- src/Parsers/Kusto/KQL_ReleaseNote.md | 98 ++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 7 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index c3c038eb90d8..7e89108e78e2 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,7 +1,7 @@ + ## KQL implemented features # August 29, 2022 - ## Dynamic functions - [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 
8, 9])` @@ -46,7 +46,27 @@ `print endofyear(datetime(2017-01-01 10:10:17), 1)` `print endofyear(datetime(2017-01-01 10:10:17))` -## Aggregate Functions +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) + `print make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + `print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` + +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) + `print unixtime_milliseconds_todatetime(1546300800000)` + +- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) + `print unixtime_nanoseconds_todatetime(1546300800000000000)` + +## Aggregate Functions + - [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) `Customers | summarize t = stdev(Age) by FirstName` @@ -75,8 +95,59 @@ `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` # August 15, 2022 - -## DateTpye + **double quote support** + ``print res = strcat("double ","quote")`` +## Aggregate functions + - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) + `print res = bin_at(6.5, 2.5, 7)` + `print res = bin_at(1h, 1d, 12h)` + `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 
12:00:00.0))` + `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` + + - [array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) + *Supports only basic lookup. Do not support start_index, length and occurrence* + `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` + `print output = array_index_of(dynamic([1, 2, 3]), 2)` + - [array_sum](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array-sum-function) + `print output = array_sum(dynamic([2, 5, 3]))` + `print output = array_sum(dynamic([2.5, 5.5, 3]))` + - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) + `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` + `print output = array_length(dynamic([1, 2, 3]))` + +## Conversion +- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) + `print tobool(true) == true` + `print toboolean(false) == false` + `print tobool(0) == false` + `print toboolean(19819823) == true` + `print tobool(-2) == true` + `print isnull(toboolean('a'))` + `print tobool('true') == true` + `print toboolean('false') == false` + +- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) + `print todouble(4) == 4` + `print toreal(4.2) == 4.2` + `print isnull(todouble('a'))` + `print toreal('-0.3') == -0.3` + +- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) + `print isnull(toint('a'))` + `print toint(4) == 4` + `print toint('4') == 4` + `print isnull(toint(4.2))` + +- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) + `print tostring(123) == '123'` + `print tostring('asd') == 'asd'` + +## Data Types + - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) + 
*Supports only 1D array* + `print output = dynamic(['a', 'b', 'c'])` + `print output = dynamic([1, 2, 3])` + - [bool,boolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/bool) `print bool(1)` `print boolean(0)` @@ -130,8 +201,6 @@ - [trim_start](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimstartfunction) `print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))` - - ## DateTimeFunctions - [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) `print startofyear(datetime(2017-01-01 10:10:17), -1)` @@ -230,12 +299,26 @@ `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) + `print ipv6_compare('::ffff:7f00:1', '127.0.0.1') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` + `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) + `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` + `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` - [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) `print parse_ipv4_mask('127.0.0.1', 24) == 
2130706432` `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` - +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) + `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` + `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` # August 1, 2022 @@ -262,6 +345,7 @@ OR pass dialect setting with '--'. For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` + - **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) `print strcmp('abc','ABC')` From 0683b4cb536716e3fcb2db9706e45f4f9d671ad4 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 24 Aug 2022 07:24:06 -0700 Subject: [PATCH 095/342] retry resolve merge conflict --- src/Parsers/Kusto/KQL_ReleaseNote.md | 59 ++++++++++++++-------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 7e89108e78e2..ec1ed8c2c557 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -2,6 +2,34 @@ ## KQL implemented features # August 29, 2022 +## Aggregate Functions +- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) + `Customers | summarize t = stdev(Age) by FirstName` + +- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) + `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` + +- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) + `Customers | summarize t = binary_all_and(Age) by FirstName` + +- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) + `Customers | summarize t = 
binary_all_or(Age) by FirstName` + +- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) + `Customers | summarize t = binary_all_xor(Age) by FirstName` + +- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` + +- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` + +- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize t = percentile(Age, 50) by FirstName` + +- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` + ## Dynamic functions - [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` @@ -63,36 +91,7 @@ `print unixtime_milliseconds_todatetime(1546300800000)` - [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) - `print unixtime_nanoseconds_todatetime(1546300800000000000)` - -## Aggregate Functions - -- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) - `Customers | summarize t = stdev(Age) by FirstName` - -- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) - `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` - -- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) - `Customers | summarize t = binary_all_and(Age) by FirstName` - -- 
[binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) - `Customers | summarize t = binary_all_or(Age) by FirstName` - -- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) - `Customers | summarize t = binary_all_xor(Age) by FirstName` - -- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` - -- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` - -- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `Customers | summarize t = percentile(Age, 50) by FirstName` - -- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` + `print unixtime_nanoseconds_todatetime(1546300800000000000)` # August 15, 2022 **double quote support** From 130ddef2404128d9f9d2e7259701ee7b2f7de254 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 11 Aug 2022 17:11:22 -0700 Subject: [PATCH 096/342] Resubmit Aggregate functions - array_index_of, length, sum and dynamic data type --- src/Parsers/Kusto/KQL_ReleaseNote.md | 130 +----------------- .../KustoFunctions/KQLDynamicFunctions.cpp | 23 ++-- src/Parsers/tests/gtest_Parser.cpp | 36 +++++ 3 files changed, 51 insertions(+), 138 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index ec1ed8c2c557..31f09cc50710 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,108 +1,8 @@ ## KQL implemented features -# August 29, 2022 -## Aggregate Functions -- 
[stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) - `Customers | summarize t = stdev(Age) by FirstName` - -- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) - `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` - -- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) - `Customers | summarize t = binary_all_and(Age) by FirstName` - -- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) - `Customers | summarize t = binary_all_or(Age) by FirstName` - -- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) - `Customers | summarize t = binary_all_xor(Age) by FirstName` - -- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` - -- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` - -- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `Customers | summarize t = percentile(Age, 50) by FirstName` - -- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` - -## Dynamic functions -- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) - `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` - -- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) - `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), 
dynamic([4, 5, 6])) == dynamic([1, 5, 3])` - `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` - `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` - `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` - -- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) - `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` - `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` - `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` - -- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) - `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` - `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` - -## DateTimeFunctions - -- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) - `print ago(2h)` - -- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) - `print endofday(datetime(2017-01-01 10:10:17), -1)` - `print endofday(datetime(2017-01-01 10:10:17), 1)` - `print endofday(datetime(2017-01-01 10:10:17))` - -- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) - `print endofmonth(datetime(2017-01-01 10:10:17), -1)` - `print endofmonth(datetime(2017-01-01 10:10:17), 1)` - `print endofmonth(datetime(2017-01-01 10:10:17))` - -- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) - `print endofweek(datetime(2017-01-01 10:10:17), 1)` - `print endofweek(datetime(2017-01-01 10:10:17), -1)` - `print endofweek(datetime(2017-01-01 10:10:17))` - -- 
[endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) - `print endofyear(datetime(2017-01-01 10:10:17), -1)` - `print endofyear(datetime(2017-01-01 10:10:17), 1)` - `print endofyear(datetime(2017-01-01 10:10:17))` - -- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) - `print make_datetime(2017,10,01)` - `print make_datetime(2017,10,01,12,10)` - `print make_datetime(2017,10,01,12,11,0.1234567)` - -- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) - `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` - `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` - `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` - -- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) - `print unixtime_microseconds_todatetime(1546300800000000)` - -- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) - `print unixtime_milliseconds_todatetime(1546300800000)` - -- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) - `print unixtime_nanoseconds_todatetime(1546300800000000000)` - # August 15, 2022 - **double quote support** - ``print res = strcat("double ","quote")`` ## Aggregate functions - - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) - `print res = bin_at(6.5, 2.5, 7)` - `print res = bin_at(1h, 1d, 12h)` - `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))` - `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` - - 
[array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) *Supports only basic lookup. Do not support start_index, length and occurrence* `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` @@ -113,35 +13,7 @@ - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` `print output = array_length(dynamic([1, 2, 3]))` - -## Conversion -- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) - `print tobool(true) == true` - `print toboolean(false) == false` - `print tobool(0) == false` - `print toboolean(19819823) == true` - `print tobool(-2) == true` - `print isnull(toboolean('a'))` - `print tobool('true') == true` - `print toboolean('false') == false` - -- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) - `print todouble(4) == 4` - `print toreal(4.2) == 4.2` - `print isnull(todouble('a'))` - `print toreal('-0.3') == -0.3` - -- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) - `print isnull(toint('a'))` - `print toint(4) == 4` - `print toint('4') == 4` - `print isnull(toint(4.2))` - -- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) - `print tostring(123) == '123'` - `print tostring('asd') == 'asd'` - -## Data Types +## DateType - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) *Supports only 1D array* `print output = dynamic(['a', 'b', 'c'])` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index a6ff0a374ebc..3f534679c584 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ 
b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -34,16 +34,23 @@ bool ArrayIif::convertImpl(String &out,IParser::Pos &pos) bool ArrayIndexOf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String array = getConvertedArgument(fn_name, pos); + ++pos; + const auto needle = getConvertedArgument(fn_name, pos); + out = "minus(indexOf(" + array + ", " + needle + ") , 1)"; + + return true; } bool ArrayLength::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "length"); } bool ArrayReverse::convertImpl(String &out,IParser::Pos &pos) @@ -111,9 +118,7 @@ bool ArraySplit::convertImpl(String &out,IParser::Pos &pos) bool ArraySum::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "arraySum"); } bool BagKeys::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 960873d23428..fe7453bd2048 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -587,5 +587,41 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName", "SELECT\n FirstName,\n groupUniqArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "print output = dynamic([1, 2, 3])", + "SELECT [1, 2, 3] AS output" + }, + { + "print output = dynamic(['a', 'b', 'c'])", + "SELECT ['a', 'b', 'c'] AS output" + }, + { + "print output = array_index_of(dynamic([1, 2, 3]), 2)", + "SELECT indexOf([1, 2, 3], 2) - 1 AS output" + }, + { + "print output = array_index_of(dynamic(['a', 'b', 'c']), 'b')", + "SELECT indexOf(['a', 'b', 'c'], 'b') 
- 1 AS output" + }, + { + "print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')", + "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS output" + }, + { + "print output = array_length(dynamic([1, 2, 3]))", + "SELECT length([1, 2, 3]) AS output" + }, + { + "print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))", + "SELECT length(['John', 'Denver', 'Bob', 'Marley']) AS output" + }, + { + "print output = array_sum(dynamic([2, 5, 3]))", + "SELECT arraySum([2, 5, 3]) AS output" + }, + { + "print output = array_sum(dynamic([2.5, 5.5, 3]))", + "SELECT arraySum([2.5, 5.5, 3]) AS output" } }))); From 3a02163d4b781e4e9459ab3ecd98e458160acfd2 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 12 Aug 2022 07:32:42 -0700 Subject: [PATCH 097/342] Implement KQL IPv6 functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 16 ++--- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 63 +++++++++++++++---- 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 31f09cc50710..043976466124 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -154,40 +154,40 @@ `print bitset_count_ones(42) == 3` ## IP functions -- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` `print format_ipv4(3232236031, 24) == '192.168.1.0'` -- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) +- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` `print format_ipv4_mask(3232236031, 24) == 
'192.168.1.0/24'` -- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` -- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` -- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) `print ipv6_compare('::ffff:7f00:1', '127.0.0.1') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` -- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 
'fe80::85d:e82c:9446:7995/127') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` -- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` -- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 765912ff9361..a0e7cb30f79d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -82,7 +82,7 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos); const auto rhs = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); - out = std::format("{} = 0", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? *mask : "32"}, pos.max_depth)); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? 
*mask : "32"}, pos.max_depth)); return true; } @@ -172,16 +172,43 @@ bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + const auto calculated_mask = mask ? *mask : "128"; + out = std::format( + "if(length(splitByChar('/', {1}) as lhs_tokens_{0}) > 2 or length(splitByChar('/', {2}) as rhs_tokens_{0}) > 2 " + "or isNull(IPv6StringToNumOrNull(lhs_tokens_{0}[1]) as lhs_ipv6_{0}) or length(lhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(lhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(lhs_tokens_{0}[-1])) as lhs_suffix_{0}) " + "or isNull(IPv6StringToNumOrNull(rhs_tokens_{0}[1]) as rhs_ipv6_{0}) or length(rhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(rhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(rhs_tokens_{0}[-1])) as rhs_suffix_{0}) " + "or isNull(toUInt8(min2({3}, min2(ifNull(lhs_suffix_{0}, 128), ifNull(rhs_suffix_{0}, 128)))) as suffix_{0}) " + "or isNull(bitShiftLeft(bitShiftRight(bitNot(reinterpretAsFixedString(0::UInt128)), (128 - suffix_{0}) as zeroes_{0}), " + "zeroes_{0}) as mask_{0}) or isNull(bitAnd(lhs_ipv6_{0}, mask_{0}) as lhs_base_{0}) " + "or isNull(bitAnd(rhs_ipv6_{0}, mask_{0}) as rhs_base_{0}), null, " + "multiIf(lhs_base_{0} < rhs_base_{0}, -1, lhs_base_{0} > rhs_base_{0}, 1, 0))", + generateUniqueIdentifier(), + lhs, + rhs, + calculated_mask); + return true; } bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = 
getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask ? *mask : "128"}, pos.max_depth)); + return true; } bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) @@ -192,19 +219,29 @@ bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); out = std::format( - "if(isNull(ifNull(if(isNull({1} as ipv4_{2}), null, IPv4ToIPv6(ipv4_{2})), IPv6StringToNumOrNull({0})) as ipv6_{2}), null, " - "arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6_{2}))), '([\\da-f]{{4}})')), ':'))", + "if(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(IPv6StringToNumOrNull(tokens_{1}[1]) as ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "arrayStringConcat(flatten(extractAllGroups(lower(hex(tupleElement(IPv6CIDRToRange(assumeNotNull(ip_{1}), toUInt8(ifNull(mask_{1} " + "+ if(isIPv4String(tokens_{1}[1]), 96, 0), 128))), 1))), '([\\da-f]{{4}})')), ':'))", ip_address, - kqlCallToExpression("parse_ipv4", {ip_address}, pos.max_depth), generateUniqueIdentifier()); return true; } bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getArgument(function_name, pos); + out = std::format( + "if(isNull({0} as ipv4), {1}, {2})", + kqlCallToExpression("parse_ipv4_mask", {ip_address, mask}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"format_ipv4(ipv4)"}, 
pos.max_depth)); + return true; } bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) From e18f34f3a727bc08b34ab685e7815c1db044f3e7 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 12 Aug 2022 07:54:38 -0700 Subject: [PATCH 098/342] Correct rebase error --- src/Parsers/Kusto/KQL_ReleaseNote.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 043976466124..a6073d8e00d9 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -154,40 +154,40 @@ `print bitset_count_ones(42) == 3` ## IP functions -- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` `print format_ipv4(3232236031, 24) == '192.168.1.0'` -- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) +- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` -- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` -- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) +- 
[ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` -- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) `print ipv6_compare('::ffff:7f00:1', '127.0.0.1') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` -- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` -- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` `print parse_ipv4_mask('127.2.3.4', 32) == 
2130838276` -- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` From bf13deb57a69b2c9e15683c5013e4e7a3ed63869 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 12 Aug 2022 09:54:49 -0700 Subject: [PATCH 099/342] Add unit tests --- src/Parsers/tests/KQL/gtest_KQL_IP.cpp | 30 +++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp index c2257d055009..731715b45424 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -51,6 +51,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, "print ipv4_compare(A, B, C)", "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND 
\\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" }, + { + "print ipv6_compare(A, B)", + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS lhs_tokens_\\d+\\) > 2\\) OR \\(length\\(splitByChar\\('/', B\\) AS rhs_tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(lhs_tokens_\\d+\\[1\\]\\) AS lhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(lhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(lhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(lhs_tokens_\\d+\\[-1\\]\\)\\) AS lhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(IPv6StringToNumOrNull\\(rhs_tokens_\\d+\\[1\\]\\) AS rhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(rhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(rhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(rhs_tokens_\\d+\\[-1\\]\\)\\) AS rhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(toUInt8\\(min2\\(128, min2\\(ifNull\\(lhs_suffix_\\d+, 128\\), ifNull\\(rhs_suffix_\\d+, 128\\)\\)\\)\\) AS suffix_\\d+\\) IS NULL\\) OR \\(\\(bitShiftLeft\\(bitShiftRight\\(bitNot\\(reinterpretAsFixedString\\(CAST\\('0', 'UInt128'\\)\\)\\), 128 - suffix_\\d+ AS zeroes_\\d+\\), zeroes_\\d+\\) AS mask_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(lhs_ipv6_\\d+, mask_\\d+\\) AS lhs_base_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(rhs_ipv6_\\d+, 
mask_\\d+\\) AS rhs_base_\\d+\\) IS NULL\\), NULL, multiIf\\(lhs_base_\\d+ < rhs_base_\\d+, -1, lhs_base_\\d+ > rhs_base_\\d+, 1, 0\\)\\)" + }, + { + "print ipv6_compare(A, B, C)", + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS lhs_tokens_\\d+\\) > 2\\) OR \\(length\\(splitByChar\\('/', B\\) AS rhs_tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(lhs_tokens_\\d+\\[1\\]\\) AS lhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(lhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(lhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(lhs_tokens_\\d+\\[-1\\]\\)\\) AS lhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(IPv6StringToNumOrNull\\(rhs_tokens_\\d+\\[1\\]\\) AS rhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(rhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(rhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(rhs_tokens_\\d+\\[-1\\]\\)\\) AS rhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(toUInt8\\(min2\\(C, min2\\(ifNull\\(lhs_suffix_\\d+, 128\\), ifNull\\(rhs_suffix_\\d+, 128\\)\\)\\)\\) AS suffix_\\d+\\) IS NULL\\) OR \\(\\(bitShiftLeft\\(bitShiftRight\\(bitNot\\(reinterpretAsFixedString\\(CAST\\('0', 'UInt128'\\)\\)\\), 128 - suffix_\\d+ AS zeroes_\\d+\\), zeroes_\\d+\\) AS mask_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(lhs_ipv6_\\d+, mask_\\d+\\) AS lhs_base_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(rhs_ipv6_\\d+, mask_\\d+\\) AS rhs_base_\\d+\\) IS NULL\\), NULL, multiIf\\(lhs_base_\\d+ < rhs_base_\\d+, -1, lhs_base_\\d+ > rhs_base_\\d+, 1, 0\\)\\)" + }, + { + "print ipv4_is_in_range(A, B)", + "SELECT if\\(\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS 
NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" + }, { "print ipv4_is_match(A, B)", "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), 
toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" @@ -60,12 +72,12 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), 
mask_\\d+\\).1\\)\\)\\) = 0" }, { - "print parse_ipv4_mask(A, B)", - "SELECT if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\)" + "print ipv6_is_match(A, B)", + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS lhs_tokens_\\d+\\) > 2\\) OR \\(length\\(splitByChar\\('/', B\\) AS rhs_tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(lhs_tokens_\\d+\\[1\\]\\) AS lhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(lhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(lhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(lhs_tokens_\\d+\\[-1\\]\\)\\) AS lhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(IPv6StringToNumOrNull\\(rhs_tokens_\\d+\\[1\\]\\) AS rhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(rhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(rhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(rhs_tokens_\\d+\\[-1\\]\\)\\) AS rhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(toUInt8\\(min2\\(128, min2\\(ifNull\\(lhs_suffix_\\d+, 128\\), ifNull\\(rhs_suffix_\\d+, 128\\)\\)\\)\\) AS suffix_\\d+\\) IS NULL\\) OR \\(\\(bitShiftLeft\\(bitShiftRight\\(bitNot\\(reinterpretAsFixedString\\(CAST\\('0', 'UInt128'\\)\\)\\), 128 - suffix_\\d+ AS zeroes_\\d+\\), zeroes_\\d+\\) AS mask_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(lhs_ipv6_\\d+, mask_\\d+\\) AS lhs_base_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(rhs_ipv6_\\d+, mask_\\d+\\) AS rhs_base_\\d+\\) IS NULL\\), NULL, multiIf\\(lhs_base_\\d+ < rhs_base_\\d+, -1, lhs_base_\\d+ > rhs_base_\\d+, 1, 0\\)\\) = 0" }, { - "print ipv4_is_in_range(A, B)", - "SELECT if\\(\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) 
AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" + "print ipv6_is_match(A, B, C)", + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS lhs_tokens_\\d+\\) > 2\\) OR \\(length\\(splitByChar\\('/', B\\) AS rhs_tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(lhs_tokens_\\d+\\[1\\]\\) AS lhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(lhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(lhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(lhs_tokens_\\d+\\[-1\\]\\)\\) AS lhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(IPv6StringToNumOrNull\\(rhs_tokens_\\d+\\[1\\]\\) AS rhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(rhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(rhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(rhs_tokens_\\d+\\[-1\\]\\)\\) AS rhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(toUInt8\\(min2\\(C, min2\\(ifNull\\(lhs_suffix_\\d+, 128\\), ifNull\\(rhs_suffix_\\d+, 128\\)\\)\\)\\) AS suffix_\\d+\\) IS NULL\\) OR \\(\\(bitShiftLeft\\(bitShiftRight\\(bitNot\\(reinterpretAsFixedString\\(CAST\\('0', 'UInt128'\\)\\)\\), 128 - suffix_\\d+ AS zeroes_\\d+\\), zeroes_\\d+\\) AS mask_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(lhs_ipv6_\\d+, mask_\\d+\\) AS lhs_base_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(rhs_ipv6_\\d+, mask_\\d+\\) AS rhs_base_\\d+\\) IS NULL\\), NULL, multiIf\\(lhs_base_\\d+ < rhs_base_\\d+, -1, lhs_base_\\d+ > rhs_base_\\d+, 1, 0\\)\\) = 0" }, 
{ "print ipv4_is_private(A)", @@ -79,8 +91,16 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, "print parse_ipv4(A)", "SELECT multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)" }, + { + "print parse_ipv4_mask(A, B)", + "SELECT if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\)" + }, { "print parse_ipv6(A)", - "SELECT if\\(\\(ifNull\\(if\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS ipv4_\\d+\\) IS NULL, NULL, IPv4ToIPv6\\(ipv4_\\d+\\)\\), IPv6StringToNumOrNull\\(A\\)\\) AS ipv6_\\d+\\) IS NULL, NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(assumeNotNull\\(ipv6_\\d+\\)\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)" + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 
128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)" + }, + { + "print parse_ipv6_mask(A, B)", + "SELECT if\\(\\(if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\) AS ipv4\\) IS NULL, if\\(\\(length\\(splitByChar\\('/', concat\\(ifNull\\(toString\\(if\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\), ''\\), '/', ifNull\\(toString\\(B\\), ''\\)\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\), if\\(\\(length\\(splitByChar\\('/', ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(ipv4\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(ipv4\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(ipv4\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, 
\\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\)" } }))); From 5ad64959ef5947f36100f0adeea5fd550a68978d Mon Sep 17 00:00:00 2001 From: kashwy Date: Fri, 12 Aug 2022 11:47:25 -0700 Subject: [PATCH 100/342] Kusto-phase2: add bin_at function. 
fix trim error --- src/Parsers/Kusto/KQL_ReleaseNote.md | 8 +++++++ .../KustoFunctions/KQLGeneralFunctions.cpp | 24 ++++++++++--------- .../KustoFunctions/KQLStringFunctions.cpp | 8 +++---- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 18 +++++++++++--- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index a6073d8e00d9..91b3630f3f70 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -2,7 +2,15 @@ ## KQL implemented features # August 15, 2022 + **double quote support** + ``print res = strcat("double ","quote")`` ## Aggregate functions + - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) + `print res = bin_at(6.5, 2.5, 7)` + `print res = bin_at(1h, 1d, 12h)` + `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))` + `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` + - [array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) *Supports only basic lookup. 
Do not support start_index, length and occurrence* `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index 714265633d5e..dd79cc06898b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -29,36 +29,38 @@ bool Bin::convertImpl(String &out,IParser::Pos &pos) bool BinAt::convertImpl(String & out,IParser::Pos & pos) { - ParserKQLDateTypeTimespan time_span; double bin_size; - const String fn_name = getKQLFunctionName(pos); - if (fn_name.empty()) return false; + ++pos; + String origal_expr(pos->begin, pos->end); String expression_str = getConvertedArgument(fn_name, pos); + ++pos; String bin_size_str = getConvertedArgument(fn_name, pos); + ++pos; String fixed_point_str = getConvertedArgument(fn_name, pos); - bin_size_str = bin_size_str.substr(0, bin_size_str.size()-1); - auto t1 = std::format("toFloat64({})", fixed_point_str); auto t2 = std::format("toFloat64({})", expression_str); int dir = t2 >= t1 ? 
0 : -1; + bin_size = std::stod(bin_size_str); - if (time_span.parseConstKQLTimespan(bin_size_str)) + if (origal_expr == "datetime" or origal_expr == "date") { - bin_size = time_span.toSeconds(); - - out = std::format("toDateTime64({} + toInt64(({} -{}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); + out = std::format("toDateTime64({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); + } + else if (origal_expr == "timespan" or origal_expr =="time" or ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) + { + String bin_value = std::format("{} + toInt64(({} - {}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); + out = std::format("concat(toString( toInt32((({}) as x) / 3600)),':', toString( toInt32(x % 3600 / 60)),':',toString( toInt32(x % 3600 % 60)))", bin_value); } else { - bin_size = std::stod(bin_size_str); - out = std::format("{} + toInt64(({} -{}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); + out = std::format("{} + toInt64(({} - {}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); } return true; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 2a88a56b8442..285ed5c4a177 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -557,8 +557,8 @@ bool Trim::convertImpl(String & out,IParser::Pos & pos) String regex = getConvertedArgument(fn_name, pos); ++pos; String source = getConvertedArgument(fn_name, pos); - String ltrim = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as srcl, concat('random_str', {1}),'') as dstl) = srcl, {0}, dstl)", source, regex); - out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as srcr, concat('random_str', reverse({1})),'') as dstr) = srcr, {0}, reverse(dstr))", ltrim, regex); + String ltrim = std::format("if 
((replaceRegexpOne(concat('start_random_str_', {0}) as srcl, concat('start_random_str_', {1}),'') as dstl) = srcl, {0}, dstl)", source, regex); + out = std::format("if ((replaceRegexpOne(concat({0}, '_end_random_str') as srcr, concat({1}, '_end_random_str'),'') as dstr) = srcr, {0}, dstr)", ltrim, regex); return true; } @@ -573,7 +573,7 @@ bool TrimEnd::convertImpl(String & out,IParser::Pos & pos) String regex = getConvertedArgument(fn_name, pos); ++pos; String source = getConvertedArgument(fn_name, pos); - out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as src, concat('random_str', reverse({1})),'') as dst) = src, {0}, reverse(dst))", source, regex); + out = std::format("if ((replaceRegexpOne(concat({0}, '_end_random_str') as src, concat({1},'_end_random_str'),'') as dst) = src, {0}, dst)", source, regex); return true; } @@ -588,7 +588,7 @@ bool TrimStart::convertImpl(String & out,IParser::Pos & pos) String regex = getConvertedArgument(fn_name, pos); ++pos; String source = getConvertedArgument(fn_name, pos); - out = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as src, concat('random_str', {1}),'') as dst) = src, {0}, dst)", source, regex); + out = std::format("if ((replaceRegexpOne(concat('start_random_str_', {0}) as src, concat('start_random_str_', {1}),'') as dst) = src, {0}, dst)", source, regex); return true; } diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index f2994464e14a..0ef816646a44 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -99,15 +99,15 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, }, { "print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))", - "SELECT if((replaceRegexpOne(concat('random_str', concat('- ', 'Te st1', '// $')) AS src, concat('random_str', '[^\\\\w]+'), '') AS dst) = src, concat('- ', 'Te st1', '// $'), dst)" + "SELECT 
if((replaceRegexpOne(concat('start_random_str_', concat('- ', 'Te st1', '// $')) AS src, concat('start_random_str_', '[^\\\\w]+'), '') AS dst) = src, concat('- ', 'Te st1', '// $'), dst)" }, { "print trim_end('.com', 'bing.com')", - "SELECT if((replaceRegexpOne(concat('random_str', reverse('bing.com')) AS src, concat('random_str', reverse('.com')), '') AS dst) = src, 'bing.com', reverse(dst))" + "SELECT if((replaceRegexpOne(concat('bing.com', '_end_random_str') AS src, concat('.com', '_end_random_str'), '') AS dst) = src, 'bing.com', dst)" }, { "print trim('--', '--https://bing.com--')", - "SELECT if((replaceRegexpOne(concat('random_str', reverse(if((replaceRegexpOne(concat('random_str', '--https://bing.com--') AS srcl, concat('random_str', '--'), '') AS dstl) = srcl, '--https://bing.com--', dstl))) AS srcr, concat('random_str', reverse('--')), '') AS dstr) = srcr, if(dstl = srcl, '--https://bing.com--', dstl), reverse(dstr))" + "SELECT if((replaceRegexpOne(concat(if((replaceRegexpOne(concat('start_random_str_', '--https://bing.com--') AS srcl, concat('start_random_str_', '--'), '') AS dstl) = srcl, '--https://bing.com--', dstl), '_end_random_str') AS srcr, concat('--', '_end_random_str'), '') AS dstr) = srcr, if(dstl = srcl, '--https://bing.com--', dstl), dstr)" }, { "print bool(1)", @@ -164,6 +164,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, { "print timespan('1.5d')", "SELECT 129600." + }, + { + "print res = bin_at(6.5, 2.5, 7)", + "SELECT toFloat64(7) + (toInt64(((toFloat64(6.5) - toFloat64(7)) / 2.5) + -1) * 2.5) AS res" + }, + { + "print res = bin_at(1h, 1d, 12h)", + "SELECT concat(toString(toInt32(((toFloat64(43200.) + (toInt64(((toFloat64(3600.) 
- toFloat64(43200.)) / 86400) + -1) * 86400)) AS x) / 3600)), ':', toString(toInt32((x % 3600) / 60)), ':', toString(toInt32((x % 3600) % 60))) AS res" + }, + { + "print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))", + "SELECT toDateTime64(toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64('2017-05-15 10:20:00.0', 9, 'UTC')) - toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC'))) / 86400) + 0) * 86400), 9, 'UTC') AS res" } }))); From e0da9f2cf9a0da0bdc246a5547d8bcb629d80f80 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 12 Aug 2022 13:34:55 -0700 Subject: [PATCH 101/342] Implement some KQL conversion functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 28 +++++++++++++++ .../KustoFunctions/IParserKQLFunction.cpp | 8 +++++ .../Kusto/KustoFunctions/IParserKQLFunction.h | 1 + .../KustoFunctions/KQLCastingFunctions.cpp | 34 +++++++++++++----- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 11 ------ src/Parsers/tests/KQL/gtest_KQL_Binary.cpp | 2 -- .../tests/KQL/gtest_KQL_Conversion.cpp | 35 +++++++++++++++++++ 7 files changed, 97 insertions(+), 22 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 91b3630f3f70..b948dbf2443c 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -21,6 +21,34 @@ - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` `print output = array_length(dynamic([1, 2, 3]))` + +## Conversion +- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) + `print tobool(true) == true` + `print toboolean(false) == false` + `print tobool(0) == false` + `print toboolean(19819823) == true` + `print 
tobool(-2) == true` + `print isnull(toboolean('a'))` + `print tobool('true') == true` + `print toboolean('false') == false` + +- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) + `print todouble(4) == 4` + `print toreal(4.2) == 4.2` + `print isnull(todouble('a'))` + `print toreal('-0.3') == -0.3` + +- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) + `print isnull(toint('a'))` + `print toint(4) == 4` + `print toint('4') == 4` + `print isnull(toint(4.2))` + +- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) + `print tostring(123) == '123'` + `print tostring('asd') == 'asd'` + ## DateType - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) *Supports only 1D array* diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 243b67b73082..2310879862ad 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -18,6 +18,8 @@ #include #include +#include + #include namespace DB @@ -80,6 +82,12 @@ bool IParserKQLFunction::directMapping(String & out, IParser::Pos & pos, const S return false; } +String IParserKQLFunction::generateUniqueIdentifier() +{ + static pcg32_unique unique_random_generator; + return std::to_string(unique_random_generator()); +} + String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos) { if (auto optionalArgument = getOptionalArgument(function_name, pos)) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index b7f8427043cc..5758356b81e4 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -44,6 +44,7 @@ class IParserKQLFunction 
virtual bool convertImpl(String & out, IParser::Pos & pos) = 0; static bool directMapping(String & out, IParser::Pos & pos, const String & ch_fn); + static String generateUniqueIdentifier(); static String getArgument(const String & function_name, DB::IParser::Pos & pos); static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); static std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index acbb7468d204..b6082995ec15 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -9,9 +9,17 @@ namespace DB { bool ToBool::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format( + "multiIf(toString({0}) = 'true', true, " + "toString({0}) = 'false', false, toInt64OrNull(toString({0})) != 0)", + param, + generateUniqueIdentifier()); + return true; } bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) @@ -23,16 +31,24 @@ bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) bool ToDouble::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toFloat64OrNull(toString({0}))", param); + return true; } bool ToInt::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + 
return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toInt32OrNull(toString({0}))", param); + return true; } bool ToString::convertImpl(String & out, IParser::Pos & pos) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index a0e7cb30f79d..40f34f766b54 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -15,19 +15,8 @@ #include #include -#include - #include -namespace -{ -String generateUniqueIdentifier() -{ - static pcg32_unique unique_random_generator; - return std::to_string(unique_random_generator()); -} -} - namespace DB { bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) diff --git a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp index 600965dcef60..a1b26ee56147 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp @@ -1,7 +1,5 @@ #include -#include -#include #include INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Binary, ParserTest, diff --git a/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp new file mode 100644 index 000000000000..27e1167bde31 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp @@ -0,0 +1,35 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P( + ParserKQLQuery_Conversion, + ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print tobool(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0)" + }, + { + "print toboolean(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0)" + }, + { + "print todouble(A)", + "SELECT toFloat64OrNull(toString(A))" + }, + { + "print toint(A)", + "SELECT toInt32OrNull(toString(A))" + 
}, + { + "print toreal(A)", + "SELECT toFloat64OrNull(toString(A))" + }, + { + "print tostring(A)", + "SELECT ifNull(toString(A), '')" + } +}))); From 45b574485fb33eae3205ffe62f75ff982a4f486f Mon Sep 17 00:00:00 2001 From: kashwy Date: Tue, 16 Aug 2022 06:48:49 -0700 Subject: [PATCH 102/342] Kusto-phase2 : finish make series --- src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 300 +++++++++++++++------- src/Parsers/Kusto/ParserKQLMakeSeries.h | 20 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 34 ++- 3 files changed, 263 insertions(+), 91 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index 0c658b0ba7f1..03528b6af1ea 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -116,7 +116,7 @@ bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_ste if (end_pos == begin) end_pos = pos; - if (step_pos == begin) + if (String(step_pos->begin, step_pos->end) != "step") return false; if (String(from_pos->begin, from_pos->end) == "from") @@ -124,42 +124,236 @@ bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_ste ++from_pos; auto end_from_pos = (to_pos != begin) ? 
to_pos : step_pos; --end_from_pos; - from_to_step.from = String(from_pos->begin, end_from_pos->end); + from_to_step.from_str = String(from_pos->begin, end_from_pos->end); } - if (to_pos != begin) + if (String(to_pos->begin, to_pos->end) == "to") { ++to_pos; --step_pos; - from_to_step.to = String(to_pos->begin, step_pos->end); - ++step_pos; + from_to_step.to_str = String(to_pos->begin, step_pos->end); ++step_pos; } --end_pos; - from_to_step.step = String(step_pos->begin, end_pos->end); + ++step_pos; + from_to_step.step_str = String(step_pos->begin, end_pos->end); + + if (String(step_pos->begin, step_pos->end) == "time" || String(step_pos->begin, step_pos->end) == "timespan" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(from_to_step.step_str)) + { + from_to_step.is_timespan = true; + from_to_step.step = std::stod(getExprFromToken(from_to_step.step_str, pos.max_depth)); + } + else + from_to_step.step = std::stod(from_to_step.step_str); + return true; } +void ParserKQLMakeSeries :: makeNumericSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) +{ + String start_str, end_str; + String sub_query, main_query; + + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + auto step = from_to_step.step; + + if (!kql_make_series.from_to_step.from_str.empty()) + start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); + + if (!kql_make_series.from_to_step.to_str.empty()) + end_str = getExprFromToken(from_to_step.to_str, max_depth); + + String bin_str, start, end; + + if (!start_str.empty()) // has from + { + bin_str = std::format(" toFloat64({0}) + (toInt64(((toFloat64({1}) - toFloat64({0})) / {2}) ) * {2}) AS {1}_ali ", + start_str, axis_column, step); + start = std::format("toUInt64({})", 
start_str); + } + else + { + bin_str = std::format(" toFloat64(toInt64((toFloat64({0}) ) / {1}) * {1}) AS {0}_ali ", + axis_column, step); + } + + auto sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column); + + if (!end_str.empty()) + end = std::format("toUInt64({})", end_str); + + String range, condition; + if (!start_str.empty() && !end_str.empty()) + { + range = std::format("range({},{}, toUInt64({}))", start, end, step); + condition = std::format("{0}_ali >= {1} and {0}_ali <= {2}", axis_column, start, end); + } + else if (start_str.empty() && !end_str.empty()) + { + range = std::format("range(low, {} , toUInt64({}))", end, step); + condition = std::format("{}_ali <= {}", axis_column, end); + } + else if (!start_str.empty() && end_str.empty()) + { + range = std::format("range({}, high, toUInt64({}))", start, step); + condition = std::format("{}_ali >= {}", axis_column, start); + } + else + { + range = std::format("range(low, high, toUInt64({}))", step); + condition = "1"; //true + } + + auto range_len = std::format("length({})", range); + main_query = std::format("{} ", group_expression); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); //tupleElement(pp,2) as PriceAvg ,tupleElement(pp,1) + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 
0 : {} - length(ga)),1) )) as {}", + agg_column.alias, condition, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query +=", " + agg_group_column; + + axis_and_agg_alias_list +=", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + } + + auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf({0}_ali, {1}), arrayMap( x->(toFloat64(x)), {2})) ) as {0}", + axis_column, condition,range); + + main_query += ", " + axis_str; + auto sub_group_by = std::format("{}", group_expression); + + sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", + axis_column, axis_column,step, axis_and_agg_alias_list,main_query,sub_sub_query, sub_group_by); + + main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); + + kql_make_series.sub_query = std::move(sub_query); + kql_make_series.main_query = std::move(main_query); +} + +void ParserKQLMakeSeries :: makeTimeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) +{ + const uint64_t era_diff = 62135596800; // this magic number is the differicen is second form 0001-01-01 (Azure start time ) and 1970-01-01 (CH start time) + + String start_str, end_str; + String sub_query, main_query; + + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + auto step = from_to_step.step; + + if (!kql_make_series.from_to_step.from_str.empty()) + start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); + + if (!kql_make_series.from_to_step.to_str.empty()) + end_str = getExprFromToken(from_to_step.to_str, max_depth); + + String bin_str, start, 
end; + + uint64_t diff = 0; + if (!start_str.empty()) // has from + { + bin_str = std::format(" toFloat64(toDateTime64({0}, 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64({1}, 9, 'UTC')) - toFloat64(toDateTime64({0}, 9, 'UTC'))) / {2}) ) * {2}) AS {1}_ali ", + start_str, axis_column, step); + start = std::format("toUInt64(toDateTime64({},9,'UTC'))", start_str); + } + else + { + bin_str = std::format(" toInt64((toFloat64(toDateTime64({0}, 9, 'UTC')) + {1}) / {2}) * {2} AS {0}_ali ", + axis_column, era_diff, step); + diff = era_diff; + } + + auto sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column); + + if (!end_str.empty()) + end = std::format("toUInt64(toDateTime64({}, 9, 'UTC'))", end_str); + + String range, condition; + if (!start_str.empty() && !end_str.empty()) + { + range = std::format("range({},{}, toUInt64({}))", start, end, step); + condition = std::format("{0}_ali >= {1} and {0}_ali <= {2}", axis_column, start, end); + } + else if (start_str.empty() && !end_str.empty()) + { + range = std::format("range(low, {} + {}, toUInt64({}))", end, era_diff, step); + condition = std::format("{0}_ali - {1} < {2}", axis_column, era_diff, end); + } + else if (!start_str.empty() && end_str.empty()) + { + range = std::format("range({}, high, toUInt64({}))", start, step); + condition = std::format("{}_ali >= {}", axis_column, start); + } + else + { + range = std::format("range(low, high, toUInt64({}))", step); + condition = "1"; //true + } + + auto range_len = std::format("length({})", range); + main_query = std::format("{} ", group_expression); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); //tupleElement(pp,2) as PriceAvg ,tupleElement(pp,1) + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = 
std::format("arrayConcat(groupArrayIf ({}_ali,{}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", + agg_column.alias, condition, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query +=", " + agg_group_column; + + axis_and_agg_alias_list +=", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + } + auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf(toDateTime64({0}_ali - {1},9,'UTC'), {2}), arrayMap( x->(toDateTime64(x - {1} ,9,'UTC')), {3}) )) as {0}", + axis_column, diff, condition,range); + + main_query += ", " + axis_str; + auto sub_group_by = std::format("{}", group_expression); + + sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", + axis_column, axis_column,step, axis_and_agg_alias_list, main_query, sub_sub_query, sub_group_by); + + main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); + + kql_make_series.sub_query = std::move(sub_query); + kql_make_series.main_query = std::move(main_query); +} + bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (op_pos.empty()) return true; auto begin = pos; - pos = op_pos.back(); - String axis_column; - String group_expression; - ParserKeyword s_on("on"); ParserKeyword s_by("by"); ParserToken equals(TokenType::Equals); ParserToken comma(TokenType::Comma); - AggregationColumns aggregation_columns; - FromToStepClause from_to_step; + ASTPtr sub_qurery_table; + + KQLMakeSeries kql_make_series; + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; 
ParserKQLDateTypeTimespan time_span; @@ -177,8 +371,6 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parseFromToStepClause(from_to_step, pos)) return false; - // 'on' statement parameter, expecting scalar value of type 'int', 'long', 'real', 'datetime' or 'timespan'. - if (s_by.ignore(pos, expected)) { group_expression = getExprFromToken(pos); @@ -186,8 +378,6 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } - String subquery_columns; - for (auto agg_column : aggregation_columns) { String column_str = std::format("{}({}) AS {}_ali", agg_column.aggregation_fun, agg_column.column, agg_column.alias); @@ -197,90 +387,26 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec subquery_columns += ", "+ column_str; } - ASTPtr sub_qurery_table; - double step; - String sub_query ; - String main_query ; - String group_by; - - String start_str = getExprFromToken(from_to_step.from, pos.max_depth); - String end_str = getExprFromToken(from_to_step.to, pos.max_depth); - String step_str = from_to_step.step; - - if (time_span.parseConstKQLTimespan(step_str)) - { - step = time_span.toSeconds(); - - auto bin_str = std::format(" toUInt64(toFloat64(toDateTime64({},6,'UTC')) / {}) * {} AS {}_ali ", axis_column, step,step, axis_column); - auto sub_sub_query = std::format(" (Select {},{}, {} FROM {} GROUP BY {},{}_ali ORDER BY {}_ali) ", group_expression, subquery_columns, bin_str, table_name, group_expression, axis_column, axis_column); - - auto start = std::format("toUInt64(toDateTime64({},6,'UTC'))", start_str); - auto end = std::format("toUInt64(toDateTime64({},6,'UTC'))", end_str); - auto range = std::format("range({},{}, toUInt64({}))", start, end, step); - auto range_len = std::format("length({})", range); - main_query = std::format("{} ", group_expression); - - auto axis_and_agg_alias_list = axis_column; - auto final_axis_agg_alias_list 
=std::format("tupleElement(zipped,1) AS {}",axis_column); - int idx = 2; - for (auto agg_column : aggregation_columns) - { - String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}_ali >= {} and {}_ali <= {}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", - agg_column.alias, axis_column, start, axis_column, end, agg_column.default_value, range_len, range_len, agg_column.alias); - main_query +=", " + agg_group_column; - - axis_and_agg_alias_list +=", " + agg_column.alias; - final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); - } - auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf(toDateTime64({}_ali,6,'UTC'),{}_ali >= {} and {}_ali <= {}), arrayMap( x->(toDateTime64(x,6,'UTC')), {}) )) as {}", - axis_column, axis_column, start, axis_column, end, range, axis_column); - - main_query += ", " + axis_str; - auto sub_group_by = std::format("{}", group_expression); - - sub_query = std::format("( SELECT min({}_ali) AS low,max({}_ali) AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", - axis_column, axis_column,axis_and_agg_alias_list,main_query,sub_sub_query, sub_group_by); - - main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); - - } + if (from_to_step.is_timespan) + makeTimeSeries(kql_make_series, pos.max_depth); else - { - step = stod(step_str); + makeNumericSeries(kql_make_series, pos.max_depth); - sub_query = std::format("kql( {} | summarize {}, {} = toint({} / {}) * {} by {},{} )", - table_name, subquery_columns, axis_column, axis_column, step, subquery_columns, axis_column); - } - - Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); + Tokens token_subquery(kql_make_series.sub_query.c_str(), kql_make_series.sub_query.c_str() + kql_make_series.sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); if 
(!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) return false; tables = std::move(sub_qurery_table); - String converted_columns = main_query; - - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); - IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); + IParser::Pos pos_main_query(token_main_query, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) + if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, node, expected)) return false; - if (!group_by.empty()) - { - String converted_groupby = group_by; - - Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); - IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); - - if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) - return false; - } - pos = begin; return true; } - } diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h index b30155b1bd81..a89ec97174e3 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.h +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -26,11 +26,25 @@ class ParserKQLMakeSeries : public ParserKQLBase using AggregationColumns = std::vector; struct FromToStepClause { - String from; - String to; - String step; + String from_str; + String to_str; + String step_str; + bool is_timespan = false; + double step; }; + struct KQLMakeSeries { + AggregationColumns aggregation_columns; + FromToStepClause from_to_step; + String axis_column; + String group_expression; + String subquery_columns; + String sub_query; + String main_query; + }; + + void makeNumericSeries(KQLMakeSeries & kql_make_series, const uint32_t & 
max_depth); + void makeTimeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth); bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); const char * getName() const override { return "KQL project"; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index bfa52368c168..3c8705bc9fde 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -12,6 +12,9 @@ #include #include #include +#include +#include + namespace DB { @@ -94,6 +97,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKQLSort kql_sort_p; ParserKQLSummarize kql_summarize_p; ParserKQLTable kql_table_p; + ParserKQLMakeSeries kql_make_series_p; ASTPtr select_expression_list; ASTPtr tables; @@ -111,7 +115,8 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { "sort",&kql_sort_p}, { "order",&kql_sort_p}, { "summarize",&kql_summarize_p}, - { "table",&kql_table_p} + { "table",&kql_table_p}, + { "make-series",&kql_make_series_p} }; std::vector> operation_pos; @@ -137,6 +142,20 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ++pos; String kql_operator(pos->begin,pos->end); + if (kql_operator == "make") + { + ++pos; + ParserKeyword s_series("series"); + ParserToken s_dash(TokenType::Minus); + if (s_dash.ignore(pos,expected)) + { + if (s_series.ignore(pos,expected)) + { + kql_operator = "make-series"; + --pos; + } + } + } if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; @@ -187,6 +206,19 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) where_expression = kql_summarize_p.where_expression; } + kql_make_series_p.setTableName(table_name); + if (!kql_make_series_p.parse(pos, select_expression_list, expected)) + return false; + else + { + 
if (kql_make_series_p.group_expression_list) + group_expression_list = kql_make_series_p.group_expression_list; + + if (kql_make_series_p.tables) + tables = kql_make_series_p.tables; + + } + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); From 0637312ac20d882df61a5192053c1d00c7812b62 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Fri, 12 Aug 2022 12:34:03 -0400 Subject: [PATCH 103/342] Added functional tests for IP, String, Binary and Datetime --- .../0_stateless/02366_kql_create_table.sql | 8 +-- .../0_stateless/02366_kql_func_ip.reference | 53 +++++++++++++++++-- .../queries/0_stateless/02366_kql_func_ip.sql | 48 ++++++++++++++++- .../02366_kql_func_string.reference | 25 +++++++++ .../0_stateless/02366_kql_func_string.sql | 26 ++++++++- 5 files changed, 146 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql index 67f099a2d709..8820d4c30f8d 100644 --- a/tests/queries/0_stateless/02366_kql_create_table.sql +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -8,13 +8,7 @@ CREATE TABLE Customers Age Nullable(UInt8) ) ENGINE = Memory; -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); -INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); -INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); -INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); -INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); -INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); - +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled 
Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); Select '-- test create table --' ; Select * from kql(Customers|project FirstName) limit 1;; DROP TABLE IF EXISTS kql_table1; diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference index b6074a33b55b..732a5ad38b70 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.reference +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -1,11 +1,11 @@ -- ipv4_is_private(\'127.0.0.1\') -false +0 -- ipv4_is_private(\'10.1.2.3\') -true +1 -- ipv4_is_private(\'192.168.1.1/24\') -true +1 ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) -true +1 -- ipv4_is_private(\'abc\') \N -- ipv4_netmask_suffix(\'192.168.1.1/24\') @@ -34,3 +34,48 @@ ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) fe80:0000:0000:0000:085d:e82c:9446:7994 -- parse_ipv4(\'127.0.0.1\') 2130706433 +-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\') +1 +-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432 +1 +-- parse_ipv4_mask(\'abc\', 31) +\N +\N +-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31) +1 +-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\') +0 +-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24) +1 +-- ipv4_is_match(\'abc\', \'def\', 24) +\N +-- ipv4_compare() +0 +-1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +-- format_ipv4() +192.168.1.0 +192.168.1.1 +192.168.1.0 +192.168.1.0 +1 +1 +-- format_ipv4_mask() +192.168.1.0/24 +192.168.1.0/24 +192.168.1.0/24 +192.168.1.1/32 +1 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql 
b/tests/queries/0_stateless/02366_kql_func_ip.sql index a625c0bf4707..c70e01e2a5e7 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.sql +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -38,9 +38,53 @@ print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; print parse_ipv6('fe80::85d:e82c:9446:7994'); print '-- parse_ipv4(\'127.0.0.1\')'; print parse_ipv4('127.0.0.1'); --- TODO: --- print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); -- == true +print '-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')'; +print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); +print '-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432'; +print parse_ipv4_mask('127.0.0.1', 24) == 2130706432; +print '-- parse_ipv4_mask(\'abc\', 31)'; +print parse_ipv4_mask('abc', 31) +print '-- parse_ipv4_mask(\'192.1.168.2\', 1000)'; +print parse_ipv4_mask('192.1.168.2', 1000); +print '-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)'; +print parse_ipv4_mask('192.1.168.2', 31) == parse_ipv4_mask('192.1.168.3', 31); +print '-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_match('127.0.0.1', '127.0.0.1'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')'; +print ipv4_is_match('192.168.1.1', '192.168.1.255'); +print '-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')'; +print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)'; +print ipv4_is_match('192.168.1.1', '192.168.1.255', 24); +print '-- ipv4_is_match(\'abc\', \'def\', 24)'; +print ipv4_is_match('abc', 'dev', 24); +print '-- ipv4_compare()'; +print ipv4_compare('127.0.0.1', '127.0.0.1'); +print ipv4_compare('192.168.1.1', '192.168.1.255'); +print ipv4_compare('192.168.1.255', '192.168.1.1'); +print ipv4_compare('192.168.1.1/24', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print ipv4_compare('192.168.1.1/24', '192.168.1.255'); +print 
ipv4_compare('192.168.1.1', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1/30', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.0', 31); +print ipv4_compare('192.168.1.1/24', '192.168.1.255', 31); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print '-- format_ipv4()'; +print format_ipv4('192.168.1.255', 24); +print format_ipv4('192.168.1.1', 32); +print format_ipv4('192.168.1.1/24', 32); +print format_ipv4(3232236031, 24); +print format_ipv4('192.168.1.1/24', -1) == ''; +print format_ipv4('abc', 24) == ''; +print '-- format_ipv4_mask()'; +print format_ipv4_mask('192.168.1.255', 24); +print format_ipv4_mask(3232236031, 24); +print format_ipv4_mask('192.168.1.1', 24); +print format_ipv4_mask('192.168.1.1', 32); +print format_ipv4_mask('192.168.1.1/24', -1) == ''; +print format_ipv4_mask('abc', 24) == ''; diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference index 255acb486cd9..78e130ad092d 100644 --- a/tests/queries/0_stateless/02366_kql_func_string.reference +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -260,3 +260,28 @@ PINEAPPLE 2 2 -1 +-- base64_encode_fromguid() +YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi +-- base64_decode_toarray() +[75,117,115,116,111] +-- base64_decode_toguid() +1 +-- parse_url() +{"Scheme":"scheme","Host":"","Port":"0","Path":"/this/is/a/path","Username":"username","Password":"password","Query Parameters":{"k1":"v1","k2":"v2"},"Fragment":"fragment"} +-- parse_urlquery() +{"Query Parameters":{"k1":"v1","k2":"v2","k3":"v3"}} +-- strcmp() +0 1 -1 1 +-- translate() +kusto xxx +-- trim() +https://www.ibm.com +-- trim_start() +www.ibm.com +Te st1// $ +-- trim_end() +https +-- replace_regex +Number was: 1 +-- has_any_index() +0 1 -1 -1 diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql index cdf9b1e4b171..b49a80a363fb 100644 --- 
a/tests/queries/0_stateless/02366_kql_func_string.sql +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -182,4 +182,28 @@ print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/qu Customers | project indexof('abcdefg','cde') | take 1; Customers | project indexof('abcdefg','cde',2) | take 1; Customers | project indexof('abcdefg','cde',6) | take 1; - +print '-- base64_encode_fromguid()'; +print base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'); +print '-- base64_decode_toarray()'; +print base64_decode_toarray('S3VzdG8='); +print '-- base64_decode_toguid()'; +print base64_decode_toguid(base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')) == 'ae3133f2-6e22-49ae-b06a-16e6a9b212eb'; +print '-- parse_url()'; +print parse_url('scheme://username:password@host:1234/this/is/a/path?k1=v1&k2=v2#fragment'); +print '-- parse_urlquery()'; +print parse_urlquery('k1=v1&k2=v2&k3=v3'); +print '-- strcmp()'; +print strcmp('ABC','ABC'), strcmp('abc','ABC'), strcmp('ABC','abc'), strcmp('abcde','abc'); +print '-- translate()'; +print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); +print '-- trim()'; +print trim("--", "--https://www.ibm.com--"); +print '-- trim_start()'; +print trim_start("https://", "https://www.ibm.com"); +print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); +print '-- trim_end()'; +print trim_end("://www.ibm.com", "https://www.ibm.com"); +print '-- replace_regex'; +print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); +print '-- has_any_index()'; +print has_any_index('this is an example', dynamic(['this', 'example'])), has_any_index("this is an example", dynamic(['not', 'example'])), has_any_index("this is an example", dynamic(['not', 'found'])), has_any_index("this is an example", dynamic([])); From afb6064b14f446c7a1e736e782429091e5e533ee Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Fri, 12 Aug 2022 12:35:56 -0400 Subject: [PATCH 
104/342] Add tests for binary and datetime --- .../02366_kql_func_binary.reference | 7 ++++ .../0_stateless/02366_kql_func_binary.sql | 8 ++++ .../02366_kql_func_datetime.reference | 28 +++++++++++++ .../0_stateless/02366_kql_func_datetime.sql | 41 +++++++++++++++++++ 4 files changed, 84 insertions(+) create mode 100644 tests/queries/0_stateless/02366_kql_func_binary.reference create mode 100644 tests/queries/0_stateless/02366_kql_func_binary.sql create mode 100644 tests/queries/0_stateless/02366_kql_func_datetime.reference create mode 100644 tests/queries/0_stateless/02366_kql_func_datetime.sql diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference new file mode 100644 index 000000000000..6276cd6d8675 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -0,0 +1,7 @@ + -- binary functions +4 7 +1 +1 +1 +7 3 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql new file mode 100644 index 000000000000..824022b564ce --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -0,0 +1,8 @@ +set dialect='kusto'; +print ' -- binary functions'; +print binary_and(4,7), binary_or(4,7); +print binary_shift_left(1, 1) == binary_shift_left(1, 65); +print binary_shift_right(2, 1) == binary_shift_right(2, 65); +print binary_shift_right(binary_shift_left(1, 65), 65) == 1; +print binary_xor(2, 5), bitset_count_ones(42); +print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.reference b/tests/queries/0_stateless/02366_kql_func_datetime.reference new file mode 100644 index 000000000000..e87db62ebdf8 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.reference @@ -0,0 +1,28 @@ +-- dayofmonth() +31 +-- dayofweek() +4 +-- dayofyear() +365 +-- getmonth() +10 +-- getyear() +2015 +-- hoursofday() +23 +-- 
startofday() +2017-01-02 00:00:00.000000000 +-- startofmonth() +2016-12-01 00:00:00.000000000 +2017-02-01 00:00:00.000000000 +-- startofweek() +2017-01-08 00:00:00.000000000 +-- startofyear() +2018-01-01 00:00:00.000000000 +-- unixtime_seconds_todatetime() +2019-01-01 00:00:00.000000000 +-- weekofyear() +52 +-- monthofyear() +-- now() +1 diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.sql b/tests/queries/0_stateless/02366_kql_func_datetime.sql new file mode 100644 index 000000000000..fb2e0f68be1b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.sql @@ -0,0 +1,41 @@ +set dialect = 'kusto'; + + +print '-- dayofmonth()'; +print dayofmonth(datetime(2015-12-31)); +print '-- dayofweek()'; +print dayofweek(datetime(2015-12-31)); +print '-- dayofyear()'; +print dayofyear(datetime(2015-12-31)); +print '-- getmonth()'; +print getmonth(datetime(2015-10-12)); +print '-- getyear()'; +print getyear(datetime(2015-10-12)); +print '-- hoursofday()'; +print hourofday(datetime(2015-12-31 23:59:59.9)); +print '-- startofday()' +-- print startofday(datetime(2017-01-01 10:10:17)); +print startofday(datetime(2017-01-01 10:10:17), -1); +print startofday(datetime(2017-01-01 10:10:17), 1); +print '-- startofmonth()'; +-- print startofmonth(datetime(2017-01-01 10:10:17)); +print startofmonth(datetime(2017-01-01 10:10:17), -1); +print startofmonth(datetime(2017-01-01 10:10:17), 1); +print '-- startofweek()' +-- print startofweek(datetime(2017-01-01 10:10:17)); +print startofweek(datetime(2017-01-01 10:10:17), -1); +print startofweek(datetime(2017-01-01 10:10:17), 1); +print '-- startofyear()' +-- print startofyear(datetime(2017-01-01 10:10:17)); +print startofyear(datetime(2017-01-01 10:10:17), -1); +print startofyear(datetime(2017-01-01 10:10:17), 1); +print '-- unixtime_seconds_todatetime()'; +print unixtime_seconds_todatetime(1546300800); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- monthofyear()' +print 
monthofyear(datetime(2015-12-31)); +print '-- now()'; +print getyear(now(-2d))>1900; + + From 979ea4db188961adb504eaab683109d1bca887e2 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Fri, 12 Aug 2022 14:22:47 -0400 Subject: [PATCH 105/342] Added ipv6 tests --- .../queries/0_stateless/02366_kql_func_ip.sql | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql index c70e01e2a5e7..638aaf3a2b0f 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.sql +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -85,6 +85,40 @@ print format_ipv4_mask('192.168.1.1', 24); print format_ipv4_mask('192.168.1.1', 32); print format_ipv4_mask('192.168.1.1/24', -1) == ''; print format_ipv4_mask('abc', 24) == ''; - - - +print '-- parse_ipv6_mask()'; +print parse_ipv6_mask("127.0.0.1", 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'; +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'; +-- print parse_ipv6_mask("192.168.255.255", 120) == '0000:0000:0000:0000:0000:ffff:c0a8:ff00'; +print parse_ipv6_mask("192.168.255.255/24", 124) == '0000:0000:0000:0000:0000:ffff:c0a8:ff00'; +print parse_ipv6_mask("255.255.255.255", 128) == '0000:0000:0000:0000:0000:ffff:ffff:ffff'; +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 128) == 'fe80:0000:0000:0000:085d:e82c:9446:7994'; +print parse_ipv6_mask("fe80::85d:e82c:9446:7994/120", 124) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'; +-- print parse_ipv6_mask("::192.168.255.255", 128) == '0000:0000:0000:0000:0000:ffff:c0a8:ffff'; +-- print parse_ipv6_mask("::192.168.255.255/24", 128) == '0000:0000:0000:0000:0000:ffff:c0a8:ff00'; +print '-- ipv6_is_match()'; +print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false; +print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true; 
+print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true; +print ipv6_is_match('192.168.1.1', '192.168.1.1'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '192.168.1.255'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7994'); -- // Equal IPs +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '::ffff:c0a8:0101'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.0', 31); -- // 31 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '192.168.1.255', 31); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255', 24); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7998', 120); -- // 120 bit 
IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998', 127); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255', 120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24', 127); -- // 120 bit IP6-prefix is used for comparison \ No newline at end of file From 92b11aedaf2f63d6f65222441f5260e914baaf0f Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Fri, 12 Aug 2022 14:26:52 -0400 Subject: [PATCH 106/342] Added missing file for IPv6 tests --- .../0_stateless/02366_kql_func_ip.reference | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference index 732a5ad38b70..7c1d2907d5a8 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.reference +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -79,3 +79,37 @@ fe80:0000:0000:0000:085d:e82c:9446:7994 192.168.1.1/32 1 1 +-- parse_ipv6_mask() +1 +1 +1 +1 +1 +1 +-- ipv6_is_match() +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 From 94fa77787f840fb8dfd29ace2567675fed1e1a13 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Mon, 15 Aug 2022 17:12:04 -0400 Subject: [PATCH 107/342] Added datatype tests --- .../0_stateless/02366_kql_datatype.reference | 68 ++++++++++++++ .../0_stateless/02366_kql_datatype.sql | 88 +++++++++++++++++++ .../02366_kql_func_string.reference | 2 + .../0_stateless/02366_kql_func_string.sql | 2 + 4 files changed, 160 insertions(+) create mode 100644 tests/queries/0_stateless/02366_kql_datatype.reference create mode 100644 tests/queries/0_stateless/02366_kql_datatype.sql diff --git a/tests/queries/0_stateless/02366_kql_datatype.reference 
b/tests/queries/0_stateless/02366_kql_datatype.reference new file mode 100644 index 000000000000..eb34b5761d85 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.reference @@ -0,0 +1,68 @@ +-- bool +true +\N +-- int +123 +\N +-- long +123 +255 +-1 +\N +456 +-- real +0.01 +\N +nan +inf +-inf +-- datetime +2015-12-31 23:59:59.900000000 +2015-12-31 00:00:00.000000000 +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.000000000 +1970-01-01 00:00:00.000000000 +2014-11-08 00:00:00.000000000 +-- guid +172800 +5400 +1800 +10 +0.1 +0.1 +0.00001 +0 +3 +-- null +1 +\N [NULL] \N \N \N \N +-- dynamic +[1,2,3] +['a','b','c'] +-- cast functions +true +1 +-- tobool("false") +false +1 +-- tobool(1) +true +1 +-- tobool(123) +true +1 +-- tobool("abc") +\N +\N +-- todouble() +123.4 +\N +-- toreal() +123.4 +\N +-- toint() +1 +\N +-- tostring() +123 +1 diff --git a/tests/queries/0_stateless/02366_kql_datatype.sql b/tests/queries/0_stateless/02366_kql_datatype.sql new file mode 100644 index 000000000000..9b1d0346360f --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.sql @@ -0,0 +1,88 @@ +set dialect = 'kusto'; + +print '-- bool' +print bool(true); +print bool(true); +print bool(null); +print '-- int'; +print int(123); +print int(null); +print '-- long'; +print long(123); +print long(0xff); +print long(-1); +print long(null); +print 456; +print '-- real'; +print real(0.01); +print real(null); +print real(nan); +print real(+inf); +print real(-inf); +print '-- datetime'; +print datetime(2015-12-31 23:59:59.9); +print datetime(2015-12-31); +print datetime('2014-05-25T08:20:03.123456'); +print datetime('2014-11-08 15:55:55'); +print datetime('2014-11-08 15:55'); +print datetime('2014-11-08'); +-- print datetime(null); +-- print datetime('2014-05-25T08:20:03.123456Z'); +-- print datetime('2014-11-08 15:55:55.123456Z'); +print '-- guid' +print guid(74be27de-1e4e-49d9-b579-fe0b331d3642) +-- print guid(null) +print '-- timespan (time)'; +print timespan(2d); -- 2 days 
+print timespan(1.5h); -- 1.5 hour +print timespan(30m); -- 30 minutes +print timespan(10s); -- 10 seconds +print timespan(0.1s); -- 0.1 second +print timespan(100ms); -- 100 millisecond +print timespan(10microsecond); -- 10 microseconds +print timespan(1tick); +print timespan(1.5h) / timespan(30m); +print '-- null'; +print isnull(null); +print bool(null), dynamic(null), int(null), long(null), real(null), double(null); +print '-- dynamic'; -- only support 1D array at the moment +print dynamic([1,2,3]); +print dynamic(['a', 'b', 'c']); + +print '-- cast functions' +print '--tobool("true")'; -- == true +print tobool('true'); -- == true +print tobool('true') == toboolean('true'); -- == true +print '-- tobool("false")'; -- == false +print tobool('false'); -- == false +print tobool('false') == toboolean('false'); -- == false +print '-- tobool(1)'; -- == true +print tobool(1); -- == true +print tobool(1) == toboolean(1); -- == true +print '-- tobool(123)'; -- == true +print tobool(123); -- == true +print tobool(123) == toboolean(123); -- == true +print '-- tobool("abc")'; -- == null +print tobool('abc'); -- == null +print tobool('abc') == toboolean('abc'); -- == null +print '-- todouble()'; +print todouble('123.4'); +print todouble('abc') == null; +print '-- toreal()'; +print toreal("123.4"); +print toreal('abc') == null; +print '-- toint()'; +print toint("123") == int(123); +print toint('abc'); +print '-- tostring()'; +print tostring(123); +print tostring(null) == ''; + +-- TODO: +-- print '-- totimespan()'; +-- print totimespan('0.00:01:00'); +-- print totimespan('abc') == null; +-- print '-- todatetime()'; +-- print todatetime('2015-12-24'); +-- print todatetime('abc') == null; + diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference index 78e130ad092d..25da15bc25db 100644 --- a/tests/queries/0_stateless/02366_kql_func_string.reference +++ 
b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -276,11 +276,13 @@ YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi kusto xxx -- trim() https://www.ibm.com +Te st1 -- trim_start() www.ibm.com Te st1// $ -- trim_end() https +- Te st1 -- replace_regex Number was: 1 -- has_any_index() diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql index b49a80a363fb..d367ec553c4a 100644 --- a/tests/queries/0_stateless/02366_kql_func_string.sql +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -198,11 +198,13 @@ print '-- translate()'; print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); print '-- trim()'; print trim("--", "--https://www.ibm.com--"); +print trim("[^\w]+", strcat("- ","Te st", "1", "// $")); print '-- trim_start()'; print trim_start("https://", "https://www.ibm.com"); print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); print '-- trim_end()'; print trim_end("://www.ibm.com", "https://www.ibm.com"); +print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $")); print '-- replace_regex'; print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); print '-- has_any_index()'; From 63dbb3f7f0d15442c4faccc3910ba7af745a14af Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 17 Aug 2022 23:17:29 -0700 Subject: [PATCH 108/342] Kusto-phase2 : fixed the double quote issue --- .../KustoFunctions/IParserKQLFunction.cpp | 9 ++++-- .../KustoFunctions/KQLDataTypeFunctions.cpp | 28 ++++++++++--------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 2310879862ad..d81ba571aec0 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -102,7 +102,7 @@ String IParserKQLFunction::getConvertedArgument(const String & 
fn_name, IParser: std::vector tokens; std::unique_ptr fun; - if (pos->type == TokenType::ClosingRoundBracket) + if (pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) return converted_arg; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -117,7 +117,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: { tokens.push_back(IParserKQLFunction::getExpression(pos)); } - else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) { break; } @@ -133,7 +133,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: } } ++pos; - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) break; } for (auto token : tokens) @@ -213,6 +213,9 @@ String IParserKQLFunction::getExpression(IParser::Pos & pos) arg = std::to_string(time_span.toSeconds()); } } + else if (pos->type == TokenType::QuotedIdentifier) + arg = "'" + String(pos->begin + 1,pos->end - 1) + "'"; + return arg; } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 0f60bf6d3266..4caf4188c8c7 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -52,24 +52,26 @@ bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) bool DatatypeDynamic::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin, pos->end); + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String array; - ++pos; //go pass "dynamic" string - while (pos->type 
!= TokenType::ClosingRoundBracket) - { - if (pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + ++pos; + if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + while (pos->type != TokenType::ClosingRoundBracket) { - array += String(pos->begin, pos->end); + auto tmp_arg = getConvertedArgument(fn_name, pos); + array = array.empty() ? tmp_arg : array +", " + tmp_arg; + ++pos; } - ++pos; + out = "array (" + array + ")"; + return true; } - if (pos->type == TokenType::ClosingRoundBracket) - array += String(pos->begin, pos->end); else - return false; - - out = "array" + array; - return true; + return false; // should throw exception , later } bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) From 6754f6d67b1b654c2f81cd907b06f9fc4f214c92 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 17 Aug 2022 05:53:02 -0700 Subject: [PATCH 109/342] Implement array_iff / array_iif --- src/Parsers/Kusto/KQL_ReleaseNote.md | 10 +++++++++- .../KustoFunctions/KQLDynamicFunctions.cpp | 20 ++++++++++++++++--- .../KustoFunctions/KQLFunctionFactory.cpp | 1 + src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp | 17 ++++++++++++++++ 4 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index b948dbf2443c..1c68a6563f94 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,6 +1,14 @@ ## KQL implemented features +# August XX, 2022 +## Dynamic functions +- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print 
array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` + `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + # August 15, 2022 **double quote support** ``print res = strcat("double ","quote")`` @@ -49,7 +57,7 @@ `print tostring(123) == '123'` `print tostring('asd') == 'asd'` -## DateType +## Data Types - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) *Supports only 1D array* `print output = dynamic(['a', 'b', 'c'])` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 3f534679c584..0dfa1907dc3a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -15,6 +15,8 @@ #include #include +#include + namespace DB { @@ -25,11 +27,23 @@ bool ArrayConcat::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ArrayIif::convertImpl(String &out,IParser::Pos &pos) +bool ArrayIif::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) return false; + + const auto conditions = getArgument(function_name, pos); + const auto if_true = getArgument(function_name, pos); + const auto if_false = getArgument(function_name, pos); + + out = std::format( + "arrayMap(x -> if(x.1 != 0, x.2, x.3), arrayZip({0}, arrayResize({1}, length({0}), null), arrayResize({2}, length({0}), null)))", + conditions, + if_true, + if_false); + + return true; } bool ArrayIndexOf::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 6cfd67514a24..d7619c8dc40f 100644 --- 
a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -93,6 +93,7 @@ namespace DB {"url_encode", KQLFunctionValue::url_encode}, {"array_concat", KQLFunctionValue::array_concat}, + {"array_iff", KQLFunctionValue::array_iif}, {"array_iif", KQLFunctionValue::array_iif}, {"array_index_of", KQLFunctionValue::array_index_of}, {"array_length", KQLFunctionValue::array_length}, diff --git a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp new file mode 100644 index 000000000000..1e316593a5b9 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp @@ -0,0 +1,17 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print array_iff(A, B, C)", + "SELECT arrayMap(x -> if((x.1) != 0, x.2, x.3), arrayZip(A, arrayResize(B, length(A), NULL), arrayResize(C, length(A), NULL)))" + }, + { + "print array_iif(A, B, C)", + "SELECT arrayMap(x -> if((x.1) != 0, x.2, x.3), arrayZip(A, arrayResize(B, length(A), NULL), arrayResize(C, length(A), NULL)))" + } +}))); From 59099b3d3ccbc6f2bf9457d93b64b6c28af3f7a0 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 19 Aug 2022 07:24:36 -0700 Subject: [PATCH 110/342] Set the release date --- src/Parsers/Kusto/KQL_ReleaseNote.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 1c68a6563f94..5905625bb82c 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,7 +1,7 @@ ## KQL implemented features -# August XX, 2022 +# August 29, 2022 ## Dynamic functions - [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) `print array_iif(dynamic([true, false, true]), 
dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` From ea136d13970a6eb31cf04eaea1023e113678e8cd Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 12 Aug 2022 10:24:52 -0700 Subject: [PATCH 111/342] Part2 DateTime functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 41 ++++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 207 +++++++++++++++--- .../KustoFunctions/KQLDateTimeFunctions.h | 8 + .../KustoFunctions/KQLFunctionFactory.cpp | 5 + .../Kusto/KustoFunctions/KQLFunctionFactory.h | 1 + .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 73 ++++++ 6 files changed, 301 insertions(+), 34 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 5905625bb82c..178ba4fcc083 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -162,6 +162,15 @@ - [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) `print unixtime_seconds_todatetime(1546300800)` + +- [unixtime_microseconds_todatetime] + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime] + `print unixtime_milliseconds_todatetime(1546300800000)` + +- [unixtime_nanoseconds_todatetime] + `print unixtime_nanoseconds_todatetime(1546300800000000000)` - [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) `print dayofweek(datetime(2015-12-20))` @@ -176,6 +185,38 @@ `print now(-2d) ` `print now(time(1d))` +- [ago] + `print ago(2h)` + +- [endofday] + `print endofday(datetime(2017-01-01 10:10:17), -1)` + `print endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth] + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek] + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + 
`print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear] + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + `print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` + +- [make_datetime] + `print make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + `print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff] + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` ## Binary functions - [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 58d8536fb49b..094410cb023f 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -35,16 +35,24 @@ bool DateTime::convertImpl(String & out, IParser::Pos & pos) bool Ago::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + { + const auto offset = getConvertedArgument(fn_name, pos); + out = std::format("now64(9,'UTC') - {}", offset); + } + else + out = "now64(9,'UTC')"; + return true; } bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + return directMapping(out, pos, "date_add"); }; bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) @@ -56,9 +64,21 @@ bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) bool 
DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + String arguments; + + arguments = arguments + getConvertedArgument(fn_name, pos) + ","; + ++pos; + arguments = arguments + getConvertedArgument(fn_name, pos) + ","; + ++pos; + arguments = arguments + getConvertedArgument(fn_name, pos); + + out = std::format("ABS(DateDiff({}))",arguments); + return true; + } bool DayOfMonth::convertImpl(String & out, IParser::Pos & pos) @@ -84,25 +104,93 @@ bool DayOfYear::convertImpl(String & out, IParser::Pos & pos) return directMapping(out, pos, "toDayOfYear"); } +bool EndOfMonth::convertImpl(String & out, IParser::Pos & pos) +{ + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset = "0"; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + + out = std::format("toDateTime(toStartOfDay({}),9,'UTC') + (INTERVAL {} +1 MONTH) - (INTERVAL 1 microsecond)", datetime_str, toString(offset)); + + return true; + +} + bool EndOfDay::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset = "0"; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + out = std::format("toDateTime(toStartOfDay({}),9,'UTC') + (INTERVAL {} +1 DAY) - (INTERVAL 1 microsecond)", datetime_str, toString(offset)); + + return true; + } bool EndOfWeek::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return 
false; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset = "0"; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + out = std::format("toDateTime(toStartOfDay({}),9,'UTC') + (INTERVAL {} +1 WEEK) - (INTERVAL 1 microsecond)", datetime_str, toString(offset)); + + return true; + } bool EndOfYear::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset = "0"; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + out = std::format("toDateTime(toStartOfDay({}),9,'UTC') + (INTERVAL {} +1 YEAR) - (INTERVAL 1 microsecond)", datetime_str, toString(offset)); + + return true; + } bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos) @@ -143,9 +231,44 @@ bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos) bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String arguments; + int number_of_arguments=1; + String argument; + while (pos->type != TokenType::ClosingRoundBracket) + { + argument = String(pos->begin,pos->end); + auto dot_pos = argument.find('.'); + + if (dot_pos == String::npos) + arguments = arguments + String(pos->begin,pos->end); + else + { + arguments = arguments + argument.substr(dot_pos-1, dot_pos) + "," + argument.substr(dot_pos+1,argument.length()); + number_of_arguments++; + } + + ++pos; + if(pos->type == TokenType::Comma) + number_of_arguments++; + } + + 
while(number_of_arguments < 7) + { + arguments = arguments+ ","; + arguments = arguments+ "0"; + number_of_arguments++; + } + arguments = arguments + ",7,'UTC'"; + + out = std::format("makeDateTime64({})",arguments); + + return true; } bool Now::convertImpl(String & out, IParser::Pos & pos) @@ -174,7 +297,7 @@ bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) ++pos; const String datetime_str = getConvertedArgument(fn_name, pos); - String offset; + String offset = "0"; if (pos->type == TokenType::Comma) { @@ -194,7 +317,7 @@ bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) ++pos; const String datetime_str = getConvertedArgument(fn_name, pos); - String offset; + String offset = "0"; if (pos->type == TokenType::Comma) { @@ -214,7 +337,7 @@ bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) ++pos; const String datetime_str = getConvertedArgument(fn_name, pos); - String offset; + String offset = "0"; if (pos->type == TokenType::Comma) { @@ -234,7 +357,7 @@ bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) ++pos; const String datetime_str = getConvertedArgument(fn_name, pos); - String offset ; + String offset = "0"; if (pos->type == TokenType::Comma) { @@ -247,23 +370,39 @@ bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) bool UnixTimeMicrosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String value = getConvertedArgument(fn_name, pos); + out = std::format("fromUnixTimestamp64Micro({},'UTC')", value); + return true; } bool UnixTimeMillisecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String value = 
getConvertedArgument(fn_name, pos); + out = std::format("fromUnixTimestamp64Milli({},'UTC')", value); + return true; + } bool UnixTimeNanosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String value = getConvertedArgument(fn_name, pos); + out = std::format("fromUnixTimestamp64Nano({},'UTC')", value); + return true; } bool UnixTimeSecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index adf95a39a64a..bde104e88b96 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -19,6 +19,7 @@ class DateTime : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; };*/ + class Ago : public IParserKQLFunction { protected: @@ -75,6 +76,13 @@ class EndOfDay : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; +class EndOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + class EndOfWeek : public IParserKQLFunction { protected: diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index d7619c8dc40f..a25ca15b9244 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -31,6 +31,8 @@ namespace DB {"endofday", KQLFunctionValue::endofday}, {"endofweek", KQLFunctionValue::endofweek}, {"endofyear", KQLFunctionValue::endofyear}, + {"endofmonth", KQLFunctionValue::endofmonth}, + {"format_datetime", KQLFunctionValue::format_datetime}, 
{"format_timespan", KQLFunctionValue::format_timespan}, {"getmonth", KQLFunctionValue::getmonth}, @@ -273,6 +275,9 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::endofyear: return std::make_unique(); + case KQLFunctionValue::endofmonth: + return std::make_unique(); + case KQLFunctionValue::monthofyear: return std::make_unique(); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 38bac6d641a0..1938d449fd3a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -19,6 +19,7 @@ namespace DB endofday, endofweek, endofyear, + endofmonth, monthofyear, format_datetime, format_timespan, diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 74d13c60d050..87af9836c6cd 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -86,6 +86,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, "SELECT toDateTime64(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1)" }, + { + "print startofyear(datetime(2017-01-01 10:10:17), -1)", + "SELECT toDateTime64(toStartOfYear(toDateTime64('2017-01-01 10:10:17', 9, 'UTC'), 'UTC'), 9, 'UTC') + toIntervalYear(-1)" + }, { "print monthofyear(datetime(2015-12-14))", "SELECT toMonth(toDateTime64('2015-12-14', 9, 'UTC'))" @@ -125,6 +129,75 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, { "print now(1d)", "SELECT now64(9, 'UTC') + 86400." + }, + { + "print ago(2d)", + "SELECT now64(9, 'UTC') - 172800." 
+ }, + { + "print endofday(datetime(2017-01-01 10:10:17), -1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofday(datetime(2017-01-01 10:10:17), 1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(1 + 1)) - toIntervalMicrosecond(1)" + + }, + { + "print endofmonth(datetime(2017-01-01 10:10:17), -1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(-1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofmonth(datetime(2017-01-01 10:10:17), 1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofweek(datetime(2017-01-01 10:10:17), -1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(-1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofweek(datetime(2017-01-01 10:10:17), 1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), -1) ", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalYear(-1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), 1)" , + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalYear(1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print make_datetime(2017,10,01)", + "SELECT makeDateTime64(2017, 10, 1, 0, 0, 0, 0, 7, 'UTC')" + }, + { + "print make_datetime(2017,10,01,12,10)", + "SELECT makeDateTime64(2017, 10, 1, 12, 10, 0, 0, 7, 'UTC')" + }, + { + "print make_datetime(2017,10,01,12,11,0.1234567)", + "SELECT 
makeDateTime64(2017, 10, 1, 12, 11, 0, 1234567, 7, 'UTC')" + }, + { + "print unixtime_microseconds_todatetime(1546300800000000)", + "SELECT fromUnixTimestamp64Micro(1546300800000000, 'UTC')" + }, + { + "print unixtime_milliseconds_todatetime(1546300800000)", + "SELECT fromUnixTimestamp64Milli(1546300800000, 'UTC')" + }, + { + "print unixtime_nanoseconds_todatetime(1546300800000000000)", + "SELECT fromUnixTimestamp64Nano(1546300800000000000, 'UTC')" + }, + { + "print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))", + "SELECT ABS(dateDiff('year', toDateTime64('2017-01-01', 9, 'UTC'), toDateTime64('2000-12-31', 9, 'UTC')))" + }, + { + "print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", + "SELECT ABS(dateDiff('minute', toDateTime64('2017-10-30 23:05:01', 9, 'UTC'), toDateTime64('2017-10-30 23:00:59', 9, 'UTC')))" } }))); From 785b01c0991094b59222d3a6b31c34f6d11223f3 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 12 Aug 2022 12:58:25 -0700 Subject: [PATCH 112/342] Updated release notes --- src/Parsers/Kusto/KQL_ReleaseNote.md | 20 +++++++++---------- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 178ba4fcc083..b8e8b75b9973 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -163,13 +163,13 @@ - [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) `print unixtime_seconds_todatetime(1546300800)` -- [unixtime_microseconds_todatetime] +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) `print unixtime_microseconds_todatetime(1546300800000000)` -- [unixtime_milliseconds_todatetime] +- 
[unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) `print unixtime_milliseconds_todatetime(1546300800000)` -- [unixtime_nanoseconds_todatetime] +- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) `print unixtime_nanoseconds_todatetime(1546300800000000000)` - [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) @@ -185,35 +185,35 @@ `print now(-2d) ` `print now(time(1d))` -- [ago] +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) `print ago(2h)` -- [endofday] +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) `print endofday(datetime(2017-01-01 10:10:17), -1)` `print endofday(datetime(2017-01-01 10:10:17), 1)` `print endofday(datetime(2017-01-01 10:10:17))` -- [endofmonth] +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) `print endofmonth(datetime(2017-01-01 10:10:17), -1)` `print endofmonth(datetime(2017-01-01 10:10:17), 1)` `print endofmonth(datetime(2017-01-01 10:10:17))` -- [endofweek] +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) `print endofweek(datetime(2017-01-01 10:10:17), 1)` `print endofweek(datetime(2017-01-01 10:10:17), -1)` `print endofweek(datetime(2017-01-01 10:10:17))` -- [endofyear] +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) `print endofyear(datetime(2017-01-01 10:10:17), -1)` `print endofyear(datetime(2017-01-01 10:10:17), 1)` `print endofyear(datetime(2017-01-01 10:10:17))` -- [make_datetime] +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) `print make_datetime(2017,10,01)` `print make_datetime(2017,10,01,12,10)` `print 
make_datetime(2017,10,01,12,11,0.1234567)` -- [datetime_diff] +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 87af9836c6cd..1efbc97d2bb9 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -127,6 +127,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, "SELECT now64(9, 'UTC')" }, { + "print now(1d)", "SELECT now64(9, 'UTC') + 86400." }, From babd0126ff46a51debcd6505f88c9e0596595136 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Mon, 15 Aug 2022 20:45:39 -0700 Subject: [PATCH 113/342] Incorporated review comments --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 34 ++++++------------- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 6 ++-- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 094410cb023f..6aa877c21121 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -76,7 +76,7 @@ bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) ++pos; arguments = arguments + getConvertedArgument(fn_name, pos); - out = std::format("ABS(DateDiff({}))",arguments); + out = std::format("DateDiff({}) * -1",arguments); return true; } @@ -238,33 +238,19 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) ++pos; String arguments; - int number_of_arguments=1; - String argument; + String argument[7] = 
{"0","0","0","0","0","0","0"}; + + int i = 0; while (pos->type != TokenType::ClosingRoundBracket) { - argument = String(pos->begin,pos->end); - auto dot_pos = argument.find('.'); - - if (dot_pos == String::npos) - arguments = arguments + String(pos->begin,pos->end); - else - { - arguments = arguments + argument.substr(dot_pos-1, dot_pos) + "," + argument.substr(dot_pos+1,argument.length()); - number_of_arguments++; - } - - ++pos; + argument[i] = getConvertedArgument(fn_name, pos); if(pos->type == TokenType::Comma) - number_of_arguments++; - } - - while(number_of_arguments < 7) - { - arguments = arguments+ ","; - arguments = arguments+ "0"; - number_of_arguments++; + ++pos; + + i++; } - arguments = arguments + ",7,'UTC'"; + + arguments = argument[0] + "," + argument[1] + "," + argument[2] + "," + argument[3] + "," + argument[4] + "," + argument[5] + "," + argument[6] + ",7,'UTC'"; out = std::format("makeDateTime64({})",arguments); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 1efbc97d2bb9..55d88e2aa780 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -178,7 +178,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, }, { "print make_datetime(2017,10,01,12,11,0.1234567)", - "SELECT makeDateTime64(2017, 10, 1, 12, 11, 0, 1234567, 7, 'UTC')" + "SELECT makeDateTime64(2017, 10, 1, 12, 11, 0.1234567, 0, 7, 'UTC')" }, { "print unixtime_microseconds_todatetime(1546300800000000)", @@ -194,11 +194,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, }, { "print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))", - "SELECT ABS(dateDiff('year', toDateTime64('2017-01-01', 9, 'UTC'), toDateTime64('2000-12-31', 9, 'UTC')))" + "SELECT dateDiff('year', toDateTime64('2017-01-01', 9, 'UTC'), toDateTime64('2000-12-31', 9, 'UTC')) * -1" }, { "print 
datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", - "SELECT ABS(dateDiff('minute', toDateTime64('2017-10-30 23:05:01', 9, 'UTC'), toDateTime64('2017-10-30 23:00:59', 9, 'UTC')))" + "SELECT dateDiff('minute', toDateTime64('2017-10-30 23:05:01', 9, 'UTC'), toDateTime64('2017-10-30 23:00:59', 9, 'UTC')) * -1" } }))); From e7218ff10eb5f986205c65d57fa5a5570ef7567a Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 12 Aug 2022 10:24:52 -0700 Subject: [PATCH 114/342] Part2 DateTime functions --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 6aa877c21121..fe6b15299167 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -77,6 +77,7 @@ bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) arguments = arguments + getConvertedArgument(fn_name, pos); out = std::format("DateDiff({}) * -1",arguments); + return true; } @@ -238,19 +239,35 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) ++pos; String arguments; - String argument[7] = {"0","0","0","0","0","0","0"}; - - int i = 0; + + int number_of_arguments=1; + String argument; while (pos->type != TokenType::ClosingRoundBracket) { - argument[i] = getConvertedArgument(fn_name, pos); + argument = String(pos->begin,pos->end); + auto dot_pos = argument.find('.'); + + if (dot_pos == String::npos) + arguments = arguments + String(pos->begin,pos->end); + else + { + arguments = arguments + argument.substr(dot_pos-1, dot_pos) + "," + argument.substr(dot_pos+1,argument.length()); + number_of_arguments++; + } + + ++pos; if(pos->type == TokenType::Comma) - ++pos; - - i++; + number_of_arguments++; + } + + while(number_of_arguments < 7) + { + arguments = arguments+ ","; + arguments = 
arguments+ "0"; + number_of_arguments++; } + arguments = arguments + ",7,'UTC'"; - arguments = argument[0] + "," + argument[1] + "," + argument[2] + "," + argument[3] + "," + argument[4] + "," + argument[5] + "," + argument[6] + ",7,'UTC'"; out = std::format("makeDateTime64({})",arguments); From 6dc02810dccae85557541bd47dd5f36f527ae2b0 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Mon, 15 Aug 2022 20:45:39 -0700 Subject: [PATCH 115/342] Incorporated review comments --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 32 ++++++------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index fe6b15299167..e994028d6a46 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -239,35 +239,21 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) ++pos; String arguments; - int number_of_arguments=1; - String argument; + + String argument[7] = {"0","0","0","0","0","0","0"}; + + int i = 0; while (pos->type != TokenType::ClosingRoundBracket) { - argument = String(pos->begin,pos->end); - auto dot_pos = argument.find('.'); - - if (dot_pos == String::npos) - arguments = arguments + String(pos->begin,pos->end); - else - { - arguments = arguments + argument.substr(dot_pos-1, dot_pos) + "," + argument.substr(dot_pos+1,argument.length()); - number_of_arguments++; - } - - ++pos; + argument[i] = getConvertedArgument(fn_name, pos); if(pos->type == TokenType::Comma) - number_of_arguments++; - } - - while(number_of_arguments < 7) - { - arguments = arguments+ ","; - arguments = arguments+ "0"; - number_of_arguments++; + ++pos; + + i++; } - arguments = arguments + ",7,'UTC'"; + arguments = argument[0] + "," + argument[1] + "," + argument[2] + "," + argument[3] + "," + argument[4] + "," + argument[5] + "," + argument[6] + ",7,'UTC'"; out = 
std::format("makeDateTime64({})",arguments); From bc3b541549a56a1a7ef780083d02b23cbca69743 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 16 Aug 2022 13:01:01 -0700 Subject: [PATCH 116/342] Updated as per review comments --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index e994028d6a46..96c5bca06de6 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -16,6 +16,10 @@ #include #include +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +} namespace DB { @@ -239,24 +243,29 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) ++pos; String arguments; - int number_of_arguments=1; - - String argument[7] = {"0","0","0","0","0","0","0"}; + int arg_count = 0; - int i = 0; - while (pos->type != TokenType::ClosingRoundBracket) + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) { - argument[i] = getConvertedArgument(fn_name, pos); + String arg = getConvertedArgument(fn_name, pos); if(pos->type == TokenType::Comma) - ++pos; - - i++; + ++pos; + arguments = arguments + arg + ","; + ++arg_count; } + + if (arg_count < 1 || arg_count > 7) + throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); + + if(arg_count < 7) + { + for(int i = arg_count;i < 7 ; ++i) + arguments = arguments + "0 ,"; + } - arguments = argument[0] + "," + argument[1] + "," + argument[2] + "," + argument[3] + "," + argument[4] + "," + argument[5] + "," + argument[6] + ",7,'UTC'"; - + arguments = arguments + "7,'UTC'"; out = std::format("makeDateTime64({})",arguments); - + return true; } From 13ad4e97f20a695d81b114948402bbe63110415e Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 19 Aug 2022 07:35:22 -0700 Subject: 
[PATCH 117/342] Updated Readme --- src/Parsers/Kusto/KQL_ReleaseNote.md | 86 +++++++++++++++------------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index b8e8b75b9973..299e003f37f0 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -9,6 +9,51 @@ `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + +## DateTimeFunctions + +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) + `print ago(2h)` + +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) + `print endofday(datetime(2017-01-01 10:10:17), -1)` + `print endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + `print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + `print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` + +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) + `print make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + 
`print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` + +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) + `print unixtime_milliseconds_todatetime(1546300800000)` + +- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) + `print unixtime_nanoseconds_todatetime(1546300800000000000)` + # August 15, 2022 **double quote support** ``print res = strcat("double ","quote")`` @@ -162,15 +207,6 @@ - [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) `print unixtime_seconds_todatetime(1546300800)` - -- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) - `print unixtime_microseconds_todatetime(1546300800000000)` - -- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) - `print unixtime_milliseconds_todatetime(1546300800000)` - -- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) - `print unixtime_nanoseconds_todatetime(1546300800000000000)` - 
[dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) `print dayofweek(datetime(2015-12-20))` @@ -185,38 +221,6 @@ `print now(-2d) ` `print now(time(1d))` -- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) - `print ago(2h)` - -- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) - `print endofday(datetime(2017-01-01 10:10:17), -1)` - `print endofday(datetime(2017-01-01 10:10:17), 1)` - `print endofday(datetime(2017-01-01 10:10:17))` - -- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) - `print endofmonth(datetime(2017-01-01 10:10:17), -1)` - `print endofmonth(datetime(2017-01-01 10:10:17), 1)` - `print endofmonth(datetime(2017-01-01 10:10:17))` - -- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) - `print endofweek(datetime(2017-01-01 10:10:17), 1)` - `print endofweek(datetime(2017-01-01 10:10:17), -1)` - `print endofweek(datetime(2017-01-01 10:10:17))` - -- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) - `print endofyear(datetime(2017-01-01 10:10:17), -1)` - `print endofyear(datetime(2017-01-01 10:10:17), 1)` - `print endofyear(datetime(2017-01-01 10:10:17))` - -- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) - `print make_datetime(2017,10,01)` - `print make_datetime(2017,10,01,12,10)` - `print make_datetime(2017,10,01,12,11,0.1234567)` - -- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) - `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` - `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` - `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` ## Binary functions - 
[binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) From 71ddae36ca4af7163769ae8ac6082329d54cf8a6 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 21 Aug 2022 22:31:03 -0700 Subject: [PATCH 118/342] Kusto-phase2, updated make-series operator --- src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 194 +++++++++------------- src/Parsers/Kusto/ParserKQLMakeSeries.h | 3 +- 2 files changed, 76 insertions(+), 121 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index 03528b6af1ea..65eeafe737b7 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -13,7 +13,7 @@ namespace DB bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos) { - std::unordered_set allowed_aggregation + std::unordered_set allowed_aggregation ({ "avg", "avgif", @@ -54,6 +54,7 @@ bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggrega { alias = std::move(first_token); aggregation_fun = String(pos->begin,pos->end); + ++pos; } else aggregation_fun = std::move(first_token); @@ -61,7 +62,6 @@ bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggrega if (allowed_aggregation.find(aggregation_fun) == allowed_aggregation.end()) return false; - ++pos; if (open_bracket.ignore(pos, expected)) column = String(pos->begin,pos->end); else @@ -148,9 +148,10 @@ bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_ste return true; } - -void ParserKQLMakeSeries :: makeNumericSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) +void ParserKQLMakeSeries :: makeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) { + const uint64_t era_diff = 62135596800; // this magic number is the differicen is second form 0001-01-01 (Azure start time ) and 1970-01-01 (CH start time) + String start_str, end_str; String sub_query, 
main_query; @@ -169,164 +170,123 @@ void ParserKQLMakeSeries :: makeNumericSeries(KQLMakeSeries & kql_make_series, c String bin_str, start, end; - if (!start_str.empty()) // has from - { - bin_str = std::format(" toFloat64({0}) + (toInt64(((toFloat64({1}) - toFloat64({0})) / {2}) ) * {2}) AS {1}_ali ", - start_str, axis_column, step); - start = std::format("toUInt64({})", start_str); - } - else - { - bin_str = std::format(" toFloat64(toInt64((toFloat64({0}) ) / {1}) * {1}) AS {0}_ali ", - axis_column, step); - } - - auto sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column); - - if (!end_str.empty()) - end = std::format("toUInt64({})", end_str); + uint64_t diff = 0; + String axis_column_format; + String axis_str; - String range, condition; - if (!start_str.empty() && !end_str.empty()) - { - range = std::format("range({},{}, toUInt64({}))", start, end, step); - condition = std::format("{0}_ali >= {1} and {0}_ali <= {2}", axis_column, start, end); - } - else if (start_str.empty() && !end_str.empty()) + auto get_group_expression_alias = [&] { - range = std::format("range(low, {} , toUInt64({}))", end, step); - condition = std::format("{}_ali <= {}", axis_column, end); - } - else if (!start_str.empty() && end_str.empty()) - { - range = std::format("range({}, high, toUInt64({}))", start, step); - condition = std::format("{}_ali >= {}", axis_column, start); - } - else - { - range = std::format("range(low, high, toUInt64({}))", step); - condition = "1"; //true - } + std::vector group_expression_tokens; + Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); + IParser::Pos pos(tokens, max_depth); + while (!pos->isEnd()) + { + if (String(pos->begin, pos->end) == "AS") + { + if (!group_expression_tokens.empty()) + group_expression_tokens.pop_back(); + ++pos; + group_expression_tokens.push_back(String(pos->begin, 
pos->end)); + } + else + group_expression_tokens.push_back(String(pos->begin, pos->end)); + ++pos; + } + String res; + for (auto token : group_expression_tokens) + res = res + token + " "; + return res; + }; - auto range_len = std::format("length({})", range); - main_query = std::format("{} ", group_expression); + auto group_expression_alias = get_group_expression_alias(); - auto axis_and_agg_alias_list = axis_column; - auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); //tupleElement(pp,2) as PriceAvg ,tupleElement(pp,1) - int idx = 2; - for (auto agg_column : aggregation_columns) + if (from_to_step.is_timespan) { - String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", - agg_column.alias, condition, agg_column.default_value, range_len, range_len, agg_column.alias); - main_query +=", " + agg_group_column; - - axis_and_agg_alias_list +=", " + agg_column.alias; - final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + axis_column_format = std::format("toFloat64(toDateTime64({}, 9, 'UTC'))", axis_column); } - - auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf({0}_ali, {1}), arrayMap( x->(toFloat64(x)), {2})) ) as {0}", - axis_column, condition,range); - - main_query += ", " + axis_str; - auto sub_group_by = std::format("{}", group_expression); - - sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", - axis_column, axis_column,step, axis_and_agg_alias_list,main_query,sub_sub_query, sub_group_by); - - main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); - - kql_make_series.sub_query = std::move(sub_query); - kql_make_series.main_query = std::move(main_query); -} - -void ParserKQLMakeSeries :: 
makeTimeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) -{ - const uint64_t era_diff = 62135596800; // this magic number is the differicen is second form 0001-01-01 (Azure start time ) and 1970-01-01 (CH start time) - - String start_str, end_str; - String sub_query, main_query; - - auto & aggregation_columns = kql_make_series.aggregation_columns; - auto & from_to_step = kql_make_series.from_to_step; - auto & subquery_columns = kql_make_series.subquery_columns; - auto & axis_column = kql_make_series.axis_column; - auto & group_expression = kql_make_series.group_expression; - auto step = from_to_step.step; - - if (!kql_make_series.from_to_step.from_str.empty()) - start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); - - if (!kql_make_series.from_to_step.to_str.empty()) - end_str = getExprFromToken(from_to_step.to_str, max_depth); - - String bin_str, start, end; + else + axis_column_format = std::format("toFloat64({})", axis_column); - uint64_t diff = 0; if (!start_str.empty()) // has from { - bin_str = std::format(" toFloat64(toDateTime64({0}, 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64({1}, 9, 'UTC')) - toFloat64(toDateTime64({0}, 9, 'UTC'))) / {2}) ) * {2}) AS {1}_ali ", - start_str, axis_column, step); - start = std::format("toUInt64(toDateTime64({},9,'UTC'))", start_str); + bin_str = std::format(" toFloat64({0}) + (toInt64((({1} - toFloat64({0})) / {2}) ) * {2}) AS {3}_ali ", + start_str, axis_column_format, step, axis_column); + start = std::format("toUInt64({})", start_str); } else { - bin_str = std::format(" toInt64((toFloat64(toDateTime64({0}, 9, 'UTC')) + {1}) / {2}) * {2} AS {0}_ali ", - axis_column, era_diff, step); - diff = era_diff; + if (from_to_step.is_timespan) + diff = era_diff; + bin_str = std::format(" toFloat64(toInt64(({0} + {1}) / {2}) * {2}) AS {3}_ali ", axis_column_format, diff, step, axis_column); } - auto sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} GROUP BY {0}, {4}_ali ORDER 
BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column); - if (!end_str.empty()) - end = std::format("toUInt64(toDateTime64({}, 9, 'UTC'))", end_str); + end = std::format("toUInt64({})", end_str); String range, condition; + if (!start_str.empty() && !end_str.empty()) { - range = std::format("range({},{}, toUInt64({}))", start, end, step); - condition = std::format("{0}_ali >= {1} and {0}_ali <= {2}", axis_column, start, end); + range = std::format("range({}, {}, toUInt64({}))", start, end, step); + condition = std::format("where toInt64({0}) >= {1} and toInt64({0}) < {2}", axis_column_format, start, end); } else if (start_str.empty() && !end_str.empty()) { - range = std::format("range(low, {} + {}, toUInt64({}))", end, era_diff, step); - condition = std::format("{0}_ali - {1} < {2}", axis_column, era_diff, end); + range = std::format("range(low, {} + {}, toUInt64({}))", end, diff, step); + condition = std::format("where toInt64({0}) - {1} < {2}", axis_column_format, diff, end); } else if (!start_str.empty() && end_str.empty()) { range = std::format("range({}, high, toUInt64({}))", start, step); - condition = std::format("{}_ali >= {}", axis_column, start); + condition = std::format("where toInt64({}) >= {}", axis_column_format, start); } else { range = std::format("range(low, high, toUInt64({}))", step); - condition = "1"; //true + condition = " "; } auto range_len = std::format("length({})", range); - main_query = std::format("{} ", group_expression); + + String sub_sub_query; + if (group_expression.empty()) + sub_sub_query = std::format(" (Select {0}, {1} FROM {2} {4} GROUP BY {3}_ali ORDER BY {3}_ali) ", subquery_columns, bin_str, table_name, axis_column, condition); + else + sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} {5} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column, condition); + + if (!group_expression.empty()) + main_query = std::format("{} ", 
group_expression_alias); auto axis_and_agg_alias_list = axis_column; auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); //tupleElement(pp,2) as PriceAvg ,tupleElement(pp,1) int idx = 2; for (auto agg_column : aggregation_columns) { - String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", - agg_column.alias, condition, agg_column.default_value, range_len, range_len, agg_column.alias); - main_query +=", " + agg_group_column; + String agg_group_column = std::format("arrayConcat(groupArray ({}_ali) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", + agg_column.alias, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query = main_query.empty() ? agg_group_column : main_query + ", " + agg_group_column; axis_and_agg_alias_list +=", " + agg_column.alias; final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); } - auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf(toDateTime64({0}_ali - {1},9,'UTC'), {2}), arrayMap( x->(toDateTime64(x - {1} ,9,'UTC')), {3}) )) as {0}", - axis_column, diff, condition,range); + + if (from_to_step.is_timespan) + axis_str = std::format("arrayDistinct(arrayConcat(groupArray(toDateTime64({0}_ali - {1},9,'UTC')), arrayMap( x->(toDateTime64(x - {1} ,9,'UTC')), {2}) )) as {0}", + axis_column, diff, range); + else + axis_str = std::format("arrayDistinct(arrayConcat(groupArray({0}_ali), arrayMap( x->(toFloat64(x)), {1}) )) as {0}", + axis_column, range); main_query += ", " + axis_str; - auto sub_group_by = std::format("{}", group_expression); + auto sub_group_by = group_expression.empty()? 
"" : std::format("GROUP BY {}", group_expression_alias); - sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", + sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} {} )", axis_column, axis_column,step, axis_and_agg_alias_list, main_query, sub_sub_query, sub_group_by); - main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); + if (group_expression.empty()) + main_query = std::format("{}", final_axis_agg_alias_list); + else + main_query = std::format("{},{}", group_expression_alias, final_axis_agg_alias_list); kql_make_series.sub_query = std::move(sub_query); kql_make_series.main_query = std::move(main_query); @@ -387,10 +347,7 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec subquery_columns += ", "+ column_str; } - if (from_to_step.is_timespan) - makeTimeSeries(kql_make_series, pos.max_depth); - else - makeNumericSeries(kql_make_series, pos.max_depth); + makeSeries(kql_make_series, pos.max_depth); Tokens token_subquery(kql_make_series.sub_query.c_str(), kql_make_series.sub_query.c_str() + kql_make_series.sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); @@ -407,6 +364,5 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec pos = begin; return true; - } } diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h index a89ec97174e3..f00eaa1dc99f 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.h +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -43,8 +43,7 @@ class ParserKQLMakeSeries : public ParserKQLBase String main_query; }; - void makeNumericSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth); - void makeTimeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth); + void 
makeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth); bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); const char * getName() const override { return "KQL project"; } From d5bf4d96fca6181a334b2b6497d13593bcf93698 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 22 Aug 2022 13:33:01 -0700 Subject: [PATCH 119/342] Kusto-phase2: add bin function, unit test for make-series --- .../KustoFunctions/KQLGeneralFunctions.cpp | 39 ++++++++++++++++--- .../tests/KQL/gtest_KQL_MakeSeries.cpp | 25 ++++++++++++ .../tests/KQL/gtest_KQL_StringFunctions.cpp | 12 ++++++ 3 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index dd79cc06898b..e1d932e9ce19 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -20,11 +20,38 @@ namespace DB { -bool Bin::convertImpl(String &out,IParser::Pos &pos) +bool Bin::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + double bin_size; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String origal_expr(pos->begin, pos->end); + String value = getConvertedArgument(fn_name, pos); + + ++pos; + String round_to = getConvertedArgument(fn_name, pos); + + auto t = std::format("toFloat64({})", value); + + bin_size = std::stod(round_to); + + if (origal_expr == "datetime" || origal_expr == "date") + { + out = std::format("toDateTime64(toInt64({0} / {1} ) * {1}, 9, 'UTC')", t, bin_size); + } + else if (origal_expr == "timespan" || origal_expr =="time" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) + { + String bin_value = 
std::format(" toInt64({0} / {1} ) * {1}", t, bin_size); + out = std::format("concat(toString( toInt32((({}) as x) / 3600)),':', toString( toInt32(x % 3600 / 60)),':',toString( toInt32(x % 3600 % 60)))", bin_value); + } + else + { + out = std::format("toInt64({0} / {1} ) * {1}", t, bin_size); + } + return true; } bool BinAt::convertImpl(String & out,IParser::Pos & pos) @@ -49,11 +76,11 @@ bool BinAt::convertImpl(String & out,IParser::Pos & pos) int dir = t2 >= t1 ? 0 : -1; bin_size = std::stod(bin_size_str); - if (origal_expr == "datetime" or origal_expr == "date") + if (origal_expr == "datetime" || origal_expr == "date") { out = std::format("toDateTime64({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); } - else if (origal_expr == "timespan" or origal_expr =="time" or ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) + else if (origal_expr == "timespan" || origal_expr =="time" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) { String bin_value = std::format("{} + toInt64(({} - {}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); out = std::format("concat(toString( toInt32((({}) as x) / 3600)),':', toString( toInt32(x % 3600 / 60)),':',toString( toInt32(x % 3600 % 60)))", bin_value); diff --git a/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp new file mode 100644 index 000000000000..5c94ab4665e5 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp @@ -0,0 +1,25 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MakeSeries, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS 
low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM T\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga)) < 0, 0, length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga))), 1))) AS 
PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(toUInt64(10), toUInt64(15), toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(10) + (toInt64((toFloat64(Purchase) - toFloat64(10)) / 1) * 1) AS Purchase_ali\n FROM T2\n WHERE (toInt64(toFloat64(Purchase)) >= toUInt64(10)) AND (toInt64(toFloat64(Purchase)) < toUInt64(15))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(86400))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 62135596800, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 62135596800, 9, 'UTC'), range(low, high, toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) + 62135596800) / 86400) * 86400) AS Purchase_ali\n FROM T\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n 
arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(1))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(low, high, toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(Purchase) + 0) / 1) * 1) AS Purchase_ali\n FROM T2\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index 0ef816646a44..6fa03ef946da 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -176,6 +176,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, { "print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))", "SELECT toDateTime64(toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64('2017-05-15 10:20:00.0', 9, 'UTC')) - toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC'))) / 86400) + 0) * 86400), 9, 'UTC') AS res" + }, + { + "print bin(4.5, 1)", + "SELECT toInt64(toFloat64(4.5) / 1) * 1" + }, + { + "print bin(time(16d), 7d)", + "SELECT concat(toString(toInt32(((toInt64(toFloat64(1382400.) 
/ 604800) * 604800) AS x) / 3600)), ':', toString(toInt32((x % 3600) / 60)), ':', toString(toInt32((x % 3600) % 60)))" + }, + { + "print bin(datetime(1970-05-11 13:45:07), 1d)", + "SELECT toDateTime64(toInt64(toFloat64(toDateTime64('1970-05-11 13:45:07', 9, 'UTC')) / 86400) * 86400, 9, 'UTC')" } }))); From 2ba955f79bac554043e28abf4f6b83f2476fca76 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 22 Aug 2022 15:08:43 -0700 Subject: [PATCH 120/342] Implement some KQL array functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 13 +- .../KustoFunctions/KQLDynamicFunctions.cpp | 167 ++++++++++-------- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 6 +- src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp | 53 +++++- src/Parsers/tests/KQL/gtest_KQL_IP.cpp | 23 +-- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 67 +------ .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 67 +------ src/Parsers/tests/gtest_Parser.cpp | 73 -------- src/Parsers/tests/gtest_common.cpp | 64 +++++++ src/Parsers/tests/gtest_common.h | 3 + 10 files changed, 232 insertions(+), 304 deletions(-) create mode 100644 src/Parsers/tests/gtest_common.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 299e003f37f0..d9be1f8e3f48 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -3,12 +3,23 @@ # August 29, 2022 ## Dynamic functions +- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) + `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` + - [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` `print 
array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` +- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) + `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` + `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` + `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` + +- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) + `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` + `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` ## DateTimeFunctions @@ -276,7 +287,7 @@ `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` -- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 0dfa1907dc3a..2c79ae45601d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -1,18 +1,12 @@ -#include -#include #include #include -#include -#include +#include #include -#include -#include -#include -#include #include -#include -#include #include +#include +#include +#include #include #include @@ -20,18 +14,16 @@ namespace DB { -bool 
ArrayConcat::convertImpl(String &out,IParser::Pos &pos) +bool ArrayConcat::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "arrayConcat"); } bool ArrayIif::convertImpl(String & out, IParser::Pos & pos) { const auto function_name = getKQLFunctionName(pos); if (function_name.empty()) - return false; + return false; const auto conditions = getArgument(function_name, pos); const auto if_true = getArgument(function_name, pos); @@ -46,189 +38,214 @@ bool ArrayIif::convertImpl(String & out, IParser::Pos & pos) return true; } -bool ArrayIndexOf::convertImpl(String &out,IParser::Pos &pos) +bool ArrayIndexOf::convertImpl(String & out, IParser::Pos & pos) { - String fn_name = getKQLFunctionName(pos); - + const auto fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; - ++pos; - String array = getConvertedArgument(fn_name, pos); - ++pos; - const auto needle = getConvertedArgument(fn_name, pos); - out = "minus(indexOf(" + array + ", " + needle + ") , 1)"; - + const auto array = getArgument(fn_name, pos); + const auto needle = getArgument(fn_name, pos); + out = "minus(indexOf(" + array + ", " + needle + "), 1)"; + return true; } -bool ArrayLength::convertImpl(String &out,IParser::Pos &pos) +bool ArrayLength::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "length"); } -bool ArrayReverse::convertImpl(String &out,IParser::Pos &pos) +bool ArrayReverse::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArrayRotateLeft::convertImpl(String &out,IParser::Pos &pos) +bool ArrayRotateLeft::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArrayRotateRight::convertImpl(String &out,IParser::Pos &pos) 
+bool ArrayRotateRight::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArrayShiftLeft::convertImpl(String &out,IParser::Pos &pos) +bool ArrayShiftLeft::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArrayShiftRight::convertImpl(String &out,IParser::Pos &pos) +bool ArrayShiftRight::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArraySlice::convertImpl(String &out,IParser::Pos &pos) +bool ArraySlice::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto start = getArgument(function_name, pos); + const auto end = getArgument(function_name, pos); + + out = std::format( + "arraySlice({0}, plus(1, if({1} >= 0, {1}, toInt64(max2(-length({0}), {1})) + length({0}))) as offset_{3}, " + " plus(1, if({2} >= 0, {2}, toInt64(max2(-length({0}), {2})) + length({0}))) - offset_{3} + 1)", + array, + start, + end, + generateUniqueIdentifier()); + + return true; } -bool ArraySortAsc::convertImpl(String &out,IParser::Pos &pos) +bool ArraySortAsc::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArraySortDesc::convertImpl(String &out,IParser::Pos &pos) +bool ArraySortDesc::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArraySplit::convertImpl(String 
&out,IParser::Pos &pos) +bool ArraySplit::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) return false; + + const auto array = getArgument(function_name, pos); + const auto indices = getArgument(function_name, pos); + + out = std::format( + "if(empty(arrayMap(x -> if(x >= 0, x, toInt64(max2(0, x + length({0})))), flatten([{1}])) as indices_{2}), [{0}], " + "arrayConcat([arraySlice({0}, 1, indices_{2}[1])], arrayMap(i -> arraySlice({0}, indices_{2}[i] + 1, " + "if(i = length(indices_{2}), length({0})::Int64, indices_{2}[i + 1]::Int64) - indices_{2}[i]), " + "range(1, length(indices_{2}) + 1))))", + array, + indices, + generateUniqueIdentifier()); + + return true; } -bool ArraySum::convertImpl(String &out,IParser::Pos &pos) +bool ArraySum::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "arraySum"); } -bool BagKeys::convertImpl(String &out,IParser::Pos &pos) +bool BagKeys::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool BagMerge::convertImpl(String &out,IParser::Pos &pos) +bool BagMerge::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool BagRemoveKeys::convertImpl(String &out,IParser::Pos &pos) +bool BagRemoveKeys::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool JaccardIndex::convertImpl(String &out,IParser::Pos &pos) +bool JaccardIndex::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Pack::convertImpl(String 
&out,IParser::Pos &pos) +bool Pack::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool PackAll::convertImpl(String &out,IParser::Pos &pos) +bool PackAll::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool PackArray::convertImpl(String &out,IParser::Pos &pos) +bool PackArray::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Repeat::convertImpl(String &out,IParser::Pos &pos) +bool Repeat::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool SetDifference::convertImpl(String &out,IParser::Pos &pos) +bool SetDifference::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool SetHasElement::convertImpl(String &out,IParser::Pos &pos) +bool SetHasElement::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool SetIntersect::convertImpl(String &out,IParser::Pos &pos) +bool SetIntersect::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool SetUnion::convertImpl(String &out,IParser::Pos &pos) +bool SetUnion::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool TreePath::convertImpl(String &out,IParser::Pos &pos) +bool TreePath::convertImpl(String & out, 
IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Zip::convertImpl(String &out,IParser::Pos &pos) +bool Zip::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 40f34f766b54..53c81a662084 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -225,11 +225,13 @@ bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto mask = getArgument(function_name, pos); + const auto unique_identifier = generateUniqueIdentifier(); out = std::format( - "if(isNull({0} as ipv4), {1}, {2})", + "if(isNull({0} as ipv4_{3}), {1}, {2})", kqlCallToExpression("parse_ipv4_mask", {ip_address, mask}, pos.max_depth), kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), - kqlCallToExpression("parse_ipv6", {"format_ipv4(ipv4)"}, pos.max_depth)); + kqlCallToExpression("parse_ipv6", {"format_ipv4(ipv4_" + unique_identifier + ")"}, pos.max_depth), + unique_identifier); return true; } diff --git a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp index 1e316593a5b9..566cc5791dcd 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp @@ -2,10 +2,18 @@ #include -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ + { + "print array_concat(A, B)", + "SELECT arrayConcat(A, B)" + }, + { 
+ "print array_concat(A, B, C, D)", + "SELECT arrayConcat(A, B, C, D)" + }, { "print array_iff(A, B, C)", "SELECT arrayMap(x -> if((x.1) != 0, x.2, x.3), arrayZip(A, arrayResize(B, length(A), NULL), arrayResize(C, length(A), NULL)))" @@ -13,5 +21,48 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest, { "print array_iif(A, B, C)", "SELECT arrayMap(x -> if((x.1) != 0, x.2, x.3), arrayZip(A, arrayResize(B, length(A), NULL), arrayResize(C, length(A), NULL)))" + }, + { + "print output = array_index_of(dynamic([1, 2, 3]), 2)", + "SELECT indexOf([1, 2, 3], 2) - 1 AS output" + }, + { + "print output = array_index_of(dynamic(['a', 'b', 'c']), 'b')", + "SELECT indexOf(['a', 'b', 'c'], 'b') - 1 AS output" + }, + { + "print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')", + "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS output" + }, + + { + "print output = array_length(dynamic([1, 2, 3]))", + "SELECT length([1, 2, 3]) AS output" + }, + { + "print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))", + "SELECT length(['John', 'Denver', 'Bob', 'Marley']) AS output" + }, + { + "print output = array_sum(dynamic([2, 5, 3]))", + "SELECT arraySum([2, 5, 3]) AS output" + }, + { + "print output = array_sum(dynamic([2.5, 5.5, 3]))", + "SELECT arraySum([2.5, 5.5, 3]) AS output" + } +}))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicRegex, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print array_slice(A, B, C)", + "SELECT arraySlice\\(A, 1 \\+ if\\(B >= 0, B, toInt64\\(max2\\(-length\\(A\\), B\\)\\) \\+ length\\(A\\)\\) AS offset_\\d+, \\(\\(1 \\+ if\\(C >= 0, C, toInt64\\(max2\\(-length\\(A\\), C\\)\\) \\+ length\\(A\\)\\)\\) - offset_\\d+\\) \\+ 1\\)" + }, + { + "print array_split(A, B)", + "SELECT if\\(empty\\(arrayMap\\(x -> if\\(x >= 0, x, toInt64\\(max2\\(0, x \\+ length\\(A\\)\\)\\)\\), 
flatten\\(\\[B\\]\\)\\) AS indices_\\d+\\), \\[A\\], arrayConcat\\(\\[arraySlice\\(A, 1, indices_\\d+\\[1\\]\\)\\], arrayMap\\(i -> arraySlice\\(A, \\(indices_\\d+\\[i\\]\\) \\+ 1, if\\(i = length\\(indices_\\d+\\), CAST\\(length\\(A\\), 'Int64'\\), CAST\\(indices_\\d+\\[i \\+ 1\\], 'Int64'\\)\\) - \\(indices_\\d+\\[i\\]\\)\\), range\\(1, length\\(indices_\\d+\\) \\+ 1\\)\\)\\)\\)" } }))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp index 731715b45424..7f1d93625be8 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -1,28 +1,7 @@ #include -#include -#include #include -#include -#include - -class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserRegexTest, parseQuery) -{ - const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - - ASSERT_TRUE(parser); - ASSERT_TRUE(expected_ast); - - DB::ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); -} - INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, ::testing::Combine( ::testing::Values(std::make_shared()), @@ -101,6 +80,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, }, { "print parse_ipv6_mask(A, B)", - "SELECT if\\(\\(if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\) AS ipv4\\) IS NULL, if\\(\\(length\\(splitByChar\\('/', concat\\(ifNull\\(toString\\(if\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND 
\\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\), ''\\), '/', ifNull\\(toString\\(B\\), ''\\)\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\), if\\(\\(length\\(splitByChar\\('/', ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(ipv4\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(ipv4\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(ipv4\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, 
arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\)" + "SELECT if\\(\\(if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\) AS ipv4_\\d+\\) IS NULL, if\\(\\(length\\(splitByChar\\('/', concat\\(ifNull\\(toString\\(if\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\), ''\\), '/', ifNull\\(toString\\(B\\), ''\\)\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\), if\\(\\(length\\(splitByChar\\('/', ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(ipv4_\\d+\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(ipv4_\\d+\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, 
multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(ipv4_\\d+\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\)" } }))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index 6fa03ef946da..3dd5447b7db6 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -1,71 +1,8 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace -{ -using namespace DB; -using namespace std::literals; -} -class ParserStringFuncTest : public ::testing::TestWithParam, ParserTestCase>> -{}; -TEST_P(ParserStringFuncTest, ParseQuery) -{ const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - ASSERT_NE(nullptr, parser); - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - 
EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} +#include -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 55d88e2aa780..e5d2ee5e0634 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -1,71 +1,8 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace -{ -using namespace DB; -using namespace std::literals; -} -class ParserDateTimeFuncTest : public ::testing::TestWithParam, ParserTestCase>> -{}; -TEST_P(ParserDateTimeFuncTest, ParseQuery) -{ const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - ASSERT_NE(nullptr, parser); - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) 
- { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} +#include -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index fe7453bd2048..c726ed6fd0a4 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -15,8 +14,6 @@ #include #include #include -#include -#include namespace { @@ -34,48 +31,6 @@ std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case) return ostr << "ParserTestCase input: " << test_case.input_text; } -TEST_P(ParserTest, parseQuery) -{ - const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - - ASSERT_NE(nullptr, parser); - - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { 
- ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} - INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), @@ -595,33 +550,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "print output = dynamic(['a', 'b', 'c'])", "SELECT ['a', 'b', 'c'] AS output" - }, - { - "print output = array_index_of(dynamic([1, 2, 3]), 2)", - "SELECT indexOf([1, 2, 3], 2) - 1 AS output" - }, - { - "print output = array_index_of(dynamic(['a', 'b', 'c']), 'b')", - "SELECT indexOf(['a', 'b', 'c'], 'b') - 1 AS output" - }, - { - "print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')", - "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS output" - }, - { - "print output = array_length(dynamic([1, 2, 3]))", - "SELECT length([1, 2, 3]) AS output" - }, - { - "print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))", - "SELECT length(['John', 'Denver', 'Bob', 'Marley']) AS output" - }, - { - "print output = array_sum(dynamic([2, 5, 3]))", - "SELECT arraySum([2, 5, 3]) AS output" - }, - { - "print output = array_sum(dynamic([2.5, 5.5, 3]))", - "SELECT arraySum([2.5, 5.5, 3]) AS output" } }))); diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp new 
file mode 100644 index 000000000000..c9efdbe105c8 --- /dev/null +++ b/src/Parsers/tests/gtest_common.cpp @@ -0,0 +1,64 @@ +#include "gtest_common.h" + +#include +#include +#include + +#include + +#include + +TEST_P(ParserTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_NE(nullptr, parser); + + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +TEST_P(ParserRegexTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_TRUE(parser); + ASSERT_TRUE(expected_ast); + + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); +} diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h index aac3dddb117c..4eca9390d92c 100644 --- a/src/Parsers/tests/gtest_common.h +++ b/src/Parsers/tests/gtest_common.h @@ -12,3 +12,6 @@ struct ParserTestCase class 
ParserTest : public ::testing::TestWithParam, ParserTestCase>> {}; + +class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> +{}; From 40a6e5a0c0b528525259afb26576808f4009fe71 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 16 Aug 2022 07:31:15 -0700 Subject: [PATCH 121/342] date_add and date_part --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 71 +++++++++++++++++-- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 96c5bca06de6..c1a5000c6fc4 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -56,14 +56,77 @@ bool Ago::convertImpl(String & out, IParser::Pos & pos) bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) { - return directMapping(out, pos, "date_add"); + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String period = getConvertedArgument(fn_name, pos); + //remove quotes from period. 
+ if ( period.front() == '\"' || period.front() == '\'' ) + { + //period.remove + period.erase( 0, 1 ); // erase the first quote + period.erase( period.size() - 2 ); // erase the last quuote(Since token includes trailing space alwayas as per implememtation) + } + ++pos; + const String offset = getConvertedArgument(fn_name, pos); + ++pos; + const String datetime = getConvertedArgument(fn_name, pos); + + out = std::format("date_add({}, {}, {} )",period,offset,datetime); + + return true; + }; bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String part = Poco::toUpper(getConvertedArgument(fn_name, pos)); + if ( part.front() == '\"' || part.front() == '\'' ) + { + //period.remove + part.erase( 0, 1 ); // erase the first quote + part.erase( part.size() - 2 ); // erase the last quuote + } + String date; + if (pos->type == TokenType::Comma) + { + ++pos; + date = getConvertedArgument(fn_name, pos); + } + + String format; + + if(part == "YEAR" ) + format = "%G"; + else if (part == "QUARTER" ) + format = "%Q"; + else if (part == "MONTH") + format = "%m"; + else if (part == "WEEK_OF_YEAR") + format = "%V"; + else if (part == "DAY") + format = "%e"; + else if (part == "DAYOFYEAR") + format = "%j"; + else if (part == "HOUR") + format = "%I"; + else if (part == "MINUTE") + format = "%M"; + else if (part == "SECOND") + format = "%S"; + else + return false; + + out = std::format("formatDateTime(toDateTime64({}, 9, 'UTC'), '{}' )", date, format); + + return true; } bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) From aaa060aacb12900d0e565a5d69f16b87fc649b5d Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 23 Aug 2022 09:43:43 -0700 Subject: [PATCH 122/342] DateTime part3 functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 18 ++ 
.../KustoFunctions/KQLCastingFunctions.cpp | 21 +- .../KustoFunctions/KQLDataTypeFunctions.cpp | 2 +- .../KustoFunctions/KQLDateTimeFunctions.cpp | 256 ++++++++++++++++-- .../KustoFunctions/KQLDateTimeFunctions.h | 31 +++ .../KustoFunctions/KQLGeneralFunctions.cpp | 2 +- src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 2 +- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 8 +- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 67 ++++- 9 files changed, 360 insertions(+), 47 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index d9be1f8e3f48..9e69a91b88de 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -65,6 +65,24 @@ - [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) `print unixtime_nanoseconds_todatetime(1546300800000000000)` +- [datetime_part] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-partfunction) + `print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))` + +- [datetime_add] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-addfunction) + `print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))` + +-[format_timespan] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-timespanfunction) + `print format_timespan(time(1d), 'd-[hh:mm:ss]')` + `print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')` + +-[format_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-datetimefunction) + `print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')` + `print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')` + +-[todatetime] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todatetimefunction) + `print todatetime('2014-05-25T08:20:03.123456Z')` + `print todatetime('2014-05-25 20:03.123')` 
+ # August 15, 2022 **double quote support** ``print res = strcat("double ","quote")`` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index b6082995ec15..3fde2ea8364d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -24,9 +24,14 @@ bool ToBool::convertImpl(String & out, IParser::Pos & pos) bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + + out = std::format("parseDateTime64BestEffortOrNull(toString({0}),9,'UTC')", param); + return true; } bool ToDouble::convertImpl(String & out, IParser::Pos & pos) @@ -60,13 +65,13 @@ bool ToString::convertImpl(String & out, IParser::Pos & pos) const auto param = getArgument(function_name, pos); out = std::format("ifNull(toString({0}), '')", param); return true; -} - +} bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + String res = String(pos->begin, pos->end); + out = res; + return false; } + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 4caf4188c8c7..cc6834744b01 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -45,7 +45,7 @@ bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) --pos; datetime_str = std::format("'{}'",String(start->begin,pos->end)); } - out = std::format("toDateTime64({},9,'UTC')", datetime_str); + out = std::format("parseDateTime64BestEffortOrNull({},9,'UTC')", datetime_str); ++pos; return true; } diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index c1a5000c6fc4..1f238fc222b5 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -15,11 +15,13 @@ #include #include #include +#include namespace DB::ErrorCodes { extern const int SYNTAX_ERROR; } + namespace DB { @@ -67,7 +69,7 @@ bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) { //period.remove period.erase( 0, 1 ); // erase the first quote - period.erase( period.size() - 2 ); // erase the last quuote(Since token includes trailing space alwayas as per implememtation) + period.erase( period.size() - 2 ); // erase the last quote(Since token includes trailing space alwayas as per implememtation) } ++pos; const String offset = getConvertedArgument(fn_name, pos); @@ -88,7 +90,8 @@ bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) ++pos; String part = Poco::toUpper(getConvertedArgument(fn_name, pos)); - if ( part.front() == '\"' || part.front() == '\'' ) + + if (part.front() == '\"' || part.front() == '\'' ) { //period.remove part.erase( 0, 1 ); // erase the first quote @@ -100,13 +103,12 @@ bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) ++pos; date = getConvertedArgument(fn_name, pos); } - String format; if(part == "YEAR" ) format = "%G"; else if (part == "QUARTER" ) - format = "%Q"; + format = "%Q"; else if (part == "MONTH") format = "%m"; else if (part == "WEEK_OF_YEAR") @@ -122,9 +124,9 @@ bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) else if (part == "SECOND") format = "%S"; else - return false; - - out = std::format("formatDateTime(toDateTime64({}, 9, 'UTC'), '{}' )", date, format); + throw Exception("Unexpected argument " + part + " for " + fn_name, ErrorCodes::SYNTAX_ERROR); + + out = std::format("formatDateTime({}, '{}' )", date, format); return true; } @@ -263,16 +265,162 @@ bool 
EndOfYear::convertImpl(String & out, IParser::Pos & pos) bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String formatspecifier; + ++pos; + const auto datetime = getConvertedArgument(fn_name, pos); + ++pos; + auto format = getConvertedArgument(fn_name, pos); + + //remove quotes and end space from format argument. + if (format.front() == '\"' || format.front() == '\'' ) + { + format.erase( 0, 1 ); // erase the first quote + format.erase( format.size() - 2 ); // erase the last quuote(Since token includes trailing space alwayas as per implememtation) + } + + std::vector res; + getTokens(format, res); + std::string::size_type i = 0; + size_t decimal =0; + while (i < format.size()) + { + char c = format[i]; + if(!isalpha(c)) + { + //delimeter + if (c == ' ' || c == '-' || c == '_' || c == '[' || c == ']' || c == '/' || c == ',' || c == '.' 
|| c == ':') + formatspecifier = formatspecifier + c; + else + throw Exception("Invalid format delimeter in function:" + fn_name, ErrorCodes::SYNTAX_ERROR); + ++i; + } + else + { + //format specifier + String arg = res.back(); + + if(arg == "y" || arg == "yy" ) + formatspecifier = formatspecifier + "%y"; + else if (arg == "yyyy") + formatspecifier = formatspecifier + "%Y"; + else if (arg == "M" || arg == "MM") + formatspecifier = formatspecifier + "%m"; + else if (arg == "s" || arg == "ss") + formatspecifier = formatspecifier + "%S"; + else if (arg == "m" || arg == "mm") + formatspecifier = formatspecifier + "%M"; + else if (arg == "h" || arg == "hh") + formatspecifier = formatspecifier + "%I"; + else if (arg == "H" || arg == "HH") + formatspecifier = formatspecifier + "%H"; + else if (arg == "d") + formatspecifier = formatspecifier + "%e"; + else if (arg == "dd") + formatspecifier = formatspecifier + "%d"; + else if (arg == "tt") + formatspecifier = formatspecifier + "%p"; + else if (arg.starts_with('f')) + decimal = arg.size(); + else if (arg.starts_with('F')) + decimal = arg.size(); + else + throw Exception("Format specifier " + arg + " in function:" + fn_name + "is not supported", ErrorCodes::SYNTAX_ERROR); + res.pop_back(); + i = i + arg.size(); + } + } + if(decimal > 0 && formatspecifier.find('.')!=String::npos) + { + + out = std::format("concat(" + "substring(toString(formatDateTime( {0} , '{1}' )),1, position(toString(formatDateTime({0},'{1}')),'.')) ," + "substring(substring(toString({0}), position(toString({0}),'.')+1),1,{2})," + "substring(toString(formatDateTime( {0},'{1}')), position(toString(formatDateTime({0},'{1}')),'.')+1 ,length (toString(formatDateTime({0},'{1}'))))) " ,datetime, formatspecifier,decimal); + } + else + out = std::format("formatDateTime( {0},'{1}')" ,datetime, formatspecifier); + + return true; } -bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos) +bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos) { 
- String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String formatspecifier; + ++pos; + const auto datetime = getConvertedArgument(fn_name, pos); + ++pos; + auto format = getConvertedArgument(fn_name, pos); + size_t decimal=0; + //remove quotes and end space from format argument. + if (format.front() == '\"' || format.front() == '\'' ) + { + format.erase( 0, 1 ); // erase the first quote + format.erase( format.size() - 2 ); // erase the last quuote(Since token includes trailing space alwayas as per implememtation) + } + std::vector res; + getTokens(format, res); + size_t pad = 0; + std::string::size_type i = 0; + + while (i < format.size()) + { + char c = format[i]; + if(!isalpha(c)) + { + //delimeter + if (c == ' ' || c == '-' || c == '_' || c == '[' || c == ']' || c == '/' || c == ',' || c == '.' || c == ':') + formatspecifier = formatspecifier + c; + else + throw Exception("Invalid format delimeter in function:" + fn_name, ErrorCodes::SYNTAX_ERROR); + ++i; + } + else + { + //format specifier + String arg = res.back(); + + if (arg == "s" || arg == "ss") + formatspecifier = formatspecifier + "%S"; + else if (arg == "m" || arg == "mm") + formatspecifier = formatspecifier + "%M"; + else if (arg == "h" || arg == "hh") + formatspecifier = formatspecifier + "%I"; + else if (arg == "H" || arg == "HH") + formatspecifier = formatspecifier + "%H"; + else if (arg == "d") + formatspecifier = formatspecifier + "%e"; + else if (arg == "dd") + formatspecifier = formatspecifier + "%d"; + else if (arg.starts_with('d') && arg.size() >2) + { formatspecifier = formatspecifier + "%d"; + pad = arg.size() - 2 ; + } + else if (arg.starts_with('f')) + decimal = arg.size(); + else if (arg.starts_with('F')) + decimal = arg.size(); + else + throw Exception("Format specifier " + arg + " in function:" + fn_name + "is not supported", ErrorCodes::SYNTAX_ERROR); + res.pop_back(); + i = 
i + arg.size(); + } + } + if(decimal > 0 && formatspecifier.find('.')!=String::npos ) + { + out = std::format("leftPad(concat(substring(toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')),1, position( toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')),'.')),substring(SUBSTRING(toString(toDateTime64({0},9,'UTC')),position(toString(toDateTime64({0},9,'UTC')),'.')+1),1,{2}),substring(toString(formatDateTime(toDateTime64({0},9,'UTC'),'{1}')),position( toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')),'.')+1,length(toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}'))))),length(toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')))+{3}+{2},'0')", datetime,formatspecifier,decimal,pad); + } + else if (decimal == 0 && formatspecifier.find('.')==String::npos) + out = std::format("leftPad(toString(formatDateTime(toDateTime64({0},9,'UTC'),'{1}')),length(toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')))+{2},'0')", datetime,formatspecifier,pad); + else + out = std::format("formatDateTime(toDateTime64({0},9,'UTC'),'{1}')", datetime,formatspecifier); + + return true; } bool GetMonth::convertImpl(String & out, IParser::Pos & pos) @@ -292,9 +440,74 @@ bool HoursOfDay::convertImpl(String & out, IParser::Pos & pos) bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String datetime_str; + String hour ; + String day ; + String minute ; + String second ; + int arg_count = 0; + std::vector args; + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + { + String arg = getConvertedArgument(fn_name, pos); + args.insert(args.begin(),arg); + if(pos->type == TokenType::Comma) + ++pos; + ++arg_count; + } + + if (arg_count < 2 || arg_count > 4) + throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); + 
+ if(arg_count == 2) + { + hour = args.back(); + args.pop_back(); + minute = args.back(); + args.pop_back(); + datetime_str = hour.erase(hour.size() - 1) + ":" + minute.erase(minute.size() - 1) ; + } + else if (arg_count == 3) + { + hour = args.back(); + args.pop_back(); + minute = args.back(); + args.pop_back(); + second = args.back(); + args.pop_back(); + + datetime_str = hour.erase(hour.size() - 1) + ":" + minute.erase(minute.size() - 1) + ":" + second.erase(second.size() - 1); + } + else if (arg_count == 4) + { + day = args.back(); + args.pop_back(); + hour = args.back(); + args.pop_back(); + minute = args.back(); + args.pop_back(); + second = args.back(); + args.pop_back(); + + datetime_str = hour.erase(hour.size() - 1) + ":" + minute.erase(minute.size() - 1) + ":" + second.erase(second.size() - 1); + day = day.erase(day.size() - 1) + "."; + + } + else + throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); + + //Add dummy yyyy-mm-dd to parse datetime in CH + datetime_str = "0000-00-00 " + datetime_str; + + out = std::format("CONCAT('{}',toString(SUBSTRING(toString(toTime(parseDateTime64BestEffortOrNull('{}', 9 ,'UTC' ))),12)))" ,day ,datetime_str ); + + return true; } bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) @@ -366,7 +579,7 @@ bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) offset = getConvertedArgument(fn_name, pos); } - out = std::format("date_add(DAY,{}, toDateTime64((toStartOfDay({})) , 9 , 'UTC')) ", offset, datetime_str); + out = std::format("date_add(DAY,{}, parseDateTime64BestEffortOrNull((toStartOfDay({})) , 9 , 'UTC')) ", offset, datetime_str); return true; } @@ -386,7 +599,7 @@ bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) offset = getConvertedArgument(fn_name, pos); } - out = std::format("date_add(MONTH,{}, toDateTime64((toStartOfMonth({})) , 9 , 'UTC')) ", offset, datetime_str); + out = std::format("date_add(MONTH,{}, 
parseDateTime64BestEffortOrNull((toStartOfMonth({})) , 9 , 'UTC')) ", offset, datetime_str); return true; } @@ -406,7 +619,7 @@ bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) offset = getConvertedArgument(fn_name, pos); } - out = std::format("date_add(Week,{}, toDateTime64((toStartOfWeek({})) , 9 , 'UTC')) ", offset, datetime_str); + out = std::format("date_add(Week,{}, parseDateTime64BestEffortOrNull((toStartOfWeek({})) , 9 , 'UTC')) ", offset, datetime_str); return true; } @@ -425,7 +638,7 @@ bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) ++pos; offset = getConvertedArgument(fn_name, pos); } - out = std::format("date_add(YEAR,{}, toDateTime64((toStartOfYear({}, 'UTC')) , 9 , 'UTC'))", offset, datetime_str); + out = std::format("date_add(YEAR,{}, parseDateTime64BestEffortOrNull((toStartOfYear({}, 'UTC')) , 9 , 'UTC'))", offset, datetime_str); return true; } @@ -496,3 +709,4 @@ bool MonthOfYear::convertImpl(String & out, IParser::Pos & pos) } } + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index bde104e88b96..a40c8125063a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -223,5 +223,36 @@ class MonthOfYear : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; +void inline getTokens(String format , std::vector & res ) +{ + String str = format; + String token; + auto pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + while (pos != String::npos ) + { + if ( pos != 0 ) + { + // Found a token + token = str.substr(0, pos); + res.insert(res.begin(),token); + } + /* else + { + // Found another delimiter + // Just move on to next one + + } +*/ + str.erase(0, pos+1); // Always remove pos+1 to get rid of delimiter + pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + } + // 
Cover the last (or only) token + if ( str.length() > 0 ) + { + token = str; + res.insert(res.begin(),token); + } +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index e1d932e9ce19..038e801216b0 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -78,7 +78,7 @@ bool BinAt::convertImpl(String & out,IParser::Pos & pos) if (origal_expr == "datetime" || origal_expr == "date") { - out = std::format("toDateTime64({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); + out = std::format("parseDateTime64BestEffortOrNull({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); } else if (origal_expr == "timespan" || origal_expr =="time" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) { diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index 65eeafe737b7..ca2d2421181d 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -348,7 +348,7 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec } makeSeries(kql_make_series, pos.max_depth); - + Tokens token_subquery(kql_make_series.sub_query.c_str(), kql_make_series.sub_query.c_str() + kql_make_series.sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index 3dd5447b7db6..b5c7971e6445 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -52,15 +52,15 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "print datetime(2015-12-31 23:59:59.9)", - "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + "SELECT 
parseDateTime64BestEffortOrNull('2015-12-31 23:59:59.9', 9, 'UTC')" }, { "print datetime(\"2015-12-31 23:59:59.9\")", - "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + "SELECT parseDateTime64BestEffortOrNull('2015-12-31 23:59:59.9', 9, 'UTC')" }, { "print datetime('2015-12-31 23:59:59.9')", - "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + "SELECT parseDateTime64BestEffortOrNull('2015-12-31 23:59:59.9', 9, 'UTC')" }, { "print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)", @@ -124,7 +124,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "print bin(datetime(1970-05-11 13:45:07), 1d)", - "SELECT toDateTime64(toInt64(toFloat64(toDateTime64('1970-05-11 13:45:07', 9, 'UTC')) / 86400) * 86400, 9, 'UTC')" + "SELECT parseDateTime64BestEffortOrNull(toFloat64(parseDateTime64BestEffortOrNull('1970-01-01 12:00:00.0', 9, 'UTC')) + (toInt64(((toFloat64(parseDateTime64BestEffortOrNull('2017-05-15 10:20:00.0', 9, 'UTC')) - toFloat64(parseDateTime64BestEffortOrNull('1970-01-01 12:00:00.0', 9, 'UTC'))) / 86400) + 0) * 86400), 9, 'UTC') AS res" } }))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index e5d2ee5e0634..09fbff6625fd 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -8,19 +8,19 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, ::testing::ValuesIn(std::initializer_list{ { "print week_of_year(datetime(2020-12-31))", - "SELECT toWeek(toDateTime64('2020-12-31', 9, 'UTC'), 3, 'UTC')" + "SELECT toWeek(parseDateTime64BestEffortOrNull('2020-12-31', 9, 'UTC'), 3, 'UTC')" }, { "print startofweek(datetime(2017-01-01 10:10:17), -1)", - "SELECT toDateTime64(toStartOfWeek(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(-1)" + "SELECT parseDateTime64BestEffortOrNull(toStartOfWeek(parseDateTime64BestEffortOrNull('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + 
toIntervalWeek(-1)" }, { "print startofmonth(datetime(2017-01-01 10:10:17), -1)", - "SELECT toDateTime64(toStartOfMonth(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(-1)" + "SELECT parseDateTime64BestEffortOrNull(toStartOfMonth(parseDateTime64BestEffortOrNull('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(-1)" }, { "print startofday(datetime(2017-01-01 10:10:17), -1)", - "SELECT toDateTime64(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1)" + "SELECT parseDateTime64BestEffortOrNull(toStartOfDay(parseDateTime64BestEffortOrNull('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1)" }, { @@ -29,27 +29,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "print monthofyear(datetime(2015-12-14))", - "SELECT toMonth(toDateTime64('2015-12-14', 9, 'UTC'))" + "SELECT toMonth(parseDateTime64BestEffortOrNull('2015-12-14', 9, 'UTC'))" }, { "print hourofday(datetime(2015-12-14 10:54:00))", - "SELECT toHour(toDateTime64('2015-12-14 10:54:00', 9, 'UTC'))" + "SELECT toHour(parseDateTime64BestEffortOrNull('2015-12-14 10:54:00', 9, 'UTC'))" }, { "print getyear(datetime(2015-10-12))", - "SELECT toYear(toDateTime64('2015-10-12', 9, 'UTC'))" + "SELECT toYear(parseDateTime64BestEffortOrNull('2015-10-12', 9, 'UTC'))" }, { "print getmonth(datetime(2015-10-12))", - "SELECT toMonth(toDateTime64('2015-10-12', 9, 'UTC'))" + "SELECT toMonth(parseDateTime64BestEffortOrNull('2015-10-12', 9, 'UTC'))" }, { "print dayofyear(datetime(2015-10-12))", - "SELECT toDayOfYear(toDateTime64('2015-10-12', 9, 'UTC'))" + "SELECT toDayOfYear(parseDateTime64BestEffortOrNull('2015-10-12', 9, 'UTC'))" }, { "print dayofmonth(datetime(2015-10-12))", - "SELECT toDayOfMonth(toDateTime64('2015-10-12', 9, 'UTC'))" + "SELECT toDayOfMonth(parseDateTime64BestEffortOrNull('2015-10-12', 9, 'UTC'))" }, { "print unixtime_seconds_todatetime(1546300899)", @@ -57,7 +57,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, 
ParserTest, }, { "print dayofweek(datetime(2015-12-20))", - "SELECT toDayOfWeek(toDateTime64('2015-12-20', 9, 'UTC')) % 7" + "SELECT toDayOfWeek(parseDateTime64BestEffortOrNull('2015-12-20', 9, 'UTC')) % 7" }, { "print now()", @@ -136,6 +136,51 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", "SELECT dateDiff('minute', toDateTime64('2017-10-30 23:05:01', 9, 'UTC'), toDateTime64('2017-10-30 23:00:59', 9, 'UTC')) * -1" + }, + { + "print datetime(null)", + "SELECT parseDateTime64BestEffortOrNull('null', 9, 'UTC')" + }, + { + "print datetime('2014-05-25T08:20:03.123456Z')", + "SELECT parseDateTime64BestEffortOrNull('2014-05-25T08:20:03.123456Z', 9, 'UTC')" + }, + { + "print datetime(2015-12-14 18:54)", + "SELECT parseDateTime64BestEffortOrNull('2015-12-14 18:54', 9, 'UTC')" + }, + { + "print make_timespan(67,12,30,59.9799)", + "SELECT CONCAT('67.', toString(substring(toString(toTime(parseDateTime64BestEffortOrNull('0000-00-00 12:30:59.9799', 9, 'UTC'))), 12)))" + }, + { + "print todatetime('2014-05-25T08:20:03.123456Z')", + "SELECT parseDateTime64BestEffortOrNull(toString('2014-05-25T08:20:03.123456Z'), 9, 'UTC')" + }, + { + "print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')", + "SELECT concat(substring(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')), 1, position(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')), '.')), substring(substring(toString(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC')), position(toString(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC')), '.') + 1), 1, 3), substring(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 
'UTC'), '%y-%m-%d [%H:%M:%S.]')), position(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')), '.') + 1, length(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')))))" + }, + { + "print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')", + "SELECT formatDateTime(parseDateTime64BestEffortOrNull('2015-12-14 02:03:04.12345', 9, 'UTC'), '%y-%m-%e %I:%M:%S %p')" + }, + { + "print format_timespan(time(1d), 'd-[hh:mm:ss]')", + "SELECT leftPad(toString(formatDateTime(toDateTime64(86400., 9, 'UTC'), '%e-[%I:%M:%S]')), length(toString(formatDateTime(toDateTime64(86400., 9, 'UTC'), '%e-[%I:%M:%S]'))) + 0, '0')" + }, + { + "print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')", + "SELECT leftPad(concat(substring(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]')), 1, position(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]')), '.')), substring(substring(toString(toDateTime64(1038655., 9, 'UTC')), position(toString(toDateTime64(1038655., 9, 'UTC')), '.') + 1), 1, 4), substring(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]')), position(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]')), '.') + 1, length(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]'))))), (length(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]'))) + 3) + 4, '0')" + }, + { + "print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))", + "SELECT formatDateTime(parseDateTime64BestEffortOrNull('2017-10-30 01:02:03.7654321', 9, 'UTC'), '%e')" + }, + { + "print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))", + "SELECT parseDateTime64BestEffortOrNull('2017-10-30 01:02:03.7654321', 9, 'UTC') + toIntervalDay(1)" } + }))); From 
48af801fdbfa31909e9c5599ee63460097000eb9 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 23 Aug 2022 12:56:55 -0700 Subject: [PATCH 123/342] Fixed tests --- q | 674432 +++++++++++++++ .../KustoFunctions/KQLGeneralFunctions.cpp | 2 +- .../tests/KQL/gtest_KQL_MakeSeries.cpp | 2 +- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 2 +- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 25 +- 5 files changed, 674446 insertions(+), 17 deletions(-) create mode 100644 q diff --git a/q b/q new file mode 100644 index 000000000000..1f9ab46542e6 --- /dev/null +++ b/q @@ -0,0 +1,674432 @@ +commit 9f567b4e62e8f65bc7d2a976dcd771b6636156f6 (HEAD -> DateTime-Part3) +Author: HeenaBansal2009 +Date: Tue Aug 23 09:43:43 2022 -0700 + + DateTime part3 functions + +commit a6a8340d00ba90509177b84626038cdc2dd2695a +Author: HeenaBansal2009 +Date: Tue Aug 16 07:31:15 2022 -0700 + + date_add and date_part + +commit 1f74eddb5fab974f683cfdcbef917a6da994eccc (origin/Kusto-phase2, Kusto-phase2) +Merge: ea036cc0cd 55932cfd9c +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 23 09:17:43 2022 -0400 + + Merge pull request #48 from ClibMouse/feature/kql-array-functions-1096 + + Implement some KQL array functions + +commit 55932cfd9c8023a450719d5fdb53f614fccc424c (origin/feature/kql-array-functions-1096) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 22 15:08:43 2022 -0700 + + Implement some KQL array functions + +commit ea036cc0cd039fa4e88967d343f757acd355264f +Author: Yong Wang +Date: Mon Aug 22 13:33:01 2022 -0700 + + Kusto-phase2: add bin function, unit test for make-series + +commit 019eeb3c5b2bd64c22c3974aacf7646288c91d0d (origin/larry-kql-functional-tests) +Author: Yong Wang +Date: Sun Aug 21 22:31:03 2022 -0700 + + Kusto-phase2, updated make-series operator + +commit 640d8f7f0bf435bd754e65b34b89e413e5f25335 +Merge: 0a4310fef1 e362bc7a6e +Author: Heena Bansal +Date: Fri Aug 19 10:41:40 2022 -0400 + + Merge pull request #41 from 
ClibMouse/Datetime_Part2 + + Part2 DateTime functions + +commit e362bc7a6ed3ceae6ea18b3dfcad180cfd2b6133 (origin/Datetime_Part2, Datetime_Part2) +Author: HeenaBansal2009 +Date: Fri Aug 19 07:35:22 2022 -0700 + + Updated Readme + +commit bbd245e091ebd26ad741d674b100c91ec41f0378 +Author: HeenaBansal2009 +Date: Tue Aug 16 13:01:01 2022 -0700 + + Updated as per review comments + +commit 37b1be801c9a25b501e862009fedb73da3737949 +Author: HeenaBansal2009 +Date: Mon Aug 15 20:45:39 2022 -0700 + + Incorporated review comments + +commit 238b45f99195dec983e1ad06aa35819f6c3379f4 +Author: HeenaBansal2009 +Date: Fri Aug 12 10:24:52 2022 -0700 + + Part2 DateTime functions + +commit f76a277039f5101d552da0f0357937d80ee3b1db +Author: HeenaBansal2009 +Date: Mon Aug 15 20:45:39 2022 -0700 + + Incorporated review comments + +commit 5332da3b617d0050131999d31c052422bc1bf3ab +Author: HeenaBansal2009 +Date: Fri Aug 12 12:58:25 2022 -0700 + + Updated release notes + +commit e687abf259da928960fd54db1b30d7bd12beb8f8 +Author: HeenaBansal2009 +Date: Fri Aug 12 10:24:52 2022 -0700 + + Part2 DateTime functions + +commit 0a4310fef1bc4afcef4799432eee3525fc79bc91 +Merge: 5cb0aa4d0c 09aac74f50 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 19 10:25:24 2022 -0400 + + Merge pull request #46 from ClibMouse/feature/kql-array-iif + + Implement array_iff / array_iif + +commit 09aac74f508d38d34c5579ec2b8eb787dc23acfd (origin/feature/kql-array-iif) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 19 07:24:36 2022 -0700 + + Set the release date + +commit 5cb0aa4d0c06e40b33d2ad4d59ea10e8426595ea +Author: Yong Wang +Date: Wed Aug 17 23:17:29 2022 -0700 + + Kusto-phase2 : fixed the double quote issue + +commit 66b02d0551d4d4129830bedd3105f11e89277dd9 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Aug 17 05:53:02 2022 -0700 + + Implement array_iff / array_iif + +commit b83ab47bf7849db7c10b6533a8c81bfbc6965d46 +Merge: f7cbc8647f 6614c412ef 
+Author: larryluogit +Date: Tue Aug 16 16:02:45 2022 -0400 + + Merge pull request #40 from ClibMouse/larry-kql-functional-tests + + Adding functional tests for IP Binary DateTime and more String functions + +commit 6614c412ef1d8c6c33ebbf7f68ad0dfbbd2f0e3d +Author: Larry Luo +Date: Mon Aug 15 17:12:04 2022 -0400 + + Added datatype tests + +commit 2e95aed33372b16b0a04dc4b66030a956c12bae7 +Author: Larry Luo +Date: Fri Aug 12 14:26:52 2022 -0400 + + Added missing file for IPv6 tests + +commit 52b95e8921d91204f4ab568147ca383043935243 +Author: Larry Luo +Date: Fri Aug 12 14:22:47 2022 -0400 + + Added ipv6 tests + +commit 707cba3e4b5e81487eaef0e139557a91c9983d96 +Author: Larry Luo +Date: Fri Aug 12 12:35:56 2022 -0400 + + Add tests for binary and datetime + +commit 1d4adaa168347ea1a0cf4751c53a81c487c044c7 +Author: Larry Luo +Date: Fri Aug 12 12:34:03 2022 -0400 + + Added functional tests for IP, String, Binary and Datetime + +commit f7cbc8647f08302a299bb90fc03aa49309986ad2 +Author: kashwy +Date: Tue Aug 16 06:48:49 2022 -0700 + + Kusto-phase2 : finish make series + +commit 1fc52b579f60d3df9f1873e7211d702a8e03d355 +Merge: e0c4c5afff a516aaf84f +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 17:09:11 2022 -0400 + + Merge pull request #42 from ClibMouse/feature/kql-conversion-functions + + Implement some KQL conversion functions + +commit a516aaf84fb1293160ed73b2d4a5ada9d9bcbe03 (origin/feature/kql-conversion-functions) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 13:34:55 2022 -0700 + + Implement some KQL conversion functions + +commit e0c4c5afffda69ebe549c0d0da83f96a6a79c435 +Author: kashwy +Date: Fri Aug 12 11:47:25 2022 -0700 + + Kusto-phase2: add bin_at function. 
fix trim error + +commit 214ef8aa9fda30e69d6712a691d427532be2f1b8 +Merge: fcc0d8340b f94045366a +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 13:03:40 2022 -0400 + + Merge pull request #39 from ClibMouse/feature/ip-functions-part-3 + + Implement KQL IPv6 functions + +commit f94045366a74d534132879f823763cdd3c2bf73f (origin/feature/ip-functions-part-3) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 09:54:49 2022 -0700 + + Add unit tests + +commit 140a343e006071012a7a97fb541fce2d2f83bc35 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 07:54:38 2022 -0700 + + Correct rebase error + +commit 6336c666c10c2bcfe657a4948e3538b5e3eacde7 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 07:32:42 2022 -0700 + + Implement KQL IPv6 functions + +commit fcc0d8340bc0d5b22a7af87b39fe8e8af31dcb14 +Merge: 80c29bcdeb 8d03117091 +Author: Mallik Hassan +Date: Fri Aug 12 11:35:34 2022 -0300 + + Merge pull request #37 from ClibMouse/Kusto-phase2-Dynamic-Array-Functions + + Resubmit Dynamic Array functions - array_index_of, length, sum and dynamic keyword + +commit 8d031170917b16f5371b52d1ca58aab4dd6b5685 (origin/Kusto-phase2-Dynamic-Array-Functions) +Author: root +Date: Thu Aug 11 17:11:22 2022 -0700 + + Resubmit Aggregate functions - array_index_of, length, sum and dynamic data type + +commit 80c29bcdebc1e3ea6693a7d06d1be5cc29169003 +Author: kashwy +Date: Thu Aug 11 12:38:49 2022 -0700 + + Kusto-pahse2: fixed toimspan issue and other functions + +commit d8c1f41b66dbc1ef686a9f9c0e74abde3bdb10bd +Merge: 8e71c21508 4a1866b320 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Thu Aug 11 09:04:37 2022 -0400 + + Merge pull request #36 from ClibMouse/feature/kql-binary-functions + + Implement KQL binary functions + +commit 8e71c21508662af555a2fb417d3d0a5a81a712d2 +Merge: 803dfd2063 7296dd583e +Author: Heena Bansal +Date: Wed Aug 10 16:57:40 2022 -0400 + + Merge pull 
request #34 from ClibMouse/Kusto-DateTime_part1 + + KQL DateTime functions PART 1 + +commit 7296dd583e465a174c3eb5d0b7ff1bcdd914c470 (origin/Kusto-DateTime_part1, Kusto-DateTime_part1) +Author: HeenaBansal2009 +Date: Wed Aug 10 13:04:07 2022 -0700 + + Added test and review comments + +commit 4a1866b320f6f3938cd683c264c31b74457b70fb (origin/feature/kql-binary-functions) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Aug 10 07:58:07 2022 -0700 + + Update release notes + +commit f5b87531a46ec899a412b22f06984560d8a98f94 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Aug 10 07:46:29 2022 -0700 + + Implement KQL binary functions + +commit 803dfd2063e7a058b397eaa09732963a8f4473f6 +Merge: cd2838cc42 f7c47a79c8 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 9 13:06:39 2022 -0400 + + Merge pull request #35 from ClibMouse/bugfix/fix-unit-tests + + Update unit tests for IP functions + +commit f7c47a79c80a25c911b62fd929e6bfca80c84958 (origin/bugfix/fix-unit-tests) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 9 10:02:09 2022 -0700 + + Update unit tests for IP functions + +commit e4dc7889dfdec654871198e70fccc22c4fb47960 +Author: HeenaBansal2009 +Date: Tue Aug 9 09:40:35 2022 -0700 + + Date_Time functions PART 1 + +commit cd2838cc424e139be3e98ecad03eba7e2b1bdba6 +Author: kashwy +Date: Tue Aug 9 06:11:39 2022 -0700 + + Kusto-phase2: Add kusto data types + +commit 26472e65bcd806201207c09dcf0da55bcf8df63e +Merge: e08e0f6b9b 5d67138a81 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 8 10:49:56 2022 -0400 + + Merge pull request #31 from ClibMouse/feature/improve-ipv4-performance + + Improve performance of IPv4 functions + +commit 5d67138a81bff4f5c52c47ddd4c6e81a2822eb85 (origin/feature/improve-ipv4-performance) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 8 07:38:51 2022 -0700 + + Improve performance of IPv4 functions + +commit 
e08e0f6b9b18ed20ce19272f0559dde104cb0e24 +Merge: 5a7dac91dd 89c9491ecc +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 8 10:28:08 2022 -0400 + + Merge pull request #25 from ClibMouse/feature/ip-functions-part-2-783 + + KQL IPv4 functions + +commit 89c9491eccc576d81bb800707595c9849f5a6c1d (origin/feature/ip-functions-part-2-783) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 8 07:27:00 2022 -0700 + + Move KQL tests into their own folder + +commit c98a49d358a5516007c7c12864e0c2cf161bb195 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 5 13:58:32 2022 -0700 + + Implement unit tests for IP functions + +commit 5a7dac91dd491ad160e6577d23f96d7413154150 +Merge: 6be179ae7e 3e6578796f +Author: larryluogit +Date: Fri Aug 5 13:05:17 2022 -0400 + + Merge pull request #30 from ClibMouse/larry-kql-functional-tests + + Add make list set and more ip tests + +commit 45a7fba5fcf8d0c3e07f7a4ad7600fc0558a06b0 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Thu Aug 4 10:16:23 2022 -0700 + + Make aliases unique + +commit 01aa7ddbdfa401bfe80fe856308473e65e435b25 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 2 16:28:50 2022 -0700 + + Add unit tests and release notes + +commit 432aaa085f5bf971f3edcc6571ef5cd6a844f88f +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 2 07:35:21 2022 -0700 + + Implement KQL functions handling IPv4 + +commit 3e6578796f90856c41078e766e9f3e12e2944a19 +Author: Larry Luo +Date: Thu Aug 4 22:33:08 2022 -0400 + + Add make list set and more ip tests + +commit 6be179ae7ee3b1614cf6231f3d9f6b055d37f489 +Author: Larry Luo +Date: Thu Aug 4 20:55:33 2022 -0400 + + Fix rebase conflicts. 
+ +commit 561570f622a42f7d79936750768366762a75cca5 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Aug 3 16:49:36 2022 -0700 + + Fix some IP function unit tests + +commit 441c348647260c48929b7a633ca01bae09cfaa6f +Author: HeenaBansal2009 +Date: Wed Aug 3 14:06:02 2022 -0700 + + Fix bug in clickhouse-client for non-interactive mode + +commit 8e3cc459c94aafc48a09b12bfd159b74e784f695 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Thu Jul 28 07:24:45 2022 -0700 + + Extract common functions + +commit 79e645cf885cdb5464642040029121c159d15c82 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Jul 27 12:44:08 2022 -0700 + + Improve conformance to the specifications + +commit f6365f148598e23a3a8f22709f6a95c220877bc8 +Author: Larry Luo +Date: Tue Jul 26 20:24:29 2022 -0400 + + Added func tests for string and ip + +commit e7af515ba28174b09e9aaa770b4bcc4bf047faf5 +Author: Yong Wang +Date: Mon Jul 25 00:01:19 2022 -0700 + + Kusto-phase2: Added check end of function, and neww string functions + +commit b80da6b076d7202e34e53b3eba6f0f1167204c6b +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Jul 26 08:32:28 2022 -0700 + + Provide conformance to the specification + +commit 4564e97e4ce0bdad2b94e3bde5bc17fd22d0eae1 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Jul 19 09:52:54 2022 -0700 + + Support expressions as IP function arguments + +commit afa8390df7821cd21b5e8a9b17f8613596ac3179 +Author: root +Date: Wed Aug 3 08:22:17 2022 -0700 + + update release notes and test script + +commit 0150da0edc286efc6906547b2a6530462f9a4979 +Author: root +Date: Tue Aug 2 19:03:22 2022 -0700 + + Kusto Aggregate functions as of July 29 + +commit 4c627fdddbb56eda0dfbc7443101d9864d2ffaba +Author: Yong Wang +Date: Fri Jul 22 05:52:26 2022 -0700 + + Kusto-phase2: Add print operator + +commit 5681abb810e78dce035b9ba34d7702dddfd2dc27 +Author: Larry Luo +Date: Wed Jul 20 14:18:03 2022 -0400 + + Added sorting 
test cases + +commit 1e6d472555c4f87ebf1c1050eb9d2125182fdadf +Author: Larry Luo +Date: Mon Jul 18 15:56:57 2022 -0400 + + Add functional tests for tabular table summarize + +commit d807446849922616c03b9a0980eeb91d291ef6c1 +Author: Yong Wang +Date: Wed Jul 20 06:39:32 2022 -0700 + + Kusto-phase: Add function to validate end of kql function + +commit c890a9dbc8be90868ec576b133dd3ab0d88c650e +Author: Yong Wang +Date: Tue Jul 19 21:25:52 2022 -0700 + + Kusto-phase2 Fixed bug of Syntax error when Order By is followed by another statement + +commit 94a739094b765291368296d168d6b2608b67eb48 +Author: HeenaBansal2009 +Date: Thu Jul 28 08:59:08 2022 -0700 + + Updated Release notes with examples + +commit b6484dbdf05a40d1e281f01303221e9f024c755e +Author: HeenaBansal2009 +Date: Tue Jul 26 22:13:34 2022 -0700 + + Add config entry to overwrite default dialect to kusto auto + +commit 6cc15190c9d9ecda8e895b9a996749089262e2a5 +Author: Yong Wang +Date: Sat Jul 16 07:49:24 2022 -0700 + + Kusto-phase2: Fixed the issue of conflict + +commit a4a947b33260f824d481d764a39bbd0054acea01 +Author: Yong Wang +Date: Thu Jun 23 14:26:37 2022 -0700 + + Kusto-phase2: add kusto_auto dialect + +commit 6754d71d34d7613c510488b7ee2f90dc88e0ebcd +Author: Yong Wang +Date: Tue Jun 21 09:33:07 2022 -0700 + + Kusto-phase2: Add KQL functions parser + +commit 45d804a1d917d6fcb7addd45148f64bf19a538ec +Author: Yong Wang +Date: Fri Jun 17 08:47:08 2022 -0700 + + Kusto-phase2 : Added KQL functions interface. 
+ changed the summarize class for new aggregation functions + +commit 3a619fb39fc5ef9de12501d7c9a76aabc0da2441 +Author: Yong Wang +Date: Sat Jun 11 10:33:38 2022 -0700 + + Kusto-phase1: Fixed style + +commit e5f3ab433311c5d4aa53ff0a74c7fdaa8433aa80 +Author: root +Date: Thu Jun 9 11:04:20 2022 -0700 + + Kusto summarize init + +commit caee54a5f77324d686b37207d9a66e1db6dc9592 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Jul 15 12:13:50 2022 -0700 + + Updated release notes to indicate deficiencies + +commit bd2b0296bad51880c8941923d85d05336fec3ccd +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Jul 15 10:32:49 2022 -0700 + + Update release notes + +commit 41396c3770f489c37bd182b72eb120f46669519a +Author: Yong Wang +Date: Fri Jul 15 06:54:23 2022 -0700 + + Kusto-phase2: Added some string functions and release note + +commit 808c2883ede5a9176a3fac7df65d7bf85634db6d +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Jul 15 10:14:01 2022 -0700 + + Implement ipv4_is_private + +commit 7b65761f50a7ba10b4b665e35fdf7eefb02ec235 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Jul 15 10:13:44 2022 -0700 + + Implement review comments + +commit 3667092e968d97798c81c89f824230a7d38d50a8 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Thu Jul 14 09:00:51 2022 -0700 + + Implement some IP-handling functions + +commit f3df9c7734a3277894b44fb56db204be5188aae6 +Author: Yong Wang +Date: Tue Jul 12 08:49:42 2022 -0700 + + Kusto-phase2: Changed dialect to use enumerate, Added subquery for in operator, fixed the multi query issue + +commit 8d20a97ec66bfb6661a2baf170fce112537ae6ce +Author: Yong Wang +Date: Wed Jun 29 23:01:17 2022 -0700 + + Kusto-phase2 : Fix the function base64_decode_tostring() + +commit 69e12692b371ec3b2f71f3cae1c8ce2c8af57b84 +Author: Yong Wang +Date: Wed Jun 29 13:02:14 2022 -0700 + + Kusto-phase 2: Add more string operators + +commit 
7fb659ea5b58574ef1be93bee8969d24716db3bd +Author: Yong Wang +Date: Tue Jun 28 22:03:36 2022 -0700 + + Kusto-phase2: Add table function kql() + +commit 9d902a53ceecab662a9e7b52c442d15d76003e52 +Author: root +Date: Wed Jun 29 21:17:17 2022 -0700 + + Aggregate function working with two pipes + +commit 55a1e836b1559128ea12de872b0485c2989363d0 +Author: root +Date: Wed Jun 29 11:23:13 2022 -0700 + + Aggregate functions initial code - Priority:HIGHT(Easy and Medium) + +commit 3e9f23f7ceaa1d41f0af15b87a1b681e564a3771 +Author: Yong Wang +Date: Fri Jun 24 13:05:52 2022 -0700 + + Kusto-phase2: Add alias support + +commit 311a59191e058e707195b6ad65bcd0f62808be5d +Author: Yong Wang +Date: Thu Jun 23 14:26:37 2022 -0700 + + Kusto-phase2: add kusto_auto dialect + +commit 6bb8f1a06e8ebada3f337e6bc8bfaa5f19beccec +Author: Yong Wang +Date: Wed Jun 22 12:00:47 2022 -0700 + + Kusto-phase2: Add common function to get argument for function convertion + +commit 8d5a925fcac00c34f8bb99c1551ee02136f9bde0 +Author: Yong Wang +Date: Tue Jun 21 09:33:07 2022 -0700 + + Kusto-phase2: Add KQL functions parser + +commit 4a86da3992bf3cae8c55a6bc2edc9e7043367031 +Author: Yong Wang +Date: Fri Jun 17 08:47:08 2022 -0700 + + Kusto-phase2 : Added KQL functions interface. + changed the summarize class for new aggregation functions + +commit 257abcdb6942fa86ddb1ff1c19e985c9b370d811 +Author: Yong Wang +Date: Sat Jun 11 10:33:38 2022 -0700 + + Kusto-phase1: Fixed style + +commit a99e3adb50988270f841ccd859716148eae8698d +Author: root +Date: Thu Jun 9 11:04:20 2022 -0700 + + Kusto summarize init + +commit 8fe315e4b6c9c6c1bbb6d84289021b893976b2c8 +Author: Yong Wang +Date: Wed Jun 8 10:14:03 2022 -0700 + + Kusto-phase1: Add Support to Kusto Query Language + + This is the initial implement of Kusto Query Language. 
+ + in this commit, we support the following features as MVP : + + Tabular expression statements + Limit returned results + Select Column (basic project) + sort, order + Perform string equality operations + Filter using a list of elements + Filter using common string operations + Some string operators + Aggregate by columns + Base aggregate functions + only support avg, count ,min, max, sum + Aggregate by time intervals + +commit b8ca6a7a832bfb691b4453f2bca9af62e6203fa2 +Author: Yong Wang +Date: Tue Jun 21 09:33:07 2022 -0700 + + Kusto-phase2: Add KQL functions parser + +commit d72c403f816509685e06eef09a957438725e8d02 +Author: Yong Wang +Date: Fri Jun 17 08:47:08 2022 -0700 + + Kusto-phase2 : Added KQL functions interface. + changed the summarize class for new aggregation functions + +commit ff202f1d11a71a35bb381339a35d5e589478f8c5 +Author: Yong Wang +Date: Tue Jun 14 07:40:06 2022 -0700 + + Kusto-pahse2: Add support for multiple summarize + +commit e17d586b56ffd0a4ab8b443e55be07cf2a5fb87d +Author: Yong Wang +Date: Mon Jun 13 06:26:02 2022 -0700 + + Kusto-phase1: Fixed misleading indentation + +commit c84d164c91792f2504cddf3f2851b06a5433eb15 +Author: Yong Wang +Date: Sun Jun 12 20:05:51 2022 -0700 + + Kusto-pahse1: Fixed moy style issues. 
+ +commit bedfc2ed619235b842db7bd5f030e844d0a0c816 +Author: Yong Wang +Date: Sat Jun 11 10:33:38 2022 -0700 + + Kusto-phase1: Fixed style + +commit 4b47d3299a7b6da7ac15c06cf535407379c2b502 +Author: Yong Wang +Date: Thu Jun 9 22:17:58 2022 -0700 + + Kusto-phase1: Fixed the bug for KQL filer with multiple operations + +commit 77806601ac2316bd450022aff265373ac49e45b9 +Author: Yong Wang +Date: Thu Jun 9 18:49:22 2022 -0700 + + Kusto-phase1 : + Add new test cases + +commit 20f8edb08d67e98825aad1568f0fe6539e032354 +Author: root +Date: Thu Jun 9 12:06:15 2022 -0700 + + corrected unit test + +commit f7b84af6aaf5b6e7a5e295f7a7b6635a5071e75f +Author: root +Date: Thu Jun 9 11:29:51 2022 -0700 + + removed unwanted comments + +commit a1f2f8f8ca73453cbbb08eef53ea62bef662e2af +Author: root +Date: Thu Jun 9 11:18:49 2022 -0700 + + added single unit test case for summarize bin() + +commit d594afa8efdf148b2db0f1eccd3488a6b6eed23b +Author: root +Date: Thu Jun 9 11:04:20 2022 -0700 + + Kusto summarize init + +commit 5763e77e7651d4847d8e87e227f43e35168cdd0a +Author: Yong Wang +Date: Wed Jun 8 10:14:03 2022 -0700 + + Kusto-phase1: Add Support to Kusto Query Language + + This is the initial implement of Kusto Query Language. + + in this commit, we support the following features as MVP : + + Tabular expression statements + Limit returned results + Select Column (basic project) + sort, order + Perform string equality operations + Filter using a list of elements + Filter using common string operations + Some string operators + Aggregate by columns + Base aggregate functions + only support avg, count ,min, max, sum + Aggregate by time intervals + +commit 6c5a2a1214c0166b2b0fe999cf8850055872315f +Merge: 29273d2bc1 0d1c31a869 +Author: Mikhail f. 
Shiryaev +Date: Thu Aug 4 21:34:00 2022 +0200 + + Merge pull request #39853 from ClickHouse/release-tweak-generate + + Update tweak on version part update + +commit 0d1c31a869d9720c6357f6d97cf6aba07a88f871 +Merge: 8919fd6e58 29273d2bc1 +Author: Mikhail f. Shiryaev +Date: Thu Aug 4 21:33:54 2022 +0200 + + Merge branch 'master' into release-tweak-generate + +commit 29273d2bc1c47debb8b2098a938b7574ad7863ab +Merge: 1eb28f5f5f 0b82fb7816 +Author: alesapin +Date: Thu Aug 4 14:48:10 2022 +0200 + + Merge pull request #39847 from nityanandagohain/patch-1 + + Extra semicolon removed from the TTL example + +commit 1eb28f5f5fc27970f3a74f86d8525eab72f98d66 +Merge: 235649cb98 f608e62c27 +Author: alesapin +Date: Thu Aug 4 14:21:31 2022 +0200 + + Merge pull request #39860 from ClickHouse/avoid_additional_disk_touch + + Better total part size calculation on mutation + +commit 235649cb984371cb141f349f07959611062a5e12 +Merge: 8010479394 c5eab9c760 +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Thu Aug 4 13:02:08 2022 +0200 + + Merge pull request #39458 from Avogar/fix-cancel-insert-into-function + + Fix WriteBuffer finalize when cancel insert into function + +commit 80104793948a82ceaefe710700096d2d31089dd2 +Merge: a952a5dfe5 1d67344ac8 +Author: Alexander Tokmakov +Date: Thu Aug 4 12:38:15 2022 +0300 + + Merge pull request #39893 from ClickHouse/tavplubix-patch-2 + + Update 02354_distributed_with_external_aggregation_memory_usage.sql + +commit 1d67344ac8106688cced06ccf18b0b3868445bfd +Author: Alexander Tokmakov +Date: Thu Aug 4 12:37:25 2022 +0300 + + Update 02354_distributed_with_external_aggregation_memory_usage.sql + +commit a952a5dfe5baa3cf5b8bba9521809bf4e4825d35 +Merge: 9e46abc560 dc25f18f13 +Author: Vitaly Baranov +Date: Thu Aug 4 09:46:25 2022 +0200 + + Merge pull request #39859 from vitlibar/fix-flaky-test_async_backups_to_same_destination + + Fix flaky integration test test_async_backups_to_same_destination. 
+ +commit 9e46abc56003a5f8854f03571577c8a36c8a4d3e +Merge: 71cb055ecc b98e645ff7 +Author: Alexey Milovidov +Date: Thu Aug 4 03:06:55 2022 +0300 + + Merge pull request #39420 from amosbird/better-projection1-fix1 + + Normalize AggregateFunction types and state representations + +commit 71cb055eccc9534b704ddaa6dfcc63c97cd2528b +Merge: fe95703a49 91e3e2f18b +Author: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> +Date: Thu Aug 4 01:36:47 2022 +0200 + + Merge pull request #39812 from guowangy/applyFunction-multi-thread + + KeyCondition: optimize applyFunction in multi-thread scenario + +commit fe95703a49d612547e9c5ac24d01734fc900f45b +Merge: f474eb957d ce3411b0ff +Author: Alexey Milovidov +Date: Thu Aug 4 02:33:58 2022 +0300 + + Merge pull request #39586 from guowangy/bytes-to-bits-mask + + Improve bytes to bits mask transform for SSE/AVX/AVX512 + +commit f474eb957dcab93c89015f9db1ace36e94f191f2 +Merge: 5297592f38 e292d830f5 +Author: Alexey Milovidov +Date: Thu Aug 4 01:56:52 2022 +0300 + + Merge pull request #39758 from ClickHouse/tsan_clang_15 + + Try clang-15 for build with tsan + +commit 5297592f38765293ed592726439c5bec438d6896 +Merge: 9987a9e740 58fc49df66 +Author: Alexey Milovidov +Date: Thu Aug 4 01:51:34 2022 +0300 + + Merge pull request #39868 from ClickHouse/auto/v22.3.10.22-lts + + Update version_date.tsv after v22.3.10.22-lts + +commit 9987a9e7400e125c51a86e7ac9d3f429a81225f6 +Merge: a5d5dc2c00 149581e319 +Author: Alexey Milovidov +Date: Thu Aug 4 01:48:44 2022 +0300 + + Merge pull request #39862 from ClickHouse/follow-up-do-not-optimize-functions-shadowing-args + + Remove prefer_localhost_replica from test + +commit a5d5dc2c00047f5b4f2b2e58f1e456c50a7e3522 +Merge: 1842a3fc7a 517f821e94 +Author: Alexey Milovidov +Date: Thu Aug 4 01:46:08 2022 +0300 + + Merge pull request #39323 from ClickHouse/clickhouse-server-service + + Clean out our clickhouse-server.service from /etc + +commit 8919fd6e58aa18fd0f783b51458bced9a643ce2b +Author: Mikhail 
f. Shiryaev +Date: Thu Aug 4 00:40:32 2022 +0200 + + Add handful notes to a post-release logging + +commit ff26492830551a46ea6c3903baaf3d221a31ed6a +Author: Mikhail f. Shiryaev +Date: Thu Aug 4 00:24:43 2022 +0200 + + Prevent spoiling rollback_stack + +commit 1842a3fc7a8fdb50491aa97a5b531088ac63fdb9 +Merge: 4354e3db96 8533769132 +Author: Mikhail f. Shiryaev +Date: Thu Aug 4 00:21:44 2022 +0200 + + Merge pull request #39709 from ClickHouse/update-ccache + + Update ccache to the latest available version + +commit 4354e3db9680a442f14ed30e4e235ab472783fba +Merge: b84e65bb3b 0e6a0d589f +Author: Dan Roscigno +Date: Wed Aug 3 16:41:29 2022 -0400 + + Merge pull request #39873 from DanRoscigno/translate-guides-to-zh + + moving PR from clickhouse-docs + +commit 0e6a0d589f7a5fbe05472f8e1f1c50bf074b20db +Author: DanRoscigno +Date: Wed Aug 3 16:27:17 2022 -0400 + + moved image dir + +commit 8b1b059ae96520f6b84ebe6e743199bbdfa07a1f +Author: DanRoscigno +Date: Wed Aug 3 15:56:32 2022 -0400 + + wrong directory + +commit d0c3de9da9e5df877d9034fbe9fd873d9ead4d74 +Author: DanRoscigno +Date: Wed Aug 3 15:55:12 2022 -0400 + + wrong dir + +commit ab455f3767a55d55d22dfbfe0419509277b707f9 +Author: DanRoscigno +Date: Wed Aug 3 14:47:31 2022 -0400 + + moving PR from clickhouse-docs + +commit b84e65bb3b7d4162e9caf0fadd296a895db38b3e +Merge: e6efb47aa3 b386db02e1 +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Wed Aug 3 18:53:37 2022 +0200 + + Merge pull request #39716 from arthurpassos/fix_scalar_cte_with_lc_result + + Unwrap LC column in IExecutablefunction::executeWithoutSparseColumns + +commit e6efb47aa362d1ce0731e4f1f7e4070cd6eaa367 +Merge: 4943202921 3e6b663020 +Author: Maksim Kita +Date: Wed Aug 3 18:50:25 2022 +0200 + + Merge pull request #39850 from kitaisreal/select-query-has-join-method + + Add hasJoin method into ASTSelectQuery + +commit 4943202921001add4b363fd1cd770db898079877 +Author: Nikita Taranov +Date: Wed Aug 3 17:56:59 2022 +0200 + + Improve memory 
usage during memory efficient merging of aggregation results (#39429) + +commit 58fc49df6665cc939032a59d58798e45e2d06780 +Author: robot-clickhouse +Date: Wed Aug 3 14:53:22 2022 +0000 + + Update version_date.tsv after v22.3.10.22-lts + +commit 1c0d2677673a82336641605d08b743964eb3dadc +Merge: f144eae388 b4c3ff0cef +Author: Alexey Milovidov +Date: Wed Aug 3 16:38:25 2022 +0300 + + Merge pull request #39861 from nathanbegbie/fix-docs-typo-postgres + + typo: PostgerSQL -> PostgreSQL + +commit f144eae388b93c65aedf9237cf3a3f2dd8856c31 +Author: Nikita Mikhaylov +Date: Wed Aug 3 15:23:07 2022 +0200 + + Fix typo and extra dots in exception messages from OverCommitTracker (#39858) + +commit 149581e319ac2b78bdd65cfd78b27519ca16eb8e +Author: Igor Nikonov +Date: Wed Aug 3 13:15:16 2022 +0000 + + Remove prefer_localhost_replica + + Test queries failed before fix #39103 regardless the setting value + The setting is randomized + +commit 9eef299e110a7d86ce0baa7d2b6b453a7fb424f8 +Merge: 6b15ee2bd9 ce70f3dacb +Author: Igor Nikonov <954088+devcrafter@users.noreply.github.com> +Date: Wed Aug 3 15:06:42 2022 +0200 + + Merge pull request #39103 from tonickkozlov/tonickkozlov/37032/do-not-optimize-functions-shadowing-args + + Do not optimize GROUP BY functions that shadow their arguments + +commit b4c3ff0cef0a95063cef4f9c6ea93517bff6c002 +Author: nathanbegbie +Date: Wed Aug 3 16:01:24 2022 +0300 + + typo: PostgerSQL -> PostgreSQL + +commit de91875b5e8e0ad425d7110229d3861509a24beb +Author: nathanbegbie +Date: Wed Aug 3 15:59:36 2022 +0300 + + Revert "typo: PostgerSQL -> PostgreSQL" + + This reverts commit fda8b113dc88100ff80dfd778ed1e0bcd740d4d2. + +commit fda8b113dc88100ff80dfd778ed1e0bcd740d4d2 +Author: nathanbegbie +Date: Wed Aug 3 15:54:58 2022 +0300 + + typo: PostgerSQL -> PostgreSQL + +commit 8533769132027c4bd8fee5386dc1a3837704e470 +Author: Mikhail f. 
Shiryaev +Date: Sat Jul 30 00:47:12 2022 +0200 + + Use compression and cleanup with the recent version ccache + +commit eeaf08525fd167b9241ced37840fd007d1157579 +Author: Mikhail f. Shiryaev +Date: Fri Jul 29 14:31:53 2022 +0200 + + Use test-util as source for base-test, fasttest and package builder + +commit dc25f18f132006b6b194788bf9769b412c4f8e59 +Author: Vitaly Baranov +Date: Wed Aug 3 14:04:18 2022 +0200 + + Fix flaky integration test test_async_backups_to_same_destination. + +commit f608e62c27bd5c830b87d646758081def912224f +Author: alesapin +Date: Wed Aug 3 14:17:31 2022 +0200 + + Fix call + +commit 56a4d26e87b5100f59b54a4cf2bccbcb15fdcebd +Author: alesapin +Date: Wed Aug 3 14:15:45 2022 +0200 + + Better total part size calculation on mutation + +commit 6b15ee2bd96f6f101d2b17b8a6e5c0ab9c48c34e +Merge: 05467e315f f94d4d4877 +Author: Anton Popov +Date: Wed Aug 3 13:51:16 2022 +0200 + + Merge pull request #39685 from CurtizJ/hash-functions-map + + Allow to execute hash functions with arguments of type `Map` + +commit 62a05dc10df37725b58e9e222c633addc801bacb +Author: Mikhail f. 
Shiryaev +Date: Wed Aug 3 13:40:31 2022 +0200 + + Add instructions for github-cli installation + +commit 05467e315f56599ed3bc2124d0c60ec13cd1a26c +Merge: 86561509c5 cbff608d0b +Author: Antonio Andelic +Date: Wed Aug 3 13:13:00 2022 +0200 + + Merge pull request #39698 from ClickHouse/update-digest-version + + Update Keeper version for digest + +commit 86561509c510e97709e9a1151667a45d31f28c85 +Merge: cdee1d94d0 3b96ff0fe4 +Author: alesapin +Date: Wed Aug 3 13:03:31 2022 +0200 + + Merge pull request #39673 from ClickHouse/fix-rollback-inconsistency-keeper + + Rollback request in Keeper if storing log fails + +commit cdee1d94d0331f12b0164f614f58689ed96a66b2 +Merge: 14135927fb 7c8ceead63 +Author: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> +Date: Wed Aug 3 12:31:40 2022 +0200 + + Merge pull request #39404 from HeenaBansal2009/Issue_39395 + + Clickhouse-local fixes + +commit 14135927fb790963047753787cbea730f7ac5c66 +Merge: 2ca9df9b22 e78a176b0a +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 12:10:08 2022 +0200 + + Merge pull request #39854 from ClickHouse/auto/v22.7.2.15-stable + + Update version_date.tsv and changelogs after v22.7.2.15-stable + +commit e78a176b0a63bc9333e3481c06f42c5e905e0b53 +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 12:09:29 2022 +0200 + + Regenerate changelog with the recent script + +commit 49b1f62abd91af876190b4ebb4b343500f541105 +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 12:06:45 2022 +0200 + + Update SECURITY.md + +commit c05526beeff79f624394a5e07db3f180436736b1 +Merge: da655fbfcf 2ca9df9b22 +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 12:04:34 2022 +0200 + + Merge remote-tracking branch 'origin/master' into auto/v22.7.2.15-stable + +commit 2ca9df9b22f46f30e6dd7e7fd007d04430c74644 +Merge: 1815b8c00c 469b7e7668 +Author: Mikhail f. 
Shiryaev +Date: Wed Aug 3 12:04:03 2022 +0200 + + Merge pull request #39421 from ClickHouse/github-helper + + GitHub helper + +commit da655fbfcfeee1dc0c39e2e7ec80eb1057ecb0f2 +Author: robot-clickhouse +Date: Wed Aug 3 09:57:02 2022 +0000 + + Update version_date.tsv and changelogs after v22.7.2.15-stable + +commit 08474cf869078120e5e32e83b3f3eb8cca676de2 +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 11:43:47 2022 +0200 + + Update tweak on version part update + +commit 3e6b663020833e26dfa2a08af28e47ea35453280 +Author: Maksim Kita +Date: Wed Aug 3 11:25:45 2022 +0200 + + ASTSelectQuery added hasJoin method + +commit 1815b8c00c8397f9943d175153c6e946707d1009 +Merge: 00a7c8733b fd8ad12e6b +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 10:34:40 2022 +0200 + + Merge pull request #39730 from ClickHouse/jepsen-label + + Jepsen label + +commit 00a7c8733b5e1600709e038faf431cd15fad7893 +Merge: 80d2685ab7 6a7213291b +Author: Robert Schulze +Date: Wed Aug 3 09:23:24 2022 +0200 + + Merge pull request #39633 from guowangy/filter-vbmi2 + + ColumnVector: optimize filter with AVX512VBMI2 compress store + +commit 0b82fb78164ea08d5ee4e8f89055bc7285a82b52 +Author: Nityananda Gohain +Date: Wed Aug 3 12:52:06 2022 +0530 + + Extra semicolon removed from the TTL example + + This PR removes an extra semicolon from the TTL example. 
+ +commit 80d2685ab714abd8c4b23ed93ad55ce39f64eeff +Merge: e2a5faede9 a4c4b1f54d +Author: Alexey Milovidov +Date: Wed Aug 3 09:10:08 2022 +0300 + + Merge pull request #39814 from qianmoQ/fix-cte + + Support cte statement for antlr4 syntax file #39810 + +commit cbff608d0b48dc81a976d09871d968dbbb3d0095 +Merge: b3b3c371f0 e2a5faede9 +Author: Antonio Andelic +Date: Wed Aug 3 07:53:38 2022 +0200 + + Merge branch 'master' into update-digest-version + +commit e2a5faede91980b07dc8ff193f008f17d5ba634f +Merge: 70d97e9393 504180d7d6 +Author: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> +Date: Tue Aug 2 22:55:40 2022 -0400 + + Merge pull request #39843 from ClickHouse/util-self-extracting-macos-script-fix + + Fix post-build script for building utils/self-extracting-executable/compressor + +commit a4c4b1f54da352cf522109ec3120bc773844fdfe +Merge: 094b28b869 70d97e9393 +Author: qianmoQ +Date: Wed Aug 3 09:50:08 2022 +0800 + + Merge branch 'master' into fix-cte + +commit 504180d7d6576467f25c61518ca4742dfd45335f +Author: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> +Date: Tue Aug 2 15:39:11 2022 -0400 + + stat is different for macos + +commit 469b7e7668cd976b058079c4c0e8a3e1c8769f53 +Author: Mikhail f. Shiryaev +Date: Tue Aug 2 18:44:49 2022 +0200 + + Add notes about _is_cache_updated logic + +commit 70d97e9393885b8949115827438fde29d5f8a733 +Merge: ec8a11dfdd f0474f9e46 +Author: Mikhail f. 
Shiryaev +Date: Tue Aug 2 18:29:43 2022 +0200 + + Merge pull request #39630 from ClickHouse/workflow-rerun-lambda + + Attempt to fix wrong workflow_run data for rerun + +commit ec8a11dfdd2f3ce8fa223192263ae32ee6430ca7 +Merge: 2a5b023b0f ad55c2f55a +Author: Alexander Tokmakov +Date: Tue Aug 2 18:48:34 2022 +0300 + + Merge pull request #39817 from ClickHouse/revert-39788-revert-39124-fix-02232_dist_insert_send_logs_level_hung + + Revert "Revert "tests: enable back 02232_dist_insert_send_logs_level_hung"" + +commit 2a5b023b0f50aa610f95452cbe9fd2e9d4ace8ca +Merge: 6405439976 b1919d045f +Author: Anton Popov +Date: Tue Aug 2 16:06:13 2022 +0200 + + Merge pull request #39800 from CurtizJ/fix-cannot-read-all-data + + Fix `CANNOT_READ_ALL_DATA` with `pread_threadpool`. + +commit b386db02e14caf5cc4eb283c5d05b822dbb01e0e +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Tue Aug 2 15:51:57 2022 +0200 + + Fix test + +commit 6405439976e8e6e5321230ec3e47dd60e846293c +Merge: 914bd3654f 90b08d6fae +Author: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> +Date: Tue Aug 2 15:49:14 2022 +0200 + + Merge pull request #39558 from evillique/fix-logs + + Fix logs rotation issue + +commit 914bd3654f5b95b6b95e4a8eace1fe55eef3e3ee +Merge: 2312d8ceea e5c47cb26f +Author: Alexander Tokmakov +Date: Tue Aug 2 16:40:55 2022 +0300 + + Merge pull request #39798 from ClickHouse/tavplubix-patch-2 + + Minor fix for Stress Tests + +commit e292d830f57540e20b1f1f021f86d13eb38ca4a2 +Author: Alexander Tokmakov +Date: Tue Aug 2 15:37:02 2022 +0300 + + Update Dockerfile + +commit 5f7848ffd48f40a34e72d22182571fe31bfa666d +Author: Arthur Passos +Date: Tue Aug 2 09:30:54 2022 -0300 + + Replace LC CTE scalar integ tests by stateless tests + +commit 2312d8ceea244b7c53ba9baa94c1ec21086cbe7d +Merge: f150966f10 5d6804dd65 +Author: Anton Popov +Date: Tue Aug 2 14:27:01 2022 +0200 + + Merge pull request #39696 from BiteTheDDDDt/fix_0729 + + fix align of AggregateFunctionDistinct 
+ +commit e9b124b4bcced6ebc5a7a77aa54aca2ddb48db15 +Author: Arthur Passos +Date: Tue Aug 2 09:17:53 2022 -0300 + + Don't use default implementation for LC columns in Scalar functions + +commit fd8ad12e6b35d816b88c61d8df688dcb75be4c13 +Merge: 621da05145 f150966f10 +Author: Mikhail f. Shiryaev +Date: Tue Aug 2 13:56:02 2022 +0200 + + Merge branch 'master' into jepsen-label + +commit f94d4d4877e61fef816dceaecc2c7fdf62c1e814 +Merge: 2a841d0860 f150966f10 +Author: Anton Popov +Date: Tue Aug 2 13:26:54 2022 +0200 + + Merge branch 'master' into hash-functions-map + +commit f150966f1028ffeef44f61bd8c5c2f170183cd36 +Merge: cd9fc3b5ab 6126bd60ed +Author: Mikhail f. Shiryaev +Date: Tue Aug 2 13:13:47 2022 +0200 + + Merge pull request #39723 from ClickHouse/cherry-pick-fix + + Fix cherry-pick for cases, when assignee is not set for PR + +commit ce70f3dacb4b197e7b96617b2d9de742fd477f20 +Author: Anton Kozlov +Date: Thu Jul 14 15:07:47 2022 +0000 + + fixed 02303_query_kind test; added logging in 02352_grouby_shadows_arg test + +commit 82b50e79cf60393e5ba8b2d07f7122706243dffd +Merge: 0d68b1c67f cd9fc3b5ab +Author: Alexander Tokmakov +Date: Tue Aug 2 13:00:55 2022 +0300 + + Merge branch 'master' into tsan_clang_15 + +commit ad55c2f55a1f85368061bdb9483b4b057c172859 +Author: Alexander Tokmakov +Date: Tue Aug 2 12:49:29 2022 +0300 + + Revert "Revert "tests: enable back 02232_dist_insert_send_logs_level_hung"" + +commit cd9fc3b5ab014bb7d28d34c7ec3eb2dd1f1400ac +Merge: 5ae7f339c4 5050e0aca5 +Author: Alexander Tokmakov +Date: Tue Aug 2 12:48:43 2022 +0300 + + Merge pull request #39816 from ClickHouse/revert-38185-analyze_stuck + + Revert "Limit number of analyze for one query" + +commit 5050e0aca52189cbe3bc07c10dfd2a40e0180107 +Author: Alexander Tokmakov +Date: Tue Aug 2 12:48:31 2022 +0300 + + Revert "Limit number of analyze for one query" + +commit 094b28b869766ca43e2ac5b427e7d220b889a572 +Author: qianmoQ +Date: Tue Aug 2 17:17:08 2022 +0800 + + Support cte statement for antlr4 syntax 
file #39810 + +commit e5c47cb26f3d9cd15ebbdb1383865469aca81dc0 +Author: Alexander Tokmakov +Date: Tue Aug 2 12:10:53 2022 +0300 + + Update run.sh + +commit 5ae7f339c417d1ceba5dc67c1689e7be587d92c5 +Merge: a3bf9496d4 e832153e93 +Author: Robert Schulze +Date: Tue Aug 2 10:41:14 2022 +0200 + + Merge pull request #39813 from ClickHouse/typos + + Typos + +commit c5eab9c760c8f33752978f4f90bd33f65a60604c +Author: avogar +Date: Tue Aug 2 08:38:15 2022 +0000 + + Delete test for s3 + +commit e832153e93561fb7e075e1aa423ce2c933f77cec +Author: Robert Schulze +Date: Tue Aug 2 08:37:58 2022 +0000 + + Typos + +commit a3bf9496d4c1eaa231d40427bb3cc8c265667659 +Merge: 316528817b 77c143aa23 +Author: Nikolai Kochetov +Date: Tue Aug 2 10:35:35 2022 +0200 + + Merge pull request #39799 from ClickHouse/fix-extra-column-after-array-join-optimization + + Fix extra column after ARRAY JOIN optimization. + +commit 91e3e2f18bdf70d8d3cc66d16074605d9743a40c +Author: Wangyang Guo +Date: Tue Aug 2 15:38:27 2022 +0800 + + KeyCondition: optimize applyFunction in multi-thread scenario + + Construct and deconstruct args (ColumnsWithTypeAndName) will inc/dec + ref_count (actually this is a atomic lock inc/dec operation) to share_ptr, + which may share the same DataTypePtr among different threads. This will + have a lock contention issue in large parallel situation. + + The patch try to minimize `args` scope and reduce unnecessary + construct/destory of instances. It will improve the performance in + multi-thread cases. + +commit b3b3c371f068d390a7fed7623197e7122929f204 +Author: Antonio Andelic +Date: Tue Aug 2 09:20:02 2022 +0200 + + Update KeeperStorage.h + +commit b98e645ff7316af83457aa5df3dee8ca660c3cef +Author: Amos Bird +Date: Tue Aug 2 11:33:45 2022 +0800 + + Revert "test what will be wrong if state returns norm type" + + This reverts commit 55802099bcf42ccca359a1ddc462b20ab72123df. 
+ +commit 7c8ceead63b38eed7ae0f2fe538eda63d13ad826 +Merge: d8db482b2e 316528817b +Author: Heena Bansal +Date: Mon Aug 1 22:49:58 2022 -0400 + + Merge branch 'master' into Issue_39395 + +commit 81a15304ca9f6f2d8d0f425aef9b9a04cb79a840 +Merge: 3cc20f05ba 316528817b +Author: Alexey Milovidov +Date: Tue Aug 2 05:45:04 2022 +0300 + + Merge branch 'master' into tavplubix-patch-2 + +commit 316528817b2458fd37960b965f9eef10b4d13535 +Merge: b33fe26d8c 3e627e2861 +Author: Alexey Milovidov +Date: Tue Aug 2 05:44:35 2022 +0300 + + Merge pull request #39179 from azat/fsync-profile-events + + Add profile events for fsync + +commit c8aaa32f9ca75f89fe5848e1266fbca120a4ddc4 +Author: Amos Bird +Date: Tue Aug 2 10:43:48 2022 +0800 + + Revert "Another test" + + This reverts commit 69347028c54edcedc9a43e6795c52c15ad6972ec. + +commit 6a7213291b2c10105ea15081ec977e1e75789187 +Merge: 6a67147584 b33fe26d8c +Author: Wangyang Guo +Date: Tue Aug 2 10:40:40 2022 +0800 + + Merge master and resolve conflict + +commit b1919d045f45df9f63f527e3323cc66e303a2ff9 +Author: Alexey Milovidov +Date: Tue Aug 2 05:34:14 2022 +0300 + + Update ThreadPoolReader.cpp + +commit ce3411b0ff5ec37e7103b29b310e1225029b2f84 +Merge: 3fa1a775d9 b33fe26d8c +Author: Wangyang Guo +Date: Tue Aug 2 10:27:01 2022 +0800 + + Merge master and solve conflict + +commit 90b08d6faee4b5d886210784d0f23a0f7257dc7b +Merge: fbedb70f8b b33fe26d8c +Author: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> +Date: Tue Aug 2 02:55:29 2022 +0200 + + Merge branch 'master' into fix-logs + +commit 64cbecf0c8e32b86d2049f0b41069efdf36458a3 +Author: Anton Popov +Date: Tue Aug 2 00:13:20 2022 +0000 + + fix build on non linux systems + +commit b33fe26d8cd295f2e9fadb33fa447a7688c06788 +Merge: 82e78a03e5 0e154ed1df +Author: Robert Schulze +Date: Mon Aug 1 22:39:57 2022 +0200 + + Merge pull request #39759 from ClickHouse/splitted-to-shared-renaming + + Rename "splitted build" to "shared libraries build" in CI tools + +commit 
82e78a03e51c7fb8c2bd640eae020414fcddff0d +Author: Yuko Takagi <70714860+yukotakagi@users.noreply.github.com> +Date: Mon Aug 1 14:09:28 2022 -0600 + + Add URL for release webinar (#39796) + + Add URL for release webinar. + +commit 6792c3211d5fcd430d0f5715eedb93078c5e988d +Merge: 755a4c3ecf 55af8878a5 +Author: Alexander Tokmakov +Date: Mon Aug 1 22:18:43 2022 +0300 + + Merge pull request #39804 from ClickHouse/revert-39510-update-arrow + + Revert "Update arrow to fix possible data race" + +commit 55af8878a52079e969907532cc374380b33d8032 +Author: Alexander Tokmakov +Date: Mon Aug 1 22:18:34 2022 +0300 + + Revert "Update arrow to fix possible data race" + +commit 5d6804dd6544283a4f8354057b37b16787ca0e2f +Merge: b4842860e3 755a4c3ecf +Author: Anton Popov +Date: Mon Aug 1 21:14:46 2022 +0200 + + Merge branch 'master' into fix_0729 + +commit 69347028c54edcedc9a43e6795c52c15ad6972ec +Author: Amos Bird +Date: Tue Aug 2 03:08:25 2022 +0800 + + Another test + +commit 43e8ca5ba81cd989a5dc5601083e92d60ff8c92b +Author: Anton Popov +Date: Mon Aug 1 18:40:21 2022 +0000 + + fix CANNOT_READ_ALL_DATA with pread_threadpool + +commit 77c143aa235ce4959d01a0c94ff24eb8c7ff56ee +Author: Nikolai Kochetov +Date: Mon Aug 1 17:56:27 2022 +0000 + + Fix extra column after ARRAY JOIN optimization. 
+ +commit 3cc20f05babe5cec22490460b74e1d45a62345f4 +Author: Alexander Tokmakov +Date: Mon Aug 1 20:47:14 2022 +0300 + + Update run.sh + +commit 755a4c3ecfcabc23c4735a66dba33b62edde4d5f +Merge: 3a57634dbb a63fb07f54 +Author: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> +Date: Mon Aug 1 19:10:15 2022 +0200 + + Merge pull request #39794 from melvynator/patch-5 + + Update settings.md + +commit 3a57634dbbff898bb160ec64c955122f70d28485 +Merge: c083abd40b d3cfa0a0c0 +Author: Alexander Tokmakov +Date: Mon Aug 1 20:09:31 2022 +0300 + + Merge pull request #39772 from ClickHouse/fix_distinct_in_order_test + + Fix non-deterministic queries in distinct_in_order test + +commit c083abd40b52884793a9f55d7004bad9223f20fd +Merge: 3de747a6e2 65efc0ec98 +Author: Alexander Tokmakov +Date: Mon Aug 1 19:59:21 2022 +0300 + + Merge pull request #39775 from ClickHouse/fix_subnets_integration_tests + + Fix some flaky integration tests + +commit d8db482b2e11c255aff09921b7c8e72eea938b1f +Merge: 50c98789b8 3de747a6e2 +Author: Heena Bansal +Date: Mon Aug 1 12:22:16 2022 -0400 + + Merge branch 'master' into Issue_39395 + +commit 3de747a6e25a35d79e93c1ad4852bd9a3db2c0e7 +Merge: 31891322a5 567b57a627 +Author: Alexey Milovidov +Date: Mon Aug 1 19:21:30 2022 +0300 + + Merge pull request #39746 from ClickHouse/cleanup-projection-setting + + Cleanup usages of `allow_experimental_projection_optimization` setting, part 1 + +commit 0d68b1c67f4707fa97dab2fa9d36ebb7b5e044b9 +Author: Alexander Tokmakov +Date: Mon Aug 1 18:00:54 2022 +0200 + + fix build with clang-15 + +commit 31891322a51febe79ec3edba6278b5cecdd9e8df +Merge: bf574b9154 b9d7cd6a5d +Author: Maksim Kita +Date: Mon Aug 1 17:59:52 2022 +0200 + + Merge pull request #39681 from pkit/pkit/executable_settings + + add settings for executable table func + +commit d3cfa0a0c0f481c130c4b6842be27eae79602cd8 +Merge: 914cf8eb4d bf574b9154 +Author: Igor Nikonov <954088+devcrafter@users.noreply.github.com> +Date: Mon Aug 1 17:19:55 2022 
+0200 + + Merge branch 'master' into fix_distinct_in_order_test + +commit bf574b91547aec799364d032564606feb5a8bf03 +Merge: 2fd7530880 d39259a4c0 +Author: Robert Schulze +Date: Mon Aug 1 17:04:51 2022 +0200 + + Merge pull request #39760 from ClickHouse/bit-fiddling + + Use std::popcount, ::countl_zero, ::countr_zero functions + +commit 2fd75308807f0b43db5efc9612163cca697cac52 +Merge: 2150d0b9b0 af2f1b4cc3 +Author: Mikhail f. Shiryaev +Date: Mon Aug 1 16:34:23 2022 +0200 + + Merge pull request #39780 from ClickHouse/ch-play-retry + + Retry inserts with ClickHouseHelper + +commit 2a841d0860fc599694ebe639550798dd0547dfe1 +Author: Anton Popov +Date: Mon Aug 1 14:21:07 2022 +0000 + + update docs for hash functions + +commit a63fb07f54d1ce801acd5dd022459a1960360bd9 +Author: Peignon Melvyn +Date: Mon Aug 1 16:20:33 2022 +0200 + + Update settings.md + +commit 50c98789b8ffc2d2b886fb31f28f5a3ba9bfb85b +Merge: 800ed546be fa9c3dcc48 +Author: HeenaBansal2009 +Date: Mon Aug 1 07:05:50 2022 -0700 + + Updated as per comments + +commit 800ed546bef57d0ae6d7c7c2809c645aec32f487 +Author: HeenaBansal2009 +Date: Mon Aug 1 07:03:36 2022 -0700 + + Updated as per comments + +commit 2150d0b9b08497f035c10390b3267fec00ab4e6b +Merge: 8a3ec52b5e 63f9cf02cc +Author: Alexander Tokmakov +Date: Mon Aug 1 16:32:34 2022 +0300 + + Merge pull request #39788 from ClickHouse/revert-39124-fix-02232_dist_insert_send_logs_level_hung + + Revert "tests: enable back 02232_dist_insert_send_logs_level_hung" + +commit 63f9cf02cc103089c4ba2cb81bf59dcb400f5099 +Author: Alexander Tokmakov +Date: Mon Aug 1 16:32:24 2022 +0300 + + Revert "tests: enable back 02232_dist_insert_send_logs_level_hung" + +commit af2f1b4cc3610506b5cf996a4ef8a197c245d4a4 +Author: robot-clickhouse +Date: Mon Aug 1 13:22:53 2022 +0000 + + Automatic style fix + +commit 33b26dda05db2c3f2c0454e3a57c5d61b73460ad +Author: Mikhail f. 
Shiryaev +Date: Mon Aug 1 15:15:48 2022 +0200 + + Improve logging + + Co-authored-by: Antonio Andelic + +commit 55802099bcf42ccca359a1ddc462b20ab72123df +Author: Amos Bird +Date: Mon Aug 1 11:37:36 2022 +0800 + + test what will be wrong if state returns norm type + +commit 1ac716b7427b684711571374729bba88782087ae +Author: Amos Bird +Date: Sun Jul 31 02:45:33 2022 +0800 + + Remove no-s3-storage tag from tests + +commit 8ab475ccf379347cafde4183f53fc3d22c78ae41 +Author: Amos Bird +Date: Tue Jul 26 19:14:38 2022 +0800 + + Fix another case + +commit 09c99d8440fdd81c023f10e65cadcf8687ffd6ed +Author: Amos Bird +Date: Fri Jul 22 14:27:45 2022 +0800 + + Fix tests + +commit f84e5b68270dd6c4140319c09f92bbb599e5e74a +Author: Amos Bird +Date: Fri Jul 22 14:26:46 2022 +0800 + + Allow to format DataTypePtr + +commit 0e746c1afa01b520528d52e97078c41938779fc3 +Author: Amos Bird +Date: Fri Jul 22 12:52:54 2022 +0800 + + More format refactor + +commit 2b2ee8a2c3c38c7dea2560a095b8cbf06f3dc489 +Author: Amos Bird +Date: Fri Jul 22 10:57:54 2022 +0800 + + Format tests + +commit f23b3d64dcdb8dd044ad4c02a8fad4b2e77f889c +Author: Amos Bird +Date: Thu Jul 21 23:09:56 2022 +0800 + + Add tests + +commit f11d0484f3ad0ddbf829e695056fdd154d12a2cc +Author: Amos Bird +Date: Thu Jul 21 23:08:42 2022 +0800 + + Normalize everything else + +commit 2a73ccb3f681b73cbd5f5abcb4eaa9ad1a885ac4 +Author: Amos Bird +Date: Wed Jul 20 22:13:06 2022 +0800 + + Normalize AggregateFunctionCount type comparison + +commit 079db7f34b6207704148c9a948ea4d6d6a832445 +Author: Mikhail f. 
Shiryaev +Date: Mon Aug 1 14:59:13 2022 +0200 + + Retry inserts with ClickHouseHelper + +commit 8a3ec52b5e06f20ccd3472aed7fa440b625ebc0e +Merge: eeb9366010 095e400075 +Author: Anton Popov +Date: Mon Aug 1 14:41:46 2022 +0200 + + Merge pull request #39752 from CurtizJ/fix-index-analysis + + Fix index analysis with tuples and `IN` + +commit eeb9366010f3d336689dba3ccefc4bc6c0477b69 +Merge: c882bdc88e 942f056ce5 +Author: Anton Popov +Date: Mon Aug 1 14:22:49 2022 +0200 + + Merge pull request #39731 from CurtizJ/fix-send-logs-level-test + + Fix redirecting of logs to stdout in clickhouse-client + +commit c882bdc88e75b249ecc901ac68145f1a6cf93ed9 +Merge: 49a708ff29 6a2f7d0c8f +Author: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> +Date: Mon Aug 1 13:57:17 2022 +0200 + + Merge pull request #35968 from ClickHouse/interserver_listen_port + +commit 49a708ff2995917754dcab9076ddeaae68c088a1 +Merge: 80f9ba9186 c9e1364cd0 +Author: Robert Schulze +Date: Mon Aug 1 13:44:33 2022 +0200 + + Merge pull request #39596 from ClickHouse/move-woboq + + Merge Woboq code browser page into "Getting Started" document + +commit 621da0514510c40a169bfb0d8e8e8663ccf24e69 +Merge: d86f07d7ac 80f9ba9186 +Author: Antonio Andelic +Date: Mon Aug 1 13:35:27 2022 +0200 + + Merge branch 'master' into jepsen-label + +commit 65efc0ec98f9771ed98c9212a73eb0057cbb8be0 +Author: robot-clickhouse +Date: Mon Aug 1 11:24:03 2022 +0000 + + Automatic style fix + +commit 80f9ba9186dea2f9db26559ee527dae8c9dac6de +Merge: dfdfabec94 87d513f799 +Author: Alexander Tokmakov +Date: Mon Aug 1 14:20:37 2022 +0300 + + Merge pull request #39690 from ClickHouse/show-addresses-in-stack-traces + + Configuration parameter to hide addresses in stack traces + +commit 38e5e885c31687e9fe76fa5d3304626f574e5d18 +Author: Alexander Tokmakov +Date: Mon Aug 1 13:16:12 2022 +0200 + + fix some flaky integration tests + +commit dfdfabec947065dd4939d8ca92e984212eba0f31 +Merge: 91c0b94768 f79924f270 +Author: Kruglov Pavel 
<48961922+Avogar@users.noreply.github.com> +Date: Mon Aug 1 13:04:19 2022 +0200 + + Merge pull request #39218 from evillique/file_default_value + + Add default argument to the function `file` + +commit 74f87a95c707f4c28c046511b6da08f781b325ab +Author: avogar +Date: Mon Aug 1 10:57:55 2022 +0000 + + Fis style + +commit 91c0b9476889c074ca1388de867febde5ce51dd5 +Merge: 42136b7630 075ff5005e +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Mon Aug 1 12:54:02 2022 +0200 + + Merge pull request #39510 from Avogar/update-arrow + + Update arrow to fix possible data race + +commit 42136b7630fe144a75a0e6caa233bc01f71e83ec +Merge: 6457c069a9 be656f9dfa +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Mon Aug 1 12:46:07 2022 +0200 + + Merge pull request #39647 from Avogar/fix-arrow-strings + + Fix strings in dictionary in Arrow format + +commit 6457c069a97453124245afd72682e1f65822af9a +Merge: 9ec27c0ab4 d66c108a04 +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Mon Aug 1 12:42:18 2022 +0200 + + Merge pull request #39293 from kssenii/fix-positional-args-case + + Fix positional arguments in case of unneeded columns pruning + +commit 914cf8eb4d78b9b5249da06d8f21686ee63c4468 +Author: Igor Nikonov +Date: Mon Aug 1 10:40:18 2022 +0000 + + Fix non-deterministic queries + +commit 9ec27c0ab45c78699f07c7175845358d604d713e +Merge: 379d8c5c6a d87aac2013 +Author: Antonio Andelic +Date: Mon Aug 1 12:17:10 2022 +0200 + + Merge pull request #39757 from ClickHouse/fix-rocksdb-filter-with-params + + Use params correctly for filtering by key in EmbeddedRocksDB + +commit 379d8c5c6a2732c4c9e6e87ae347e0e4690975a8 +Author: Nikita Mikhaylov +Date: Mon Aug 1 12:08:32 2022 +0200 + + Chown all directories for multidisk setup (#39121) + +commit 3bb060336dfad62e591889743f9959f984475916 +Merge: 76f2ba3e98 aff8c12a4a +Author: Alexander Tokmakov +Date: Mon Aug 1 12:42:51 2022 +0300 + + Merge pull request #39124 from 
azat/fix-02232_dist_insert_send_logs_level_hung + + tests: enable back 02232_dist_insert_send_logs_level_hung + +commit c9e1364cd00128fef9b8b7c691263ca94e443a8b +Author: Robert Schulze +Date: Mon Aug 1 08:34:28 2022 +0000 + + Temporarily restore Woboq pages + +commit 76f2ba3e98112688fbfb9974d3d8f2c82be3b1a9 +Merge: ab5a147065 55ff4956ed +Author: Antonio Andelic +Date: Mon Aug 1 09:09:32 2022 +0200 + + Merge pull request #39738 from ClickHouse/fix-jepsen-total-queue + + Use different root path for total-queue Jepsen test + +commit 0e154ed1df6f50aa681b61790c6849b83fa45130 +Author: Robert Schulze +Date: Sun Jul 31 12:43:50 2022 +0000 + + More renamings + +commit 6a67147584b1f57cc24e05f6bfc1eb69ee831c64 +Author: Wangyang Guo +Date: Mon Aug 1 13:17:11 2022 +0800 + + ColumnVector: refactory to use TargetSpecific::Default::doFilterAligned + +commit b05be56eefa2f7de09aaa9cbd2ccf3dd394489bf +Author: Wangyang Guo +Date: Mon Aug 1 10:15:49 2022 +0800 + + ColumnVector: naming style fix + +commit ab5a1470659557f9a3f19681279298ab9aa31cb2 +Merge: c5f7a3327b 22d8e532ed +Author: Dmitry Novik +Date: Mon Aug 1 00:32:59 2022 +0200 + + Merge pull request #38725 from azat/fix-order-by-projection + + Fix ORDER BY that matches projections ORDER BY + +commit d39259a4c0fa022db55ae23cc176a4a6e0576cf0 +Author: Robert Schulze +Date: Sun Jul 31 18:25:50 2022 +0000 + + More conversions + +commit 3e627e2861e08ac511435dc79e78681f97486bc3 +Author: Azat Khuzhin +Date: Wed Jul 13 16:29:22 2022 +0300 + + Add profile events for fsync + + The following new provile events had been added: + + - FileSync - Number of times the F_FULLFSYNC/fsync/fdatasync function was called for files. + - DirectorySync - Number of times the F_FULLFSYNC/fsync/fdatasync function was called for directories. + - FileSyncElapsedMicroseconds - Total time spent waiting for F_FULLFSYNC/fsync/fdatasync syscall for files. 
+ - DirectorySyncElapsedMicroseconds - Total time spent waiting for F_FULLFSYNC/fsync/fdatasync syscall for directories. + + v2: rewrite test to sh with retries + Signed-off-by: Azat Khuzhin + +commit c5f7a3327b4f4d94b71b3c0a0e42585cf7fc6886 +Merge: ccef227494 7e4fb960cb +Author: Alexey Milovidov +Date: Sun Jul 31 22:57:26 2022 +0300 + + Merge pull request #39085 from quickhouse/patch-3 + + Fixed regexp in `test_match_process_uid_against_data_owner` + +commit ccef2274949bac5e6c0e3aa264f190505f9dfc0c +Merge: 52d08d9db4 1e974b55ea +Author: Alexey Milovidov +Date: Sun Jul 31 22:53:09 2022 +0300 + + Merge pull request #38185 from vdimir/analyze_stuck + + Limit number of analyze for one query + +commit 6a2f7d0c8f41d2c9048d24f7b779b311734771ff +Merge: f80a4c184e 52d08d9db4 +Author: Alexey Milovidov +Date: Sun Jul 31 22:51:38 2022 +0300 + + Merge branch 'master' into interserver_listen_port + +commit 567b57a627dbbb6e3c44a8875cdd55128ed2a7cf +Author: Alexey Milovidov +Date: Sun Jul 31 22:44:28 2022 +0300 + + Update a test. + +commit a7734672b90bfcf4c138c7c0f9085d402f2fac31 +Author: Robert Schulze +Date: Sun Jul 31 14:34:05 2022 +0000 + + Use std::popcount, ::countl_zero, ::countr_zero functions + + - Introduced with the C++20 header + + - The problem with __builtin_c(l|t)z() is that 0 as input has an + undefined result (*) and the code did not always check. The std:: + versions do not have this issue. + + - In some cases, we continue to use buildin_c(l|t)z(), (e.g. in + src/Common/BitHelpers.h) because the std:: versions only accept + unsigned inputs (and they also check that) and the casting would be + ugly. 
+ + (*) https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html + +commit 63836749c674c5c1722d1fc6dd207ec13be2865e +Author: Robert Schulze +Date: Sun Jul 31 15:04:14 2022 +0000 + + Try to fix "Docs Check" error + +commit dd030c6b48d253384beac9b5d1efe0039d2f640f +Author: Robert Schulze +Date: Sun Jul 31 13:01:35 2022 +0000 + + Add anchor + +commit fb622e4c4b2a38aa5aeb13fa0993974a599cde15 +Author: Robert Schulze +Date: Sun Jul 31 12:52:14 2022 +0000 + + Remove IDE recommendation from Russian / Chinese translations of dev guide + + - IDEs are already recommended at length earlier in the documents + +commit 31550436e4b9a9e05017c09716214b10bec2043d +Author: Alexander Tokmakov +Date: Sun Jul 31 14:38:13 2022 +0200 + + try clang-15 for build with tsan + +commit 729d19fa4fcb74a17039292851aa1a4b1b7e5d6c +Author: Robert Schulze +Date: Fri Jul 29 12:30:40 2022 +0000 + + Rename "splitted build" to "shared libraries build" in CI tools + + - The old name made sense for (dev option) "-DUSE_STATIC_LIBRARIES=0 + -DSPLIT_SHARED_LIBRARIES=1 -DSPLIT_BINARY=1" but that was removed with + #39520. 
+ + - What still exists is "-DUSE_STATIC_LIBRARIES=0 + -DSPLIT_SHARED_LIBRARIES=1" which does a shared library build + +commit 52d08d9db4c46e9f0a23a1913d3adac222630689 +Merge: 0f2177127b dcc8751685 +Author: Robert Schulze +Date: Sun Jul 31 14:23:31 2022 +0200 + + Merge pull request #39520 from ClickHouse/no-split-binary + + Remove SPLIT_BINARY + +commit 0f2177127b7bb1517e0acab815ded905f5ba1390 +Merge: eaeb0446c7 4d7627e45e +Author: Robert Schulze +Date: Sun Jul 31 14:09:46 2022 +0200 + + Merge pull request #39751 from ClickHouse/enable-getoskernelversion + + Enable SQL function getOSKernelVersion() on all platforms + +commit d87aac2013f5d0211e63ec1c49bb6b06e88246a3 +Author: Antonio Andelic +Date: Sun Jul 31 11:31:46 2022 +0000 + + Disable fasttest for rocksdb + +commit aff8c12a4a6bfb851a546ff3e2632053857825f2 +Author: Azat Khuzhin +Date: Sun Jul 10 19:26:38 2022 +0300 + + tests: enable back 02232_dist_insert_send_logs_level_hung + + The original issue was that log_comment was wrong, and this test relies + on correct log_comment, but this had been fixed already in #37932. + + Also adjust the timeout to avoid possible query hung check failures. + + Signed-off-by: Azat Khuzhin + +commit eaeb0446c7c98cb43f21df42167fe7a255924ebf +Merge: d259c4fa6c 6099f66fd6 +Author: Nikolai Kochetov +Date: Sun Jul 31 12:59:20 2022 +0200 + + Merge pull request #39705 from ClickHouse/avoid-recursive-dtor-for-ast + + Avoid recursive destruction of AST. 
+ +commit eea1aaef2c7e84b023434dfa279dae1aa6e4965a +Author: Antonio Andelic +Date: Sun Jul 31 10:44:01 2022 +0000 + + Use params for filtering by key in EmbeddedRocksDB + +commit d259c4fa6c4aedc93f5021e4cf8091a458da98c6 +Merge: c9e6850306 4828be7fc4 +Author: Alexander Gololobov <440544+davenger@users.noreply.github.com> +Date: Sun Jul 31 11:28:25 2022 +0200 + + Merge pull request #39747 from ClickHouse/fix-double-escaping-json + + Fix double escaping in the metadata of FORMAT JSON + +commit dcc8751685874efdc559d65f467c3e998f7420f0 +Author: Robert Schulze +Date: Sun Jul 31 08:51:17 2022 +0000 + + Disable harmful env var check to workaround failure to start the server + +commit 55ff4956edbd4bb19933d0ae8812bcdcb8da92bb +Merge: 628503c92a c9e6850306 +Author: Antonio Andelic +Date: Sun Jul 31 08:28:10 2022 +0000 + + Merge branch 'master' into fix-jepsen-total-queue + +commit 7c23e48b5b2fc3a1907cf0bd4d8fd170f9db941b +Author: Robert Schulze +Date: Sun Jul 31 08:05:12 2022 +0000 + + Revert exclusion of libharmful (did not work anyways) + +commit 7fe106a0fbbe9976410959183169578709c54ea9 +Author: Robert Schulze +Date: Sat Jul 30 21:46:50 2022 +0000 + + Try to fix libharmful fail + +commit d834d9e1378aff4fc15a7cbc7191beec53a7afbf +Author: Robert Schulze +Date: Sat Jul 30 18:58:12 2022 +0000 + + Minor: Better variable name output + +commit 9de69a021c5e84780aae2abeb5fc014bc757bac3 +Author: Robert Schulze +Date: Fri Jul 29 12:19:04 2022 +0000 + + Fix documentation + +commit 4d7627e45e7a762c6205064881ee0393a8e5bfe8 +Author: Robert Schulze +Date: Sun Jul 31 07:36:40 2022 +0000 + + Fix include + +commit 095e400075196fd9ff3bf5356a37a82e89b15c02 +Author: Anton Popov +Date: Sun Jul 31 00:38:52 2022 +0000 + + fix index analysis with tuples and IN + +commit 075ff5005ee97b253056a3e5d3f157bc69c853b0 +Merge: c9ad914164 c9e6850306 +Author: Alexey Milovidov +Date: Sun Jul 31 03:05:53 2022 +0300 + + Merge branch 'master' into update-arrow + +commit fa9c3dcc4899227156d877cf9b676629103a537f 
+Author: Alexey Milovidov +Date: Sun Jul 31 03:02:27 2022 +0300 + + Update programs/local/LocalServer.cpp + + Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> + +commit a30dbed6b8dba4c08f9a27b786d2cad59f66becf +Author: Alexey Milovidov +Date: Sun Jul 31 03:02:20 2022 +0300 + + Update programs/local/LocalServer.cpp + + Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> + +commit c9e685030625f749c7564fc861d8aa1aab8f5e60 +Merge: 22bc89690d cf1a5baa23 +Author: Alexey Milovidov +Date: Sun Jul 31 02:51:38 2022 +0300 + + Merge pull request #39325 from azat/perf-parallel_mv-fix + + tests/performance: improve parallel_mv test + +commit 22bc89690d3b3681362c5c3615f285293a2bcc2e +Merge: 8fb70abe3e a068c397df +Author: Alexey Milovidov +Date: Sun Jul 31 02:24:05 2022 +0300 + + Merge pull request #39222 from azat/fix-http-session + + Do not report "Failed communicating with" on and on for parts exchange + +commit 8fb70abe3e54cbbfa935825fa88cf4e8caf99537 +Merge: 85773e0926 4f25a08b7c +Author: Alexey Milovidov +Date: Sun Jul 31 02:22:22 2022 +0300 + + Merge pull request #39178 from azat/dist-insert-log + + Add connection info for Distributed sends log message + +commit 7e4fb960cb53077ef83ce090ae07dc9813317b46 +Merge: 17176212a7 85773e0926 +Author: Alexey Milovidov +Date: Sun Jul 31 02:20:27 2022 +0300 + + Merge branch 'master' into patch-3 + +commit 17176212a77524b4086f9810d0bd529a7be51ad9 +Author: Alexey Milovidov +Date: Sun Jul 31 02:20:08 2022 +0300 + + Update test.py + +commit 8ca236de08375015aa09ec17e5de96a1f7f2de9f +Author: Robert Schulze +Date: Sat Jul 30 22:36:47 2022 +0000 + + Enable SQL function getOSKernelVersion() on all platforms + + Follow up to PR #38615 + +commit f80a4c184e67988a39b0aff34aa4503a957e5db4 +Merge: c026dbf51c 85773e0926 +Author: Alexey Milovidov +Date: Sun Jul 31 01:22:32 2022 +0300 + + Merge branch 'master' into interserver_listen_port + +commit 85773e0926b5e17152db7a35824ccf3a34b59061 +Merge: 
15a3ed2e3b 4088c0a7f3 +Author: Robert Schulze +Date: Sun Jul 31 00:18:37 2022 +0200 + + Merge pull request #38615 from liyinsg/simplified_function_registration_interface + + Simplified function registration interface + +commit 15a3ed2e3bd130d76c1e75acf448879e7b6d1a19 +Merge: ed5090a398 146756e2ea +Author: Alexey Milovidov +Date: Sun Jul 31 01:14:54 2022 +0300 + + Merge pull request #34662 from den-crane/test/insert_deduplication_token_materialized_views + + Test/insert deduplication token materialized views + +commit ed5090a398eaf3a3e39877bc2dbaedf36ecdb47f +Merge: 2bdc926572 9551a36bda +Author: Alexey Milovidov +Date: Sun Jul 31 01:08:42 2022 +0300 + + Merge pull request #39622 from ClickHouse/blinkov-patch-23 + + Update README.md + +commit 9551a36bda62f42fe57a429494e83c4b9c219118 +Merge: 3bc9e1bd16 2bdc926572 +Author: Alexey Milovidov +Date: Sun Jul 31 01:08:25 2022 +0300 + + Merge branch 'master' into blinkov-patch-23 + +commit 4828be7fc42e9ba01935edf878ff4abedf5eb0b0 +Author: Alexey Milovidov +Date: Sat Jul 30 23:56:41 2022 +0200 + + Fix double escaping in the metadata of FORMAT JSON + +commit acb148122d9aa0c411c03fb074ba1e0cebce56b0 +Author: robot-clickhouse +Date: Sat Jul 30 21:49:36 2022 +0000 + + Automatic style fix + +commit 441f2feb49c5d089b49ab6323b7f3c0dc85d61dd +Author: Alexey Milovidov +Date: Sat Jul 30 23:40:21 2022 +0200 + + Cleanup usages of `allow_experimental_projection_optimization` setting, part 1 + +commit 2bdc9265728c2b176498a691dc1d966172c42dfc +Merge: b52843d5fd 5eea7ce18d +Author: Alexey Milovidov +Date: Sat Jul 30 23:09:15 2022 +0300 + + Merge pull request #39687 from vitlibar/fix-reading-from-encrypted-disk + + Fix seeking while reading from encrypted disk + +commit b52843d5fd79f0d11ab379098b94fcb5dd805032 +Merge: acb0137dbb b390bcfe7c +Author: Robert Schulze +Date: Sat Jul 30 20:49:05 2022 +0200 + + Merge pull request #37951 from zvonand/dt64_timeslots + + Fix timeSlots for DateTime64 + +commit b9d7cd6a5d7c5b22be2cf2e5e6055313dfd25f14 
+Author: Constantine Peresypkin +Date: Thu Jul 28 19:54:46 2022 +0200 + + add settings for executable table func + + SELECT * FROM executable('