diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index c90df6e57b71..cbd3bd7bec42 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -12,11 +12,10 @@ jobs:
   PythonUnitTests:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Python unit tests
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -24,34 +23,32 @@ jobs:
   DockerHubPushAarch64:
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix amd64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
@@ -59,18 +56,17 @@ jobs:
     needs: [DockerHubPushAmd64, DockerHubPushAarch64]
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed aarch64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}
       - name: Download changed amd64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}
@@ -79,7 +75,7 @@ jobs:
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images
           path: ${{ runner.temp }}/changed_images.json
@@ -94,13 +90,12 @@ jobs:
           REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
           REPORTS_PATH=${{runner.temp}}/reports_dir
           EOF
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
       - name: CompatibilityCheck
@@ -132,28 +127,25 @@ jobs:
           BUILD_NAME=package_release
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # For a proper version and performance artifacts
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -177,28 +169,25 @@ jobs:
           BUILD_NAME=package_aarch64
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # For a proper version and performance artifacts
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -222,26 +211,24 @@ jobs:
           BUILD_NAME=package_asan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -265,26 +252,24 @@ jobs:
           BUILD_NAME=package_tsan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -308,26 +293,24 @@ jobs:
           BUILD_NAME=package_debug
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -351,28 +334,25 @@ jobs:
           BUILD_NAME=binary_darwin
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -396,28 +376,25 @@ jobs:
           BUILD_NAME=binary_darwin_aarch64
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -436,12 +413,10 @@ jobs:
       - BuilderDebAarch64
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
           fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
       - name: Check docker clickhouse/clickhouse-server building
         run: |
@@ -477,14 +452,13 @@ jobs:
           NEEDS_DATA_PATH=${{runner.temp}}/needs.json
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Report Builder
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -516,14 +490,13 @@ jobs:
           NEEDS_DATA_PATH=${{runner.temp}}/needs.json
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Report Builder
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -556,14 +529,13 @@ jobs:
           KILL_TIMEOUT=10800
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -594,14 +566,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -635,14 +606,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Stress test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -672,14 +642,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Integration test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -706,11 +675,10 @@ jobs:
       - CompatibilityCheck
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/cherry_pick.yml b/.github/workflows/cherry_pick.yml index 3e6f9e76c565..065e584182b7 100644 --- a/.github/workflows/cherry_pick.yml +++ b/.github/workflows/cherry_pick.yml @@ -28,8 +28,9 @@ jobs: REPO_TEAM=core EOF - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} fetch-depth: 0 - name: Cherry pick diff --git a/.github/workflows/clibmouse_pr.yml b/.github/workflows/clibmouse_pr.yml new file mode 100644 index 000000000000..2f58ac10e5c1 --- /dev/null +++ b/.github/workflows/clibmouse_pr.yml @@ -0,0 +1,343 @@ +name: PR Sanity +run-name: ${{ github.actor }} is running PR sanity check 🚀 +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + +on: # yamllint disable-line rule:truthy + pull_request: + types: + - synchronize + - reopened + - opened + branches: + - Kusto-phase3 + paths-ignore: + - 'docker/docs/**' + - 'docs/**' + - 'website/**' +jobs: + CheckLabels: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Labels check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 run_check.py + PythonUnitTests: + needs: CheckLabels + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Python unit tests + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 -m unittest discover -s . 
+  DockerHubPushAarch64:
+    needs: CheckLabels
+    runs-on: [self-hosted, style-checker-aarch64]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix aarch64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
+  DockerHubPushAmd64:
+    needs: CheckLabels
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix amd64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
+  DockerHubPush:
+    needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests]
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Download changed aarch64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}
+      - name: Download changed amd64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ runner.temp }}/changed_images.json
+  StyleCheck:
+    needs: DockerHubPush
+    runs-on: [self-hosted, style-checker]
+    if: ${{ success() || failure() }}
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{ runner.temp }}/style_check
+          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
+          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
+          RCSK
+          EOF
+      - name: Download changed images
+        # even if artifact does not exist, e.g. on `do not test` label or failed Docker job
+        continue-on-error: true
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ env.TEMP_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Style Check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 style_check.py
+  BuilderDebRelease:
+    needs: [DockerHubPush]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          BUILD_NAME=package_release
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v3
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          fetch-depth: 0 # for performance artifact
+          submodules: true
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ env.BUILD_URLS }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
+  BuilderBinRelease:
+    needs: [DockerHubPush]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          BUILD_NAME=binary_release
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Build
+        run: |
+          git -C "$GITHUB_WORKSPACE" submodule sync --recursive
+          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_URLS }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          # shellcheck disable=SC2046
+          docker kill $(docker ps -q) ||:
+          # shellcheck disable=SC2046
+          docker rm -f $(docker ps -a -q) ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  BuilderBinClangTidy:
+    needs: [DockerHubPush, StyleCheck]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          BUILD_NAME=binary_tidy
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v3
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ env.BUILD_URLS }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  UnitTestsReleaseClang:
+    needs: [BuilderBinRelease]
+    runs-on: [self-hosted, fuzzer-unit-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/unit_tests_asan
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Unit tests (release-clang)
+          REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v2
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Unit test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 unit_tests_check.py "$CHECK_NAME"
+      - name: Cleanup
+        if: always()
+        run: |
+          # shellcheck disable=SC2046
+          docker kill $(docker ps -q) ||:
+          # shellcheck disable=SC2046
+          docker rm -f $(docker ps -a -q) ||:
+          sudo rm -fr "$TEMP_PATH"
+  FunctionalStatelessTestRelease:
+    needs: [BuilderDebRelease]
+    runs-on: [self-hosted, func-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/stateless_release
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Stateless tests (release)
+          REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
+          KILL_TIMEOUT=10800
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v3
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+      - name: Functional test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
\ No newline at end of file
diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml
index 7a15e77becbb..64d8ff160356 100644
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@@ -20,11 +20,10 @@ jobs:
   CheckLabels:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Labels check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -33,17 +32,16 @@ jobs:
     needs: CheckLabels
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
@@ -51,17 +49,16 @@ jobs:
     needs: CheckLabels
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix amd64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
@@ -69,18 +66,17 @@ jobs:
     needs: [DockerHubPushAmd64, DockerHubPushAarch64]
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed aarch64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}
       - name: Download changed amd64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}
@@ -89,7 +85,7 @@ jobs:
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images
           path: ${{ runner.temp }}/changed_images.json
@@ -109,15 +105,14 @@ jobs:
       - name: Download changed images
         # even if artifact does not exist, e.g. on `do not test` label or failed Docker job
         continue-on-error: true
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.TEMP_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Style Check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -139,15 +134,14 @@ jobs:
           REPO_COPY=${{runner.temp}}/docs_check/ClickHouse
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.TEMP_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Docs Check
         run: |
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@@ -166,11 +160,10 @@ jobs:
       - DocsCheck
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Finish label
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml
index da67edd4aa12..c665b7284095 100644
--- a/.github/workflows/docs_release.yml
+++ b/.github/workflows/docs_release.yml
@@ -22,34 +22,32 @@ jobs:
   DockerHubPushAarch64:
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix amd64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
@@ -57,18 +55,17 @@ jobs:
     needs: [DockerHubPushAmd64, DockerHubPushAarch64]
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed aarch64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}
       - name: Download changed amd64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}
@@ -77,7 +74,7 @@ jobs:
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images
           path: ${{ runner.temp }}/changed_images.json
@@ -96,13 +93,12 @@ jobs:
           ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
           RCSK
           EOF
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.TEMP_PATH }}
diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml
index 5afc066065e4..e67df15c4d36 100644
--- a/.github/workflows/jepsen.yml
+++ b/.github/workflows/jepsen.yml
@@ -19,12 +19,10 @@ jobs:
           TEMP_PATH=${{runner.temp}}/keeper_jepsen
           REPO_COPY=${{runner.temp}}/keeper_jepsen/ClickHouse
           EOF
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
           fetch-depth: 0
       - name: Jepsen Test
         run: |
@@ -50,12 +48,10 @@ jobs:
   #         TEMP_PATH=${{runner.temp}}/server_jepsen
   #         REPO_COPY=${{runner.temp}}/server_jepsen/ClickHouse
   #         EOF
-  #     - name: Clear repository
-  #       run: |
-  #         sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
   #     - name: Check out repository code
-  #       uses: actions/checkout@v2
+  #       uses: ClickHouse/checkout@v1
   #       with:
+  #         clear-repository: true
   #         fetch-depth: 0
   #     - name: Jepsen Test
   #       run: |
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index f3d672136ef9..0efdb3caaad4 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -12,11 +12,10 @@ jobs:
   PythonUnitTests:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Python unit tests
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -24,34 +23,32 @@ jobs:
   DockerHubPushAarch64:
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix amd64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
@@ -59,18 +56,17 @@ jobs:
     needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests]
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed aarch64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}
       - name: Download changed amd64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}
@@ -79,7 +75,7 @@ jobs:
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images
           path: ${{ runner.temp }}/changed_images.json
@@ -96,15 +92,14 @@ jobs:
       - name: Download changed images
         # even if artifact does not exist, e.g. on `do not test` label or failed Docker job
         continue-on-error: true
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.TEMP_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Style Check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -126,13 +121,12 @@ jobs:
           REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
           REPORTS_PATH=${{runner.temp}}/reports_dir
           EOF
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
       - name: CompatibilityCheck
@@ -158,13 +152,12 @@ jobs:
           REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse
           REPORTS_PATH=${{runner.temp}}/reports_dir
           EOF
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
       - name: Shared build check
@@ -196,28 +189,25 @@ jobs:
           BUILD_NAME=package_release
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # For a proper version and performance artifacts
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -241,24 +231,24 @@ jobs:
           BUILD_NAME=package_aarch64
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ runner.temp }}/images_path
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # For a proper version and performance artifacts
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
@@ -282,28 +272,25 @@ jobs:
           BUILD_NAME=binary_release
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -327,26 +314,24 @@ jobs:
           BUILD_NAME=package_asan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -370,26 +355,24 @@ jobs:
           BUILD_NAME=package_ubsan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -413,26 +396,24 @@ jobs:
           BUILD_NAME=package_tsan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
      - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -456,26 +437,24 @@ jobs:
           BUILD_NAME=package_msan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -499,26 +478,24 @@ jobs:
           BUILD_NAME=package_debug
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -545,26 +522,24 @@ jobs:
           BUILD_NAME=binary_shared
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -588,26 +563,24 @@ jobs:
           BUILD_NAME=binary_tidy
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -631,28 +604,25 @@ jobs:
           BUILD_NAME=binary_darwin
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -676,28 +646,25 @@ jobs:
           BUILD_NAME=binary_aarch64
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -721,28 +688,25 @@ jobs:
           BUILD_NAME=binary_freebsd
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -766,28 +730,25 @@ jobs:
           BUILD_NAME=binary_darwin_aarch64
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -811,28 +772,25 @@ jobs:
           BUILD_NAME=binary_ppc64le
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -856,28 +814,25 @@ jobs:
           BUILD_NAME=binary_amd64sse2
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -901,28 +856,25 @@ jobs:
           BUILD_NAME=binary_aarch64_v80compat
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -941,12 +893,10 @@ jobs:
       - BuilderDebAarch64
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
           fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
       - name: Check docker clickhouse/clickhouse-server building
         run: |
@@ -986,14 +936,13 @@ jobs:
           NEEDS_DATA_PATH=${{runner.temp}}/needs.json
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Report Builder
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1015,7 +964,6 @@ jobs:
       - BuilderBinDarwin
       - BuilderBinDarwinAarch64
       - BuilderBinFreeBSD
-      # - BuilderBinGCC
       - BuilderBinPPC64
       - BuilderBinAmd64SSE2
       - BuilderBinAarch64V80Compat
@@ -1033,14 +981,13 @@ jobs:
           NEEDS_DATA_PATH=${{runner.temp}}/needs.json
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Report Builder
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1064,11 +1011,10 @@ jobs:
       - BuilderDebAarch64
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Mark Commit Release Ready
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -1090,14 +1036,13 @@ jobs:
           KILL_TIMEOUT=10800
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1125,14 +1070,13 @@ jobs:
           KILL_TIMEOUT=10800
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1162,14 +1106,13 @@ jobs:
           RUN_BY_HASH_TOTAL=2
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1199,14 +1142,13 @@ jobs:
           RUN_BY_HASH_TOTAL=2
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1234,14 +1176,13 @@ jobs:
           KILL_TIMEOUT=10800
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1269,14 +1210,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1306,14 +1246,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1343,14 +1282,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1380,14 +1318,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1417,14 +1354,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1454,14 +1390,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1489,14 +1424,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1526,14 +1460,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - 
uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1563,14 +1496,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1600,14 +1532,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1637,14 +1568,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1674,14 +1604,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1711,14 +1640,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1749,14 +1677,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1784,14 +1711,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out 
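
The long run of functional-test hunks here differs only in env values; the sharding knobs are what vary between them. A minimal sketch of one such "Set envs" step, with values copied from these hunks; the companion RUN_BY_HASH_NUM (the shard's own index) is set the same way in context lines not visible in this diff, so treat that line and its semantics as an assumption:

      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          REPORTS_PATH=${{runner.temp}}/reports_dir
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1    # assumed: zero-based index of this shard of the check
          RUN_BY_HASH_TOTAL=3  # shard count, as in the hunks above
          EOF
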
repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1819,14 +1745,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1854,14 +1779,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1889,14 +1813,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1924,14 +1847,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1959,14 +1881,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1996,14 +1917,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2034,14 +1954,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2068,14 
+1987,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2102,14 +2020,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2136,14 +2053,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2175,14 +2091,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2211,14 +2126,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2247,14 +2161,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2283,14 +2196,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2319,14 +2231,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: 
actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2355,14 +2266,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2391,14 +2301,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2427,14 +2336,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2463,14 +2371,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2500,14 +2407,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2534,14 +2440,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2568,14 +2473,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && 
mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2602,14 +2506,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2636,14 +2539,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2673,14 +2575,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2707,14 +2608,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2728,40 +2628,6 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - # UnitTestsReleaseGCC: - # needs: [BuilderBinGCC] - # runs-on: [self-hosted, fuzzer-unit-tester] - # steps: - # - name: Set envs - # run: | - # cat >> "$GITHUB_ENV" << 'EOF' - # TEMP_PATH=${{runner.temp}}/unit_tests_asan - # REPORTS_PATH=${{runner.temp}}/reports_dir - # CHECK_NAME=Unit tests (release-gcc) - # REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse - # EOF - # - name: Download json reports - # uses: actions/download-artifact@v2 - # with: - # path: ${{ env.REPORTS_PATH }} - # - name: Clear repository - # run: | - # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - # - name: Check out repository code - # uses: actions/checkout@v2 - # - name: Unit test - # run: | - # sudo rm -fr "$TEMP_PATH" - # mkdir -p "$TEMP_PATH" - # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - # cd "$REPO_COPY/tests/ci" - # python3 unit_tests_check.py "$CHECK_NAME" - # - name: Cleanup - # if: always() - # run: | - # docker ps --quiet | xargs --no-run-if-empty docker kill ||: - # docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - # sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ 
-2775,14 +2641,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2809,14 +2674,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2843,14 +2707,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2882,14 +2745,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2918,14 +2780,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2954,14 +2815,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2990,14 +2850,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3026,14 +2885,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: 
actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3062,14 +2920,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3098,14 +2955,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3134,14 +2990,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3171,14 +3026,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -3205,14 +3059,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -3291,11 +3144,10 @@ jobs: - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 9ebbe4e090d0..415d1b8fdc40 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -16,34 +16,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm 
-fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 --all - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 --all - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -51,18 +49,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -71,7 +68,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -90,22 +87,17 @@ jobs: EOF echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - id: coverity-checkout - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: - fetch-depth: 0 # otherwise we will have no info about contributors + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -134,8 +126,10 @@ jobs: CC: clang-15 CXX: clang++-15 steps: - - uses: actions/checkout@v2 + - name: Check out repository code + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis submodules: true - name: Set up JDK 11 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 857e2c7f6041..3564f95ed436 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -25,11 +25,10 @@ jobs: # Run the first check always, even if the CI 
is cancelled if: ${{ always() }} steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -38,11 +37,10 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -51,17 +49,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json @@ -69,17 +66,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -87,18 +83,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -107,7 +102,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -128,15 +123,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. 
on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -158,16 +152,12 @@ jobs: REPO_COPY=${{runner.temp}}/fasttest/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" - mkdir "$GITHUB_WORKSPACE" - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} @@ -192,13 +182,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -224,13 +213,12 @@ jobs: REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: Shared build check @@ -262,28 +250,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # for performance artifact + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -307,26 +292,24 @@ jobs: BUILD_NAME=binary_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + 
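
Alongside the checkout change, every artifact step in these files moves from v2 to v3 of the official upload/download actions; the inputs are untouched, so each of these hunks is a one-line version bump. One resulting step, taken from the DockerHubPush jobs:

      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v3  # was @v2; the `name` and `path` inputs are unchanged
        with:
          name: changed_images_amd64
          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
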
clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -350,28 +333,25 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # for performance artifact - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -395,26 +375,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -438,26 +416,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ 
env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -481,26 +457,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -524,26 +498,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -567,26 +539,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -613,26 +583,24 @@ jobs: BUILD_NAME=binary_shared EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - 
git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -656,26 +624,24 @@ jobs: BUILD_NAME=binary_tidy EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -699,26 +665,24 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -742,26 +706,24 @@ jobs: BUILD_NAME=binary_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -785,26 +747,24 @@ jobs: 
BUILD_NAME=binary_freebsd EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -828,26 +788,24 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -871,26 +829,24 @@ jobs: BUILD_NAME=binary_ppc64le EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -914,26 +870,24 @@ jobs: BUILD_NAME=binary_amd64sse2 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule 
update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -957,26 +911,24 @@ jobs: BUILD_NAME=binary_aarch64_v80compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -995,12 +947,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -1039,14 +989,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1086,14 +1035,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1126,14 +1074,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1163,14 +1110,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ 
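
One checkout in these files keeps full history for a different reason than the builders: the docker clickhouse/clickhouse-server job computes a version from history, and its own comment demands the same depth across dependency jobs. The resulting step, per the hunk above:

      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
          fetch-depth: 0  # It MUST BE THE SAME for all dependencies and the job itself
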
@@ -1163,14 +1110,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1200,14 +1146,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1237,14 +1182,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1274,14 +1218,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1309,14 +1252,13 @@ jobs:
           KILL_TIMEOUT=10800
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1346,14 +1288,13 @@ jobs:
           RUN_BY_HASH_TOTAL=2
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1383,14 +1324,13 @@ jobs:
           RUN_BY_HASH_TOTAL=2
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1420,14 +1360,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1457,14 +1396,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1494,14 +1432,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1531,14 +1468,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1568,14 +1504,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1605,14 +1540,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1642,14 +1576,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1679,14 +1612,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1716,14 +1648,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1753,14 +1684,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1790,14 +1720,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1825,14 +1754,13 @@ jobs:
           KILL_TIMEOUT=10800
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1862,14 +1790,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1899,14 +1826,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1936,14 +1862,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -1973,14 +1898,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2010,14 +1934,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2047,14 +1970,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2084,14 +2006,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2121,14 +2042,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2158,14 +2078,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2195,14 +2114,13 @@ jobs:
           RUN_BY_HASH_TOTAL=2
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2232,14 +2150,13 @@ jobs:
           RUN_BY_HASH_TOTAL=2
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
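# Editor's note: the RUN_BY_HASH_TOTAL context lines above come from the env blocks that shard
# one test suite across several parallel jobs; the patch leaves them untouched. Assuming the
# usual heredoc that the EOF context lines imply, sibling shards differ only in the shard
# index (the values here are illustrative, and RUN_BY_HASH_NUM is not visible in this excerpt):
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          RUN_BY_HASH_NUM=1
          RUN_BY_HASH_TOTAL=4
          EOF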
@@ -2269,14 +2186,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2306,14 +2222,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2343,14 +2258,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2380,14 +2294,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2417,14 +2330,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2454,14 +2366,13 @@ jobs:
           RUN_BY_HASH_TOTAL=6
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2491,14 +2402,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2528,14 +2438,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2565,14 +2474,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2602,14 +2510,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2639,14 +2546,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2674,14 +2580,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2709,14 +2614,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Bugfix test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2758,14 +2662,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2793,14 +2696,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2828,14 +2730,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2863,14 +2764,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2898,14 +2798,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2933,14 +2832,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2968,14 +2866,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3005,14 +2902,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Stress test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3043,14 +2939,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Stress test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3077,14 +2972,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Stress test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3111,14 +3005,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Stress test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3145,14 +3038,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Stress test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3182,14 +3074,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Fuzzer
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3216,14 +3107,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Fuzzer
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3250,14 +3140,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Fuzzer
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -3284,14 +3173,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Fuzzer
         run: |
           sudo rm -fr "$TEMP_PATH"
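# Editor's note: upload-artifact and download-artifact move from v2 to v3 together in every
# job. Producer and consumer jobs are linked only by the artifact name, so bumping both
# actions in the same patch keeps that pairing easy to reason about; e.g. a build job
# publishes its result like this:
      - name: Upload build URLs to artifacts
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.BUILD_URLS }} # a downstream job downloads the artifact under this same name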
rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3357,14 +3244,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3393,14 +3279,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3429,14 +3314,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3465,14 +3349,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3501,14 +3384,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3537,14 +3419,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3573,14 +3454,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr 
"$TEMP_PATH" @@ -3609,14 +3489,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3645,14 +3524,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3681,14 +3559,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3717,14 +3594,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3753,14 +3629,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3789,14 +3664,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3825,14 +3699,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3861,14 +3734,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear 
repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3897,14 +3769,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3931,14 +3802,13 @@ jobs: REPO_COPY=${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3968,14 +3838,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4002,14 +3871,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4036,14 +3904,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4070,14 +3937,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4104,14 +3970,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo 
rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4143,14 +4008,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4179,14 +4043,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4215,14 +4078,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4251,14 +4113,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4287,14 +4148,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4323,14 +4183,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4359,14 +4218,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: 
Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4395,14 +4253,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4432,14 +4289,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -4466,14 +4322,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -4599,11 +4454,10 @@ jobs: - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0b0f125d641b..9200e5e87b8c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,7 +20,7 @@ jobs: REPO_COPY=${{runner.temp}}/release_packages/ClickHouse EOF - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: # Always use the most recent script version ref: master @@ -50,12 +50,10 @@ jobs: DockerServerImages: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # otherwise we will have no version info - name: Check docker clickhouse/clickhouse-server building run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index bf35ca76fc6c..251087f33a55 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -15,34 +15,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 
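# Editor's note: release.yml is the one place where checkout pins ref: master ("Always use the
# most recent script version" in the hunk above), so tag builds still run the latest CI
# scripts, and its DockerServerImages job keeps fetch-depth: 0 because version detection needs
# the full history. A sketch of such a full-history checkout with the new action:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
          fetch-depth: 0 # full clone; a shallow checkout lacks the tags/commits used to derive the version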
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index bf35ca76fc6c..251087f33a55 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -15,34 +15,32 @@ jobs:
   DockerHubPushAarch64:
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix amd64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
@@ -50,18 +48,17 @@ jobs:
     needs: [DockerHubPushAmd64, DockerHubPushAarch64]
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed aarch64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}
       - name: Download changed amd64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}
@@ -70,7 +67,7 @@ jobs:
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images
           path: ${{ runner.temp }}/changed_images.json
@@ -85,13 +82,12 @@ jobs:
           REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
           REPORTS_PATH=${{runner.temp}}/reports_dir
           EOF
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
       - name: CompatibilityCheck
@@ -123,28 +119,25 @@ jobs:
           BUILD_NAME=package_release
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -168,24 +161,24 @@ jobs:
           BUILD_NAME=package_aarch64
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ runner.temp }}/images_path
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
-          fetch-depth: 0 # otherwise we will have no info about contributors
+          clear-repository: true
+          submodules: true
+          fetch-depth: 0 # For a proper version and performance artifacts
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
@@ -209,26 +202,24 @@ jobs:
           BUILD_NAME=package_asan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
      - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -252,26 +243,24 @@ jobs:
           BUILD_NAME=package_ubsan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -295,26 +284,24 @@ jobs:
           BUILD_NAME=package_tsan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -338,26 +325,24 @@ jobs:
           BUILD_NAME=package_msan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -381,26 +366,24 @@ jobs:
           BUILD_NAME=package_debug
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -424,28 +407,25 @@ jobs:
           BUILD_NAME=binary_darwin
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
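# Editor's note: in the build jobs, submodules: true on the checkout action stands in for the
# two deleted commands (git submodule sync; git submodule update --single-branch --depth=1
# --init --jobs=10). Whether the action clones submodules shallowly as the old flags did is an
# implementation detail of ClickHouse/checkout that the patch does not show; the visible
# contract is just the input:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
          submodules: true # fetch submodules during checkout instead of separate git commands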
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -509,12 +486,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -553,14 +528,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -592,14 +566,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -623,11 +596,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Mark Commit Release Ready run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -649,14 +621,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -684,14 +655,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: 
| sudo rm -fr "$TEMP_PATH" @@ -721,14 +691,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -758,14 +727,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -795,14 +763,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -832,14 +799,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -869,14 +835,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -904,14 +869,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -941,14 +905,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -978,14 +941,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear 
repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1015,14 +977,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1052,14 +1013,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1089,14 +1049,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1126,14 +1085,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1164,14 +1122,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1199,14 +1156,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1234,14 +1190,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test 
run: | sudo rm -fr "$TEMP_PATH" @@ -1269,14 +1224,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1304,14 +1258,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1339,14 +1292,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1374,14 +1326,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1411,14 +1362,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1449,14 +1399,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1483,14 +1432,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1517,14 +1465,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - 
uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1551,14 +1498,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1590,14 +1536,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1626,14 +1571,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1662,14 +1606,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1698,14 +1641,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1734,14 +1676,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1770,14 +1711,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir 
"$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1806,14 +1746,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1842,14 +1781,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1878,14 +1816,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1944,11 +1881,10 @@ jobs: - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index f8cfa1137cc4..f5b42e9c882a 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -34,7 +34,7 @@ jobs: run: | echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: ref: master fetch-depth: 0 diff --git a/.github/workflows/woboq.yml b/.github/workflows/woboq.yml index b928a4a8d3d7..363652c9f332 100644 --- a/.github/workflows/woboq.yml +++ b/.github/workflows/woboq.yml @@ -21,12 +21,10 @@ jobs: REPO_COPY=${{runner.temp}}/codebrowser/ClickHouse IMAGES_PATH=${{runner.temp}}/images_path EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true submodules: 'true' - name: Codebrowser run: | diff --git a/.gitmodules b/.gitmodules index 070109eb32d9..0805b6d54926 100644 --- a/.gitmodules +++ b/.gitmodules @@ -269,9 +269,6 @@ [submodule "contrib/vectorscan"] path = contrib/vectorscan url = https://github.com/VectorCamp/vectorscan.git -[submodule "contrib/liburing"] - path = contrib/liburing - url = https://github.com/axboe/liburing.git [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/ClickHouse/c-ares @@ -294,3 +291,6 @@ [submodule "contrib/google-benchmark"] path = contrib/google-benchmark url = 
https://github.com/google/benchmark.git +[submodule "contrib/libdivide"] + path = contrib/libdivide + url = https://github.com/ridiculousfish/libdivide.git diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index d06d3918612b..a97f3afb1abf 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54469) SET(VERSION_MAJOR 22) SET(VERSION_MINOR 12) -SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77) -SET(VERSION_DESCRIBE v22.12.1.1-testing) -SET(VERSION_STRING 22.12.1.1) +SET(VERSION_PATCH 3) +SET(VERSION_GITHASH c790cfd4465bdf9a8c474837c27aa314ef6f61bd) +SET(VERSION_DESCRIBE v22.12.3.1-stable) +SET(VERSION_STRING 22.12.3.1) # end of autochange diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2e05b318b8fa..6f80059498ea 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -65,7 +65,7 @@ add_contrib (dragonbox-cmake dragonbox) add_contrib (vectorscan-cmake vectorscan) add_contrib (jemalloc-cmake jemalloc) add_contrib (libcpuid-cmake libcpuid) -add_contrib (libdivide) +add_contrib (libdivide-cmake) add_contrib (libmetrohash) add_contrib (lz4-cmake lz4) add_contrib (murmurhash) diff --git a/contrib/googletest-cmake/CMakeLists.txt b/contrib/googletest-cmake/CMakeLists.txt index 90fdde0c1859..94c35656987d 100644 --- a/contrib/googletest-cmake/CMakeLists.txt +++ b/contrib/googletest-cmake/CMakeLists.txt @@ -1,15 +1,30 @@ -set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest") +set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest") -add_library(_gtest "${SRC_DIR}/src/gtest-all.cc") +add_library(_gtest "${SRC_DIR}/googletest/src/gtest-all.cc") set_target_properties(_gtest PROPERTIES VERSION "1.0.0") target_compile_definitions (_gtest PUBLIC GTEST_HAS_POSIX_RE=0) -target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/include") -target_include_directories(_gtest PRIVATE "${SRC_DIR}") +target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/googletest/include") +target_include_directories(_gtest PRIVATE "${SRC_DIR}/googletest") -add_library(_gtest_main "${SRC_DIR}/src/gtest_main.cc") +add_library(_gtest_main "${SRC_DIR}/googletest/src/gtest_main.cc") set_target_properties(_gtest_main PROPERTIES VERSION "1.0.0") target_link_libraries(_gtest_main PUBLIC _gtest) add_library(_gtest_all INTERFACE) target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main) add_library(ch_contrib::gtest_all ALIAS _gtest_all) + +add_library(_gmock "${SRC_DIR}/googlemock/src/gmock-all.cc") +set_target_properties(_gmock PROPERTIES VERSION "1.0.0") +target_compile_definitions (_gmock PUBLIC GTEST_HAS_POSIX_RE=0) +target_include_directories(_gmock SYSTEM PUBLIC "${SRC_DIR}/googlemock/include") +target_include_directories(_gmock PRIVATE "${SRC_DIR}/googlemock") +target_link_libraries(_gmock PUBLIC _gtest) + +add_library(_gmock_main "${SRC_DIR}/googlemock/src/gmock_main.cc") +set_target_properties(_gmock_main PROPERTIES VERSION "1.0.0") +target_link_libraries(_gmock_main PUBLIC _gmock) + +add_library(_gmock_all INTERFACE) +target_link_libraries(_gmock_all INTERFACE _gmock _gmock_main) +add_library(ch_contrib::gmock_all ALIAS _gmock_all) diff --git a/contrib/libdivide b/contrib/libdivide new file mode 160000 index 000000000000..57678d011970 --- /dev/null +++ b/contrib/libdivide @@ -0,0 +1 @@ +Subproject commit 57678d0119707d85e8a6190df53748c758769cdf diff --git a/contrib/libdivide-cmake/CMakeLists.txt b/contrib/libdivide-cmake/CMakeLists.txt 
new file mode 100644 index 000000000000..3174808bc231 --- /dev/null +++ b/contrib/libdivide-cmake/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LIBDIVIDE_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libdivide") +add_library (_libdivide INTERFACE) +# for libdivide.h +target_include_directories (_libdivide SYSTEM BEFORE INTERFACE ${LIBDIVIDE_SOURCE_DIR}) +# for libdivide-config.h +target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .) +add_library (ch_contrib::libdivide ALIAS _libdivide) diff --git a/contrib/libdivide-cmake/libdivide-config.h b/contrib/libdivide-cmake/libdivide-config.h new file mode 100644 index 000000000000..8ef001fb97bc --- /dev/null +++ b/contrib/libdivide-cmake/libdivide-config.h @@ -0,0 +1,9 @@ +#if defined(__SSE2__) +# define LIBDIVIDE_SSE2 +#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__) +# define LIBDIVIDE_AVX512 +#elif defined(__AVX2__) +# define LIBDIVIDE_AVX2 +#elif defined(__aarch64__) && defined(__ARM_NEON) +# define LIBDIVIDE_NEON +#endif diff --git a/contrib/libdivide/CMakeLists.txt b/contrib/libdivide/CMakeLists.txt deleted file mode 100644 index 45cbc0a584b5..000000000000 --- a/contrib/libdivide/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_library (_libdivide INTERFACE) -target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .) -add_library (ch_contrib::libdivide ALIAS _libdivide) diff --git a/contrib/libdivide/LICENSE.txt b/contrib/libdivide/LICENSE.txt deleted file mode 100644 index d056b847bba8..000000000000 --- a/contrib/libdivide/LICENSE.txt +++ /dev/null @@ -1,20 +0,0 @@ - libdivide - Copyright (C) 2010 ridiculous_fish - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - libdivide@ridiculousfish.com diff --git a/contrib/libdivide/README.txt b/contrib/libdivide/README.txt deleted file mode 100644 index 2d17a68e4c2b..000000000000 --- a/contrib/libdivide/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://github.com/ridiculousfish/libdivide -http://libdivide.com/ diff --git a/contrib/libdivide/libdivide.h b/contrib/libdivide/libdivide.h deleted file mode 100644 index 33d210310a16..000000000000 --- a/contrib/libdivide/libdivide.h +++ /dev/null @@ -1,2503 +0,0 @@ -// libdivide.h - Optimized integer division -// https://libdivide.com -// -// Copyright (C) 2010 - 2019 ridiculous_fish, -// Copyright (C) 2016 - 2019 Kim Walisch, -// -// libdivide is dual-licensed under the Boost or zlib licenses. -// You may use libdivide under the terms of either of these. -// See LICENSE.txt for more details. 
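// A minimal usage sketch of the API this vendored header provides, for
// orientation (illustrative names, not part of the diff; libdivide_u32_gen
// and libdivide_u32_do are declared further down in this file):

#include <stddef.h>
#include <stdint.h>
#include "libdivide.h"

static uint64_t sum_of_quotients(const uint32_t *vals, size_t count, uint32_t d) {
    struct libdivide_u32_t divider = libdivide_u32_gen(d);  // precompute once; d must be != 0
    uint64_t sum = 0;
    for (size_t i = 0; i < count; ++i)
        sum += libdivide_u32_do(vals[i], &divider);  // same quotient as vals[i] / d
    return sum;
}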
- -#ifndef LIBDIVIDE_H -#define LIBDIVIDE_H - -#define LIBDIVIDE_VERSION "3.0" -#define LIBDIVIDE_VERSION_MAJOR 3 -#define LIBDIVIDE_VERSION_MINOR 0 - -#include <stdint.h> - -#if defined(__cplusplus) -#include <cstdlib> -#include <cstdio> -#include <type_traits> -#else -#include <stdlib.h> -#include <stdio.h> -#endif - -#if defined(LIBDIVIDE_SSE2) -#include <emmintrin.h> -#endif -#if defined(LIBDIVIDE_AVX2) || defined(LIBDIVIDE_AVX512) -#include <immintrin.h> -#endif -#if defined(LIBDIVIDE_NEON) -#include <arm_neon.h> -#endif - -#if defined(_MSC_VER) -#include <intrin.h> -// disable warning C4146: unary minus operator applied -// to unsigned type, result still unsigned -#pragma warning(disable : 4146) -#define LIBDIVIDE_VC -#endif - -#if !defined(__has_builtin) -#define __has_builtin(x) 0 -#endif - -#if defined(__SIZEOF_INT128__) -#define HAS_INT128_T -// clang-cl on Windows does not yet support 128-bit division -#if !(defined(__clang__) && defined(LIBDIVIDE_VC)) -#define HAS_INT128_DIV -#endif -#endif - -#if defined(__x86_64__) || defined(_M_X64) -#define LIBDIVIDE_X86_64 -#endif - -#if defined(__i386__) -#define LIBDIVIDE_i386 -#endif - -#if defined(__GNUC__) || defined(__clang__) -#define LIBDIVIDE_GCC_STYLE_ASM -#endif - -#if defined(__cplusplus) || defined(LIBDIVIDE_VC) -#define LIBDIVIDE_FUNCTION __FUNCTION__ -#else -#define LIBDIVIDE_FUNCTION __func__ -#endif - -#define LIBDIVIDE_ERROR(msg) \ - do { \ - fprintf(stderr, "libdivide.h:%d: %s(): Error: %s\n", __LINE__, LIBDIVIDE_FUNCTION, msg); \ - abort(); \ - } while (0) - -#if defined(LIBDIVIDE_ASSERTIONS_ON) -#define LIBDIVIDE_ASSERT(x) \ - do { \ - if (!(x)) { \ - fprintf(stderr, "libdivide.h:%d: %s(): Assertion failed: %s\n", __LINE__, \ - LIBDIVIDE_FUNCTION, #x); \ - abort(); \ - } \ - } while (0) -#else -#define LIBDIVIDE_ASSERT(x) -#endif - -#ifdef __cplusplus -namespace libdivide { -#endif - -// pack divider structs to prevent compilers from padding. -// This reduces memory usage by up to 43% when using a large -// array of libdivide dividers and improves performance -// by up to 10% because of reduced memory bandwidth. -#pragma pack(push, 1) - -struct libdivide_u32_t { - uint32_t magic; - uint8_t more; -}; - -struct libdivide_s32_t { - int32_t magic; - uint8_t more; -}; - -struct libdivide_u64_t { - uint64_t magic; - uint8_t more; -}; - -struct libdivide_s64_t { - int64_t magic; - uint8_t more; -}; - -struct libdivide_u32_branchfree_t { - uint32_t magic; - uint8_t more; -}; - -struct libdivide_s32_branchfree_t { - int32_t magic; - uint8_t more; -}; - -struct libdivide_u64_branchfree_t { - uint64_t magic; - uint8_t more; -}; - -struct libdivide_s64_branchfree_t { - int64_t magic; - uint8_t more; -}; - -#pragma pack(pop)
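// A compile-time sketch of what the packing above buys (assumes C11
// _Static_assert; these asserts are illustrative, not part of the header).
// Unpacked, struct libdivide_s64_t would typically be padded to 16 bytes;
// packed it is 9, which is where the "up to 43%" figure comes from.

_Static_assert(sizeof(struct libdivide_u32_t) == 5, "4-byte magic + 1-byte more, no padding");
_Static_assert(sizeof(struct libdivide_s64_t) == 9, "8-byte magic + 1-byte more: 9 vs 16 bytes unpacked");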
- -// Explanation of the "more" field: -// -// * Bits 0-5 are the shift value (for shift path or mult path). -// * Bit 6 is the add indicator for mult path. -// * Bit 7 is set if the divisor is negative. We use bit 7 as the negative -// divisor indicator so that we can efficiently use sign extension to -// create a bitmask with all bits set to 1 (if the divisor is negative) -// or 0 (if the divisor is positive). -// - // u32: [0-4] shift value - // [5] ignored - // [6] add indicator - // magic number of 0 indicates shift path - // - // s32: [0-4] shift value - // [5] ignored - // [6] add indicator - // [7] indicates negative divisor - // magic number of 0 indicates shift path - // - // u64: [0-5] shift value - // [6] add indicator - // magic number of 0 indicates shift path - // - // s64: [0-5] shift value - // [6] add indicator - // [7] indicates negative divisor - // magic number of 0 indicates shift path - // - // In the s32 and s64 branchfull strategy, the magic number is negated according to - // whether the divisor is negated; in the branchfree strategy, it is not negated. - -enum { - LIBDIVIDE_32_SHIFT_MASK = 0x1F, - LIBDIVIDE_64_SHIFT_MASK = 0x3F, - LIBDIVIDE_ADD_MARKER = 0x40, - LIBDIVIDE_NEGATIVE_DIVISOR = 0x80 -}; - -static inline struct libdivide_s32_t libdivide_s32_gen(int32_t d); -static inline struct libdivide_u32_t libdivide_u32_gen(uint32_t d); -static inline struct libdivide_s64_t libdivide_s64_gen(int64_t d); -static inline struct libdivide_u64_t libdivide_u64_gen(uint64_t d); - -static inline struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d); -static inline struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d); -static inline struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d); -static inline struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d); - -static inline int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom); -static inline uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom); -static inline int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom); -static inline uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom); - -static inline int32_t libdivide_s32_branchfree_do( - int32_t numer, const struct libdivide_s32_branchfree_t *denom); -static inline uint32_t libdivide_u32_branchfree_do( - uint32_t numer, const struct libdivide_u32_branchfree_t *denom); -static inline int64_t libdivide_s64_branchfree_do( - int64_t numer, const struct libdivide_s64_branchfree_t *denom); -static inline uint64_t libdivide_u64_branchfree_do( - uint64_t numer, const struct libdivide_u64_branchfree_t *denom); - -static inline int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom); -static inline uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom); -static inline int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom); -static inline uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom); - -static inline int32_t libdivide_s32_branchfree_recover( - const struct libdivide_s32_branchfree_t *denom); -static inline uint32_t libdivide_u32_branchfree_recover( - const struct libdivide_u32_branchfree_t *denom); -static inline int64_t libdivide_s64_branchfree_recover( - const struct libdivide_s64_branchfree_t *denom); -static inline uint64_t libdivide_u64_branchfree_recover( - const struct libdivide_u64_branchfree_t *denom); - -//////// Internal Utility Functions - -static inline uint32_t libdivide_mullhi_u32(uint32_t x, uint32_t y) { - uint64_t xl = x, yl = y; - uint64_t rl = xl * yl; - return (uint32_t)(rl >> 32); -} - -static inline int32_t libdivide_mullhi_s32(int32_t x, int32_t y) { - int64_t xl = x, yl = y; - int64_t rl = xl * yl; - // needs to be arithmetic shift - return (int32_t)(rl >> 32); -} - -static inline uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) { -#if defined(LIBDIVIDE_VC) &&
defined(LIBDIVIDE_X86_64) - return __umulh(x, y); -#elif defined(HAS_INT128_T) - __uint128_t xl = x, yl = y; - __uint128_t rl = xl * yl; - return (uint64_t)(rl >> 64); -#else - // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - uint32_t mask = 0xFFFFFFFF; - uint32_t x0 = (uint32_t)(x & mask); - uint32_t x1 = (uint32_t)(x >> 32); - uint32_t y0 = (uint32_t)(y & mask); - uint32_t y1 = (uint32_t)(y >> 32); - uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); - uint64_t x0y1 = x0 * (uint64_t)y1; - uint64_t x1y0 = x1 * (uint64_t)y0; - uint64_t x1y1 = x1 * (uint64_t)y1; - uint64_t temp = x1y0 + x0y0_hi; - uint64_t temp_lo = temp & mask; - uint64_t temp_hi = temp >> 32; - - return x1y1 + temp_hi + ((temp_lo + x0y1) >> 32); -#endif -} - -static inline int64_t libdivide_mullhi_s64(int64_t x, int64_t y) { -#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64) - return __mulh(x, y); -#elif defined(HAS_INT128_T) - __int128_t xl = x, yl = y; - __int128_t rl = xl * yl; - return (int64_t)(rl >> 64); -#else - // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - uint32_t mask = 0xFFFFFFFF; - uint32_t x0 = (uint32_t)(x & mask); - uint32_t y0 = (uint32_t)(y & mask); - int32_t x1 = (int32_t)(x >> 32); - int32_t y1 = (int32_t)(y >> 32); - uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); - int64_t t = x1 * (int64_t)y0 + x0y0_hi; - int64_t w1 = x0 * (int64_t)y1 + (t & mask); - - return x1 * (int64_t)y1 + (t >> 32) + (w1 >> 32); -#endif -} - -static inline int32_t libdivide_count_leading_zeros32(uint32_t val) { -#if defined(__GNUC__) || __has_builtin(__builtin_clz) - // Fast way to count leading zeros - return __builtin_clz(val); -#elif defined(LIBDIVIDE_VC) - unsigned long result; - if (_BitScanReverse(&result, val)) { - return 31 - result; - } - return 0; -#else - if (val == 0) return 32; - int32_t result = 8; - uint32_t hi = 0xFFU << 24; - while ((val & hi) == 0) { - hi >>= 8; - result += 8; - } - while (val & hi) { - result -= 1; - hi <<= 1; - } - return result; -#endif -} - -static inline int32_t libdivide_count_leading_zeros64(uint64_t val) { -#if defined(__GNUC__) || __has_builtin(__builtin_clzll) - // Fast way to count leading zeros - return __builtin_clzll(val); -#elif defined(LIBDIVIDE_VC) && defined(_WIN64) - unsigned long result; - if (_BitScanReverse64(&result, val)) { - return 63 - result; - } - return 0; -#else - uint32_t hi = val >> 32; - uint32_t lo = val & 0xFFFFFFFF; - if (hi != 0) return libdivide_count_leading_zeros32(hi); - return 32 + libdivide_count_leading_zeros32(lo); -#endif -} - -// libdivide_64_div_32_to_32: divides a 64-bit uint {u1, u0} by a 32-bit -// uint {v}. The result must fit in 32 bits. -// Returns the quotient directly and the remainder in *r -static inline uint32_t libdivide_64_div_32_to_32( - uint32_t u1, uint32_t u0, uint32_t v, uint32_t *r) { -#if (defined(LIBDIVIDE_i386) || defined(LIBDIVIDE_X86_64)) && defined(LIBDIVIDE_GCC_STYLE_ASM) - uint32_t result; - __asm__("divl %[v]" : "=a"(result), "=d"(*r) : [v] "r"(v), "a"(u0), "d"(u1)); - return result; -#else - uint64_t n = ((uint64_t)u1 << 32) | u0; - uint32_t result = (uint32_t)(n / v); - *r = (uint32_t)(n - result * (uint64_t)v); - return result; -#endif -} - -// libdivide_128_div_64_to_64: divides a 128-bit uint {u1, u0} by a 64-bit -// uint {v}. The result must fit in 64 bits. -// Returns the quotient directly and the remainder in *r -static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) { - // N.B. 
resist the temptation to use __uint128_t here. - // In LLVM compiler-rt, it performs a 128/128 -> 128 division which is many times slower than - // necessary. In gcc it's better but still slower than the divlu implementation, perhaps because - // it's not inlined. -#if defined(LIBDIVIDE_X86_64) && defined(LIBDIVIDE_GCC_STYLE_ASM) - uint64_t result; - __asm__("divq %[v]" : "=a"(result), "=d"(*r) : [v] "r"(v), "a"(u0), "d"(u1)); - return result; -#else - // Code taken from Hacker's Delight: - // http://www.hackersdelight.org/HDcode/divlu.c. - // License permits inclusion here per: - // http://www.hackersdelight.org/permissions.htm - - const uint64_t b = (1ULL << 32); // Number base (32 bits) - uint64_t un1, un0; // Norm. dividend LSD's - uint64_t vn1, vn0; // Norm. divisor digits - uint64_t q1, q0; // Quotient digits - uint64_t un64, un21, un10; // Dividend digit pairs - uint64_t rhat; // A remainder - int32_t s; // Shift amount for norm - - // If overflow, set rem. to an impossible value, - // and return the largest possible quotient - if (u1 >= v) { - *r = (uint64_t)-1; - return (uint64_t)-1; - } - - // count leading zeros - s = libdivide_count_leading_zeros64(v); - if (s > 0) { - // Normalize divisor - v = v << s; - un64 = (u1 << s) | (u0 >> (64 - s)); - un10 = u0 << s; // Shift dividend left - } else { - // Avoid undefined behavior of (u0 >> 64). - // The behavior is undefined if the right operand is - // negative, or greater than or equal to the length - // in bits of the promoted left operand. - un64 = u1; - un10 = u0; - } - - // Break divisor up into two 32-bit digits - vn1 = v >> 32; - vn0 = v & 0xFFFFFFFF; - - // Break right half of dividend into two digits - un1 = un10 >> 32; - un0 = un10 & 0xFFFFFFFF; - - // Compute the first quotient digit, q1 - q1 = un64 / vn1; - rhat = un64 - q1 * vn1; - - while (q1 >= b || q1 * vn0 > b * rhat + un1) { - q1 = q1 - 1; - rhat = rhat + vn1; - if (rhat >= b) break; - } - - // Multiply and subtract - un21 = un64 * b + un1 - q1 * v; - - // Compute the second quotient digit - q0 = un21 / vn1; - rhat = un21 - q0 * vn1; - - while (q0 >= b || q0 * vn0 > b * rhat + un0) { - q0 = q0 - 1; - rhat = rhat + vn1; - if (rhat >= b) break; - } - - *r = (un21 * b + un0 - q0 * v) >> s; - return q1 * b + q0; -#endif -} - -// Bitshift a u128 in place, left (signed_shift > 0) or right (signed_shift < 0) -static inline void libdivide_u128_shift(uint64_t *u1, uint64_t *u0, int32_t signed_shift) { - if (signed_shift > 0) { - uint32_t shift = signed_shift; - *u1 <<= shift; - *u1 |= *u0 >> (64 - shift); - *u0 <<= shift; - } else if (signed_shift < 0) { - uint32_t shift = -signed_shift; - *u0 >>= shift; - *u0 |= *u1 << (64 - shift); - *u1 >>= shift; - } -} - -// Computes a 128 / 128 -> 64 bit division, with a 128 bit remainder. 
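// A hedged reference implementation for the long-division helpers in this
// section (a test-oracle sketch; the name is the editor's, not libdivide's).
// The N.B. above explains why the production code avoids __uint128_t in the
// hot path; for checking results against it, the slow form is fine:

#if defined(HAS_INT128_T) && defined(HAS_INT128_DIV)
static inline uint64_t libdivide_ref_128_div_64_to_64(
    uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) {
    __uint128_t n = ((__uint128_t)u1 << 64) | u0;  // assemble the 128-bit dividend
    *r = (uint64_t)(n % v);
    return (uint64_t)(n / v);  // caller guarantees the quotient fits in 64 bits
}
#endif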
-static uint64_t libdivide_128_div_128_to_64( - uint64_t u_hi, uint64_t u_lo, uint64_t v_hi, uint64_t v_lo, uint64_t *r_hi, uint64_t *r_lo) { -#if defined(HAS_INT128_T) && defined(HAS_INT128_DIV) - __uint128_t ufull = u_hi; - __uint128_t vfull = v_hi; - ufull = (ufull << 64) | u_lo; - vfull = (vfull << 64) | v_lo; - uint64_t res = (uint64_t)(ufull / vfull); - __uint128_t remainder = ufull - (vfull * res); - *r_lo = (uint64_t)remainder; - *r_hi = (uint64_t)(remainder >> 64); - return res; -#else - // Adapted from "Unsigned Doubleword Division" in Hacker's Delight - // We want to compute u / v - typedef struct { - uint64_t hi; - uint64_t lo; - } u128_t; - u128_t u = {u_hi, u_lo}; - u128_t v = {v_hi, v_lo}; - - if (v.hi == 0) { - // divisor v is a 64 bit value, so we just need one 128/64 division - // Note that we are simpler than Hacker's Delight here, because we know - // the quotient fits in 64 bits whereas Hacker's Delight demands a full - // 128 bit quotient - *r_hi = 0; - return libdivide_128_div_64_to_64(u.hi, u.lo, v.lo, r_lo); - } - // Here v >= 2**64 - // We know that v.hi != 0, so count leading zeros is OK - // We have 0 <= n <= 63 - uint32_t n = libdivide_count_leading_zeros64(v.hi); - - // Normalize the divisor so its MSB is 1 - u128_t v1t = v; - libdivide_u128_shift(&v1t.hi, &v1t.lo, n); - uint64_t v1 = v1t.hi; // i.e. v1 = v1t >> 64 - - // To ensure no overflow - u128_t u1 = u; - libdivide_u128_shift(&u1.hi, &u1.lo, -1); - - // Get quotient from divide unsigned insn. - uint64_t rem_ignored; - uint64_t q1 = libdivide_128_div_64_to_64(u1.hi, u1.lo, v1, &rem_ignored); - - // Undo normalization and division of u by 2. - u128_t q0 = {0, q1}; - libdivide_u128_shift(&q0.hi, &q0.lo, n); - libdivide_u128_shift(&q0.hi, &q0.lo, -63); - - // Make q0 correct or too small by 1 - // Equivalent to `if (q0 != 0) q0 = q0 - 1;` - if (q0.hi != 0 || q0.lo != 0) { - q0.hi -= (q0.lo == 0); // borrow - q0.lo -= 1; - } - - // Now q0 is correct. - // Compute q0 * v as q0v - // = (q0.hi << 64 + q0.lo) * (v.hi << 64 + v.lo) - // = (q0.hi * v.hi << 128) + (q0.hi * v.lo << 64) + - // (q0.lo * v.hi << 64) + q0.lo * v.lo) - // Each term is 128 bit - // High half of full product (upper 128 bits!) are dropped - u128_t q0v = {0, 0}; - q0v.hi = q0.hi * v.lo + q0.lo * v.hi + libdivide_mullhi_u64(q0.lo, v.lo); - q0v.lo = q0.lo * v.lo; - - // Compute u - q0v as u_q0v - // This is the remainder - u128_t u_q0v = u; - u_q0v.hi -= q0v.hi + (u.lo < q0v.lo); // second term is borrow - u_q0v.lo -= q0v.lo; - - // Check if u_q0v >= v - // This checks if our remainder is larger than the divisor - if ((u_q0v.hi > v.hi) || (u_q0v.hi == v.hi && u_q0v.lo >= v.lo)) { - // Increment q0 - q0.lo += 1; - q0.hi += (q0.lo == 0); // carry - - // Subtract v from remainder - u_q0v.hi -= v.hi + (u_q0v.lo < v.lo); - u_q0v.lo -= v.lo; - } - - *r_hi = u_q0v.hi; - *r_lo = u_q0v.lo; - - LIBDIVIDE_ASSERT(q0.hi == 0); - return q0.lo; -#endif -} - -////////// UINT32 - -static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_u32_t result; - uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(d); - - // Power of 2 - if ((d & (d - 1)) == 0) { - // We need to subtract 1 from the shift value in case of an unsigned - // branchfree divider because there is a hardcoded right shift by 1 - // in its division algorithm. Because of this we also need to add back - // 1 in its recovery algorithm. 
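// Illustration (added, not in the original): for d = 8, floor_log_2_d == 3,
// so a plain divider stores more = 3 (a pure shift), while a branchfree
// divider stores more = 2, because the branchfree division routine always
// applies an implicit extra >> 1 on top of the stored shift.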
- result.magic = 0; - result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); - } else { - uint8_t more; - uint32_t rem, proposed_m; - proposed_m = libdivide_64_div_32_to_32(1U << floor_log_2_d, 0, d, &rem); - - LIBDIVIDE_ASSERT(rem > 0 && rem < d); - const uint32_t e = d - rem; - - // This power works if e < 2**floor_log_2_d. - if (!branchfree && (e < (1U << floor_log_2_d))) { - // This power works - more = floor_log_2_d; - } else { - // We have to use the general 33-bit algorithm. We need to compute - // (2**power) / d. However, we already have (2**(power-1))/d and - // its remainder. By doubling both, and then correcting the - // remainder, we can compute the larger division. - // don't care about overflow here - in fact, we expect it - proposed_m += proposed_m; - const uint32_t twice_rem = rem + rem; - if (twice_rem >= d || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - result.magic = 1 + proposed_m; - result.more = more; - // result.more's shift should in general be ceil_log_2_d. But if we - // used the smaller power, we subtract one from the shift because we're - // using the smaller power. If we're using the larger power, we - // subtract one from the shift because it's taken care of by the add - // indicator. So floor_log_2_d happens to be correct in both cases. - } - return result; -} - -struct libdivide_u32_t libdivide_u32_gen(uint32_t d) { - return libdivide_internal_u32_gen(d, 0); -} - -struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) { - if (d == 1) { - LIBDIVIDE_ERROR("branchfree divider must be != 1"); - } - struct libdivide_u32_t tmp = libdivide_internal_u32_gen(d, 1); - struct libdivide_u32_branchfree_t ret = { - tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_32_SHIFT_MASK)}; - return ret; -} - -uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return numer >> more; - } else { - uint32_t q = libdivide_mullhi_u32(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - uint32_t t = ((numer - q) >> 1) + q; - return t >> (more & LIBDIVIDE_32_SHIFT_MASK); - } else { - // All upper bits are 0, - // don't need to mask them off. - return q >> more; - } - } -} - -uint32_t libdivide_u32_branchfree_do( - uint32_t numer, const struct libdivide_u32_branchfree_t *denom) { - uint32_t q = libdivide_mullhi_u32(denom->magic, numer); - uint32_t t = ((numer - q) >> 1) + q; - return t >> denom->more; -} - -uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - return 1U << shift; - } else if (!(more & LIBDIVIDE_ADD_MARKER)) { - // We compute q = n/d = n*m / 2^(32 + shift) - // Therefore we have d = 2^(32 + shift) / m - // We need to ceil it. - // We know d is not a power of 2, so m is not a power of 2, - // so we can just add 1 to the floor - uint32_t hi_dividend = 1U << shift; - uint32_t rem_ignored; - return 1 + libdivide_64_div_32_to_32(hi_dividend, 0, denom->magic, &rem_ignored); - } else { - // Here we wish to compute d = 2^(32+shift+1)/(m+2^32). - // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now - // Also note that shift may be as high as 31, so shift + 1 will - // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and - // then double the quotient and remainder. 
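// Worked example (added for illustration): for d = 7 the generator above
// yields magic m = 613566757 and more = 0x42 (shift = 2, ADD_MARKER set).
// Recovery computes d = 2^(32+2+1) / (m + 2^32)
//                     = 34359738368 / 4908534053 = 6 (floored), then +1 = 7.
// The code below reaches the same value via the halved computation.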
- uint64_t half_n = 1ULL << (32 + shift); - uint64_t d = (1ULL << 32) | denom->magic; - // Note that the quotient is guaranteed <= 32 bits, but the remainder - // may need 33! - uint32_t half_q = (uint32_t)(half_n / d); - uint64_t rem = half_n % d; - // We computed 2^(32+shift)/(m+2^32) - // Need to double it, and then add 1 to the quotient if doubling the - // remainder would increase the quotient. - // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits - uint32_t full_q = half_q + half_q + ((rem << 1) >= d); - - // We rounded down in gen (hence +1) - return full_q + 1; - } -} - -uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - return 1U << (shift + 1); - } else { - // Here we wish to compute d = 2^(32+shift+1)/(m+2^32). - // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now - // Also note that shift may be as high as 31, so shift + 1 will - // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and - // then double the quotient and remainder. - uint64_t half_n = 1ULL << (32 + shift); - uint64_t d = (1ULL << 32) | denom->magic; - // Note that the quotient is guaranteed <= 32 bits, but the remainder - // may need 33! - uint32_t half_q = (uint32_t)(half_n / d); - uint64_t rem = half_n % d; - // We computed 2^(32+shift)/(m+2^32) - // Need to double it, and then add 1 to the quotient if doubling the - // remainder would increase the quotient. - // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits - uint32_t full_q = half_q + half_q + ((rem << 1) >= d); - - // We rounded down in gen (hence +1) - return full_q + 1; - } -} - -/////////// UINT64 - -static inline struct libdivide_u64_t libdivide_internal_u64_gen(uint64_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_u64_t result; - uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(d); - - // Power of 2 - if ((d & (d - 1)) == 0) { - // We need to subtract 1 from the shift value in case of an unsigned - // branchfree divider because there is a hardcoded right shift by 1 - // in its division algorithm. Because of this we also need to add back - // 1 in its recovery algorithm. - result.magic = 0; - result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); - } else { - uint64_t proposed_m, rem; - uint8_t more; - // (1 << (64 + floor_log_2_d)) / d - proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem); - - LIBDIVIDE_ASSERT(rem > 0 && rem < d); - const uint64_t e = d - rem; - - // This power works if e < 2**floor_log_2_d. - if (!branchfree && e < (1ULL << floor_log_2_d)) { - // This power works - more = floor_log_2_d; - } else { - // We have to use the general 65-bit algorithm. We need to compute - // (2**power) / d. However, we already have (2**(power-1))/d and - // its remainder. By doubling both, and then correcting the - // remainder, we can compute the larger division. - // don't care about overflow here - in fact, we expect it - proposed_m += proposed_m; - const uint64_t twice_rem = rem + rem; - if (twice_rem >= d || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - result.magic = 1 + proposed_m; - result.more = more; - // result.more's shift should in general be ceil_log_2_d. But if we - // used the smaller power, we subtract one from the shift because we're - // using the smaller power.
If we're using the larger power, we - // subtract one from the shift because it's taken care of by the add - // indicator. So floor_log_2_d happens to be correct in both cases, - // which is why we do it outside of the if statement. - } - return result; -} - -struct libdivide_u64_t libdivide_u64_gen(uint64_t d) { - return libdivide_internal_u64_gen(d, 0); -} - -struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) { - if (d == 1) { - LIBDIVIDE_ERROR("branchfree divider must be != 1"); - } - struct libdivide_u64_t tmp = libdivide_internal_u64_gen(d, 1); - struct libdivide_u64_branchfree_t ret = { - tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_64_SHIFT_MASK)}; - return ret; -} - -uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return numer >> more; - } else { - uint64_t q = libdivide_mullhi_u64(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - uint64_t t = ((numer - q) >> 1) + q; - return t >> (more & LIBDIVIDE_64_SHIFT_MASK); - } else { - // All upper bits are 0, - // don't need to mask them off. - return q >> more; - } - } -} - -uint64_t libdivide_u64_branchfree_do( - uint64_t numer, const struct libdivide_u64_branchfree_t *denom) { - uint64_t q = libdivide_mullhi_u64(denom->magic, numer); - uint64_t t = ((numer - q) >> 1) + q; - return t >> denom->more; -} - -uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { - return 1ULL << shift; - } else if (!(more & LIBDIVIDE_ADD_MARKER)) { - // We compute q = n/d = n*m / 2^(64 + shift) - // Therefore we have d = 2^(64 + shift) / m - // We need to ceil it. - // We know d is not a power of 2, so m is not a power of 2, - // so we can just add 1 to the floor - uint64_t hi_dividend = 1ULL << shift; - uint64_t rem_ignored; - return 1 + libdivide_128_div_64_to_64(hi_dividend, 0, denom->magic, &rem_ignored); - } else { - // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). - // Notice (m + 2^64) is a 65 bit number. This gets hairy. See - // libdivide_u32_recover for more on what we do here. - // TODO: do something better than 128 bit math - - // Full n is a (potentially) 129 bit value - // half_n is a 128 bit value - // Compute the hi half of half_n. Low half is 0. - uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; - // d is a 65 bit value. The high bit is always set to 1. - const uint64_t d_hi = 1, d_lo = denom->magic; - // Note that the quotient is guaranteed <= 64 bits, - // but the remainder may need 65! - uint64_t r_hi, r_lo; - uint64_t half_q = - libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); - // We computed 2^(64+shift)/(m+2^64) - // Double the remainder ('dr') and check if that is larger than d - // Note that d is a 65 bit value, so r1 is small and so r1 + r1 - // cannot overflow - uint64_t dr_lo = r_lo + r_lo; - uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry - int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); - uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); - return full_q + 1; - } -} - -uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { - return 1ULL << (shift + 1); - } else { - // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). - // Notice (m + 2^64) is a 65 bit number. 
This gets hairy. See - // libdivide_u32_recover for more on what we do here. - // TODO: do something better than 128 bit math - - // Full n is a (potentially) 129 bit value - // half_n is a 128 bit value - // Compute the hi half of half_n. Low half is 0. - uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; - // d is a 65 bit value. The high bit is always set to 1. - const uint64_t d_hi = 1, d_lo = denom->magic; - // Note that the quotient is guaranteed <= 64 bits, - // but the remainder may need 65! - uint64_t r_hi, r_lo; - uint64_t half_q = - libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); - // We computed 2^(64+shift)/(m+2^64) - // Double the remainder ('dr') and check if that is larger than d - // Note that d is a 65 bit value, so r1 is small and so r1 + r1 - // cannot overflow - uint64_t dr_lo = r_lo + r_lo; - uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry - int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); - uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); - return full_q + 1; - } -} - -/////////// SINT32 - -static inline struct libdivide_s32_t libdivide_internal_s32_gen(int32_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_s32_t result; - - // If d is a power of 2, or the negative of a power of 2, we have to use a shift. - // This is especially important because the magic algorithm fails for -1. - // To check if d is a power of 2 or its inverse, it suffices to check - // whether its absolute value has exactly one bit set. This works even for - // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set - // and is a power of 2. - uint32_t ud = (uint32_t)d; - uint32_t absD = (d < 0) ? -ud : ud; - uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(absD); - // check if exactly one bit is set, - // don't care if absD is 0 since that's divide by zero - if ((absD & (absD - 1)) == 0) { - // Branchfree and normal paths are exactly the same - result.magic = 0; - result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0); - } else { - LIBDIVIDE_ASSERT(floor_log_2_d >= 1); - - uint8_t more; - // the dividend here is 2**(floor_log_2_d + 31), so the low 32 bit word - // is 0 and the high word is 2**(floor_log_2_d - 1) - uint32_t rem, proposed_m; - proposed_m = libdivide_64_div_32_to_32(1U << (floor_log_2_d - 1), 0, absD, &rem); - const uint32_t e = absD - rem; - - // We are going to start with a power of floor_log_2_d - 1. - // This works if e < 2**floor_log_2_d. - if (!branchfree && e < (1U << floor_log_2_d)) { - // This power works - more = floor_log_2_d - 1; - } else { - // We need to go one higher. This should not make proposed_m - // overflow, but it will make it negative when interpreted as an - // int32_t. - proposed_m += proposed_m; - const uint32_t twice_rem = rem + rem; - if (twice_rem >= absD || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - - proposed_m += 1; - int32_t magic = (int32_t)proposed_m; - - // Mark if we are negative. Note we only negate the magic number in the - // branchfull case.
- if (d < 0) { - more |= LIBDIVIDE_NEGATIVE_DIVISOR; - if (!branchfree) { - magic = -magic; - } - } - - result.more = more; - result.magic = magic; - } - return result; -} - -struct libdivide_s32_t libdivide_s32_gen(int32_t d) { - return libdivide_internal_s32_gen(d, 0); -} - -struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) { - struct libdivide_s32_t tmp = libdivide_internal_s32_gen(d, 1); - struct libdivide_s32_branchfree_t result = {tmp.magic, tmp.more}; - return result; -} - -int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - uint32_t sign = (int8_t)more >> 7; - uint32_t mask = (1U << shift) - 1; - uint32_t uq = numer + ((numer >> 31) & mask); - int32_t q = (int32_t)uq; - q >>= shift; - q = (q ^ sign) - sign; - return q; - } else { - uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift and then sign extend - int32_t sign = (int8_t)more >> 7; - // q += (more < 0 ? -numer : numer) - // cast required to avoid UB - uq += ((uint32_t)numer ^ sign) - sign; - } - int32_t q = (int32_t)uq; - q >>= shift; - q += (q < 0); - return q; - } -} - -int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift and then sign extend - int32_t sign = (int8_t)more >> 7; - int32_t magic = denom->magic; - int32_t q = libdivide_mullhi_s32(magic, numer); - q += numer; - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is a power of - // 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - uint32_t q_sign = (uint32_t)(q >> 31); - q += q_sign & ((1U << shift) - is_power_of_2); - - // Now arithmetic right shift - q >>= shift; - // Negate if needed - q = (q ^ sign) - sign; - - return q; -} - -int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - if (!denom->magic) { - uint32_t absD = 1U << shift; - if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { - absD = -absD; - } - return (int32_t)absD; - } else { - // Unsigned math is much easier - // We negate the magic number only in the branchfull case, and we don't - // know which case we're in. However we have enough information to - // determine the correct sign of the magic number. The divisor was - // negative if LIBDIVIDE_NEGATIVE_DIVISOR is set. If ADD_MARKER is set, - // the magic number's sign is opposite that of the divisor. - // We want to compute the positive magic number. - int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); - int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) ? denom->magic > 0 : denom->magic < 0; - - // Handle the power of 2 case (including branchfree) - if (denom->magic == 0) { - int32_t result = 1U << shift; - return negative_divisor ? -result : result; - } - - uint32_t d = (uint32_t)(magic_was_negated ? -denom->magic : denom->magic); - uint64_t n = 1ULL << (32 + shift); // this shift cannot exceed 30 - uint32_t q = (uint32_t)(n / d); - int32_t result = (int32_t)q; - result += 1; - return negative_divisor ? 
-result : result; - } -} - -int32_t libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom) { - return libdivide_s32_recover((const struct libdivide_s32_t *)denom); -} - -///////////// SINT64 - -static inline struct libdivide_s64_t libdivide_internal_s64_gen(int64_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_s64_t result; - - // If d is a power of 2, or the negative of a power of 2, we have to use a shift. - // This is especially important because the magic algorithm fails for -1. - // To check if d is a power of 2 or its inverse, it suffices to check - // whether its absolute value has exactly one bit set. This works even for - // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set - // and is a power of 2. - uint64_t ud = (uint64_t)d; - uint64_t absD = (d < 0) ? -ud : ud; - uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(absD); - // check if exactly one bit is set, - // don't care if absD is 0 since that's divide by zero - if ((absD & (absD - 1)) == 0) { - // Branchfree and non-branchfree cases are the same - result.magic = 0; - result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0); - } else { - // the dividend here is 2**(floor_log_2_d + 63), so the low 64 bit word - // is 0 and the high word is 2**(floor_log_2_d - 1) - uint8_t more; - uint64_t rem, proposed_m; - proposed_m = libdivide_128_div_64_to_64(1ULL << (floor_log_2_d - 1), 0, absD, &rem); - const uint64_t e = absD - rem; - - // We are going to start with a power of floor_log_2_d - 1. - // This works if e < 2**floor_log_2_d. - if (!branchfree && e < (1ULL << floor_log_2_d)) { - // This power works - more = floor_log_2_d - 1; - } else { - // We need to go one higher. This should not make proposed_m - // overflow, but it will make it negative when interpreted as an - // int64_t. - proposed_m += proposed_m; - const uint64_t twice_rem = rem + rem; - if (twice_rem >= absD || twice_rem < rem) proposed_m += 1; - // note that we only set the LIBDIVIDE_NEGATIVE_DIVISOR bit if we - // also set ADD_MARKER; this is an annoying optimization that - // enables algorithm #4 to avoid the mask.
However we always set it - // in the branchfree case - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - proposed_m += 1; - int64_t magic = (int64_t)proposed_m; - - // Mark if we are negative - if (d < 0) { - more |= LIBDIVIDE_NEGATIVE_DIVISOR; - if (!branchfree) { - magic = -magic; - } - } - - result.more = more; - result.magic = magic; - } - return result; -} - -struct libdivide_s64_t libdivide_s64_gen(int64_t d) { - return libdivide_internal_s64_gen(d, 0); -} - -struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) { - struct libdivide_s64_t tmp = libdivide_internal_s64_gen(d, 1); - struct libdivide_s64_branchfree_t ret = {tmp.magic, tmp.more}; - return ret; -} - -int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { // shift path - uint64_t mask = (1ULL << shift) - 1; - uint64_t uq = numer + ((numer >> 63) & mask); - int64_t q = (int64_t)uq; - q >>= shift; - // must be arithmetic shift and then sign-extend - int64_t sign = (int8_t)more >> 7; - q = (q ^ sign) - sign; - return q; - } else { - uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift and then sign extend - int64_t sign = (int8_t)more >> 7; - // q += (more < 0 ? -numer : numer) - // cast required to avoid UB - uq += ((uint64_t)numer ^ sign) - sign; - } - int64_t q = (int64_t)uq; - q >>= shift; - q += (q < 0); - return q; - } -} - -int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift and then sign extend - int64_t sign = (int8_t)more >> 7; - int64_t magic = denom->magic; - int64_t q = libdivide_mullhi_s64(magic, numer); - q += numer; - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is a power of - // 2, or (2**shift) if it is not a power of 2. - uint64_t is_power_of_2 = (magic == 0); - uint64_t q_sign = (uint64_t)(q >> 63); - q += q_sign & ((1ULL << shift) - is_power_of_2); - - // Arithmetic right shift - q >>= shift; - // Negate if needed - q = (q ^ sign) - sign; - - return q; -} - -int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - if (denom->magic == 0) { // shift path - uint64_t absD = 1ULL << shift; - if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { - absD = -absD; - } - return (int64_t)absD; - } else { - // Unsigned math is much easier - int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); - int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) ? denom->magic > 0 : denom->magic < 0; - - uint64_t d = (uint64_t)(magic_was_negated ? 
-
-#if defined(LIBDIVIDE_NEON)
-
-static inline uint32x4_t libdivide_u32_do_vec128(
-    uint32x4_t numers, const struct libdivide_u32_t *denom);
-static inline int32x4_t libdivide_s32_do_vec128(
-    int32x4_t numers, const struct libdivide_s32_t *denom);
-static inline uint64x2_t libdivide_u64_do_vec128(
-    uint64x2_t numers, const struct libdivide_u64_t *denom);
-static inline int64x2_t libdivide_s64_do_vec128(
-    int64x2_t numers, const struct libdivide_s64_t *denom);
-
-static inline uint32x4_t libdivide_u32_branchfree_do_vec128(
-    uint32x4_t numers, const struct libdivide_u32_branchfree_t *denom);
-static inline int32x4_t libdivide_s32_branchfree_do_vec128(
-    int32x4_t numers, const struct libdivide_s32_branchfree_t *denom);
-static inline uint64x2_t libdivide_u64_branchfree_do_vec128(
-    uint64x2_t numers, const struct libdivide_u64_branchfree_t *denom);
-static inline int64x2_t libdivide_s64_branchfree_do_vec128(
-    int64x2_t numers, const struct libdivide_s64_branchfree_t *denom);
-
-//////// Internal Utility Functions
-
-// Logical right shift by runtime value.
-// NEON implements right shift as left shifts by negative values.
-static inline uint32x4_t libdivide_u32_neon_srl(uint32x4_t v, uint8_t amt) {
-    int32_t wamt = static_cast<int32_t>(amt);
-    return vshlq_u32(v, vdupq_n_s32(-wamt));
-}
-
-static inline uint64x2_t libdivide_u64_neon_srl(uint64x2_t v, uint8_t amt) {
-    int64_t wamt = static_cast<int64_t>(amt);
-    return vshlq_u64(v, vdupq_n_s64(-wamt));
-}
-
-// Arithmetic right shift by runtime value.
-static inline int32x4_t libdivide_s32_neon_sra(int32x4_t v, uint8_t amt) {
-    int32_t wamt = static_cast<int32_t>(amt);
-    return vshlq_s32(v, vdupq_n_s32(-wamt));
-}
-
-static inline int64x2_t libdivide_s64_neon_sra(int64x2_t v, uint8_t amt) {
-    int64_t wamt = static_cast<int64_t>(amt);
-    return vshlq_s64(v, vdupq_n_s64(-wamt));
-}
-
-static inline int64x2_t libdivide_s64_signbits(int64x2_t v) { return vshrq_n_s64(v, 63); }
-
-static inline uint32x4_t libdivide_mullhi_u32_vec128(uint32x4_t a, uint32_t b) {
-    // Desire is [x0, x1, x2, x3]
-    uint32x4_t w1 = vreinterpretq_u32_u64(vmull_n_u32(vget_low_u32(a), b));  // [_, x0, _, x1]
-    uint32x4_t w2 = vreinterpretq_u32_u64(vmull_high_n_u32(a, b));           // [_, x2, _, x3]
-    return vuzp2q_u32(w1, w2);                                               // [x0, x1, x2, x3]
-}
-
-static inline int32x4_t libdivide_mullhi_s32_vec128(int32x4_t a, int32_t b) {
-    int32x4_t w1 = vreinterpretq_s32_s64(vmull_n_s32(vget_low_s32(a), b));  // [_, x0, _, x1]
-    int32x4_t w2 = vreinterpretq_s32_s64(vmull_high_n_s32(a, b));           // [_, x2, _, x3]
-    return vuzp2q_s32(w1, w2);                                              // [x0, x1, x2, x3]
-}
-
-static inline uint64x2_t libdivide_mullhi_u64_vec128(uint64x2_t x, uint64_t sy) {
-    // full 128 bits product is:
-    //   x0*y0 + (x0*y1 << 32) + (x1*y0 << 32) + (x1*y1 << 64)
-    // Note x0,y0,x1,y1 are all conceptually uint32, products are 32x32->64.
-
-    // Get low and high words. x0 contains low 32 bits, x1 is high 32 bits.
-    uint64x2_t y = vdupq_n_u64(sy);
-    uint32x2_t x0 = vmovn_u64(x);
-    uint32x2_t y0 = vmovn_u64(y);
-    uint32x2_t x1 = vshrn_n_u64(x, 32);
-    uint32x2_t y1 = vshrn_n_u64(y, 32);
-
-    // Compute x0*y0.
-    uint64x2_t x0y0 = vmull_u32(x0, y0);
-    uint64x2_t x0y0_hi = vshrq_n_u64(x0y0, 32);
-
-    // Compute other intermediate products.
-    uint64x2_t temp = vmlal_u32(x0y0_hi, x1, y0);  // temp = x0y0_hi + x1*y0;
-    // We want to split temp into its low 32 bits and high 32 bits, both
-    // in the low half of 64 bit registers.
-    // Use shifts to avoid needing a reg for the mask.
-    uint64x2_t temp_lo = vshrq_n_u64(vshlq_n_u64(temp, 32), 32);  // temp_lo = temp & 0xFFFFFFFF;
-    uint64x2_t temp_hi = vshrq_n_u64(temp, 32);                   // temp_hi = temp >> 32;
-
-    temp_lo = vmlal_u32(temp_lo, x0, y1);  // temp_lo += x0*y1
-    temp_lo = vshrq_n_u64(temp_lo, 32);    // temp_lo >>= 32
-    temp_hi = vmlal_u32(temp_hi, x1, y1);  // temp_hi += x1*y1
-    uint64x2_t result = vaddq_u64(temp_hi, temp_lo);
-    return result;
-}
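// Scalar model of the unsigned and signed high-half multiplies implemented
// here with NEON (illustrative only; the vector routines compute the same
// per lane). The signed variant corrects the unsigned product:
//     mullhi_s(x, y) == mullhi_u(x, y) - (x < 0 ? y : 0) - (y < 0 ? x : 0)
#include <stdint.h>
static inline uint64_t mullhi_u64_sketch(uint64_t x, uint64_t y) {
    uint64_t x0 = (uint32_t)x, x1 = x >> 32;
    uint64_t y0 = (uint32_t)y, y1 = y >> 32;
    uint64_t x0y0_hi = (x0 * y0) >> 32;
    uint64_t temp = x1 * y0 + x0y0_hi;                        // fits in 64 bits
    uint64_t temp_lo = (uint32_t)temp, temp_hi = temp >> 32;
    temp_lo = (temp_lo + x0 * y1) >> 32;
    return x1 * y1 + temp_hi + temp_lo;
}
static inline int64_t mullhi_s64_sketch(int64_t x, int64_t y) {
    uint64_t p = mullhi_u64_sketch((uint64_t)x, (uint64_t)y);
    if (x < 0) p -= (uint64_t)y;  // the t1 = signbits(x) & y term below
    if (y < 0) p -= (uint64_t)x;  // the t2 = signbits(y) & x term below
    return (int64_t)p;
}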
-
-static inline int64x2_t libdivide_mullhi_s64_vec128(int64x2_t x, int64_t sy) {
-    int64x2_t p = vreinterpretq_s64_u64(
-        libdivide_mullhi_u64_vec128(vreinterpretq_u64_s64(x), static_cast<uint64_t>(sy)));
-    int64x2_t y = vdupq_n_s64(sy);
-    int64x2_t t1 = vandq_s64(libdivide_s64_signbits(x), y);
-    int64x2_t t2 = vandq_s64(libdivide_s64_signbits(y), x);
-    p = vsubq_s64(p, t1);
-    p = vsubq_s64(p, t2);
-    return p;
-}
-
-////////// UINT32
-
-uint32x4_t libdivide_u32_do_vec128(uint32x4_t numers, const struct libdivide_u32_t *denom) {
-    uint8_t more = denom->more;
-    if (!denom->magic) {
-        return libdivide_u32_neon_srl(numers, more);
-    } else {
-        uint32x4_t q = libdivide_mullhi_u32_vec128(numers, denom->magic);
-        if (more & LIBDIVIDE_ADD_MARKER) {
-            // uint32_t t = ((numer - q) >> 1) + q;
-            // return t >> denom->shift;
-            // Note we can use halving-subtract to avoid the shift.
-            uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
-            uint32x4_t t = vaddq_u32(vhsubq_u32(numers, q), q);
-            return libdivide_u32_neon_srl(t, shift);
-        } else {
-            return libdivide_u32_neon_srl(q, more);
-        }
-    }
-}
-
-uint32x4_t libdivide_u32_branchfree_do_vec128(
-    uint32x4_t numers, const struct libdivide_u32_branchfree_t *denom) {
-    uint32x4_t q = libdivide_mullhi_u32_vec128(numers, denom->magic);
-    uint32x4_t t = vaddq_u32(vhsubq_u32(numers, q), q);
-    return libdivide_u32_neon_srl(t, denom->more);
-}
-
-////////// UINT64
-
-uint64x2_t libdivide_u64_do_vec128(uint64x2_t numers, const struct libdivide_u64_t *denom) {
-    uint8_t more = denom->more;
-    if (!denom->magic) {
-        return libdivide_u64_neon_srl(numers, more);
-    } else {
-        uint64x2_t q = libdivide_mullhi_u64_vec128(numers, denom->magic);
-        if (more & LIBDIVIDE_ADD_MARKER) {
-            // uint32_t t = ((numer - q) >> 1) + q;
-            // return t >> denom->shift;
-            // No 64-bit halving subtracts in NEON :(
-            uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
-            uint64x2_t t = vaddq_u64(vshrq_n_u64(vsubq_u64(numers, q), 1), q);
-            return libdivide_u64_neon_srl(t, shift);
-        } else {
-            return libdivide_u64_neon_srl(q, more);
-        }
-    }
-}
-
-uint64x2_t libdivide_u64_branchfree_do_vec128(
-    uint64x2_t numers, const struct libdivide_u64_branchfree_t *denom) {
-    uint64x2_t q = libdivide_mullhi_u64_vec128(numers, denom->magic);
-    uint64x2_t t = vaddq_u64(vshrq_n_u64(vsubq_u64(numers, q), 1), q);
-    return libdivide_u64_neon_srl(t, denom->more);
-}
-
-////////// SINT32
-
-int32x4_t libdivide_s32_do_vec128(int32x4_t numers, const struct libdivide_s32_t *denom) {
-    uint8_t more = denom->more;
-    if (!denom->magic) {
-        uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
-        uint32_t mask = (1U << shift) - 1;
-        int32x4_t roundToZeroTweak = vdupq_n_s32((int)mask);
-        // q = numer + ((numer >> 31) & roundToZeroTweak);
-        int32x4_t q = vaddq_s32(numers,
vandq_s32(vshrq_n_s32(numers, 31), roundToZeroTweak)); - q = libdivide_s32_neon_sra(q, shift); - int32x4_t sign = vdupq_n_s32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = vsubq_s32(veorq_s32(q, sign), sign); - return q; - } else { - int32x4_t q = libdivide_mullhi_s32_vec128(numers, denom->magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - int32x4_t sign = vdupq_n_s32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = vaddq_s32(q, vsubq_s32(veorq_s32(numers, sign), sign)); - } - // q >>= shift - q = libdivide_s32_neon_sra(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = vaddq_s32( - q, vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(q), 31))); // q += (q < 0) - return q; - } -} - -int32x4_t libdivide_s32_branchfree_do_vec128( - int32x4_t numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - int32x4_t sign = vdupq_n_s32((int8_t)more >> 7); - int32x4_t q = libdivide_mullhi_s32_vec128(numers, magic); - q = vaddq_s32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - int32x4_t q_sign = vshrq_n_s32(q, 31); // q_sign = q >> 31 - int32x4_t mask = vdupq_n_s32((1U << shift) - is_power_of_2); - q = vaddq_s32(q, vandq_s32(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s32_neon_sra(q, shift); // q >>= shift - q = vsubq_s32(veorq_s32(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -int64x2_t libdivide_s64_do_vec128(int64x2_t numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - int64x2_t roundToZeroTweak = vdupq_n_s64(mask); // TODO: no need to sign extend - // q = numer + ((numer >> 63) & roundToZeroTweak); - int64x2_t q = - vaddq_s64(numers, vandq_s64(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_neon_sra(q, shift); - // q = (q ^ sign) - sign; - int64x2_t sign = vreinterpretq_s64_s8(vdupq_n_s8((int8_t)more >> 7)); - q = vsubq_s64(veorq_s64(q, sign), sign); - return q; - } else { - int64x2_t q = libdivide_mullhi_s64_vec128(numers, magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - int64x2_t sign = vdupq_n_s64((int8_t)more >> 7); // TODO: no need to widen - // q += ((numer ^ sign) - sign); - q = vaddq_s64(q, vsubq_s64(veorq_s64(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_neon_sra(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = vaddq_s64( - q, vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_s64(q), 63))); // q += (q < 0) - return q; - } -} - -int64x2_t libdivide_s64_branchfree_do_vec128( - int64x2_t numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - int64x2_t sign = vdupq_n_s64((int8_t)more >> 7); // TODO: avoid sign extend - - // libdivide_mullhi_s64(numers, magic); - int64x2_t q = libdivide_mullhi_s64_vec128(numers, magic); - q = vaddq_s64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. 
- // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. - uint32_t is_power_of_2 = (magic == 0); - int64x2_t q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - int64x2_t mask = vdupq_n_s64((1ULL << shift) - is_power_of_2); - q = vaddq_s64(q, vandq_s64(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_neon_sra(q, shift); // q >>= shift - q = vsubq_s64(veorq_s64(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_AVX512) - -static inline __m512i libdivide_u32_do_vec512(__m512i numers, const struct libdivide_u32_t *denom); -static inline __m512i libdivide_s32_do_vec512(__m512i numers, const struct libdivide_s32_t *denom); -static inline __m512i libdivide_u64_do_vec512(__m512i numers, const struct libdivide_u64_t *denom); -static inline __m512i libdivide_s64_do_vec512(__m512i numers, const struct libdivide_s64_t *denom); - -static inline __m512i libdivide_u32_branchfree_do_vec512( - __m512i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m512i libdivide_s32_branchfree_do_vec512( - __m512i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m512i libdivide_u64_branchfree_do_vec512( - __m512i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m512i libdivide_s64_branchfree_do_vec512( - __m512i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -static inline __m512i libdivide_s64_signbits(__m512i v) { - ; - return _mm512_srai_epi64(v, 63); -} - -static inline __m512i libdivide_s64_shift_right_vec512(__m512i v, int amt) { - return _mm512_srai_epi64(v, amt); -} - -// Here, b is assumed to contain one 32-bit value repeated. -static inline __m512i libdivide_mullhi_u32_vec512(__m512i a, __m512i b) { - __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epu32(a, b), 32); - __m512i a1X3X = _mm512_srli_epi64(a, 32); - __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); - __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epu32(a1X3X, b), mask); - return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// b is one 32-bit value repeated. -static inline __m512i libdivide_mullhi_s32_vec512(__m512i a, __m512i b) { - __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epi32(a, b), 32); - __m512i a1X3X = _mm512_srli_epi64(a, 32); - __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); - __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epi32(a1X3X, b), mask); - return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m512i libdivide_mullhi_u64_vec512(__m512i x, __m512i y) { - // see m128i variant for comments. 
- __m512i x0y0 = _mm512_mul_epu32(x, y); - __m512i x0y0_hi = _mm512_srli_epi64(x0y0, 32); - - __m512i x1 = _mm512_shuffle_epi32(x, (_MM_PERM_ENUM)_MM_SHUFFLE(3, 3, 1, 1)); - __m512i y1 = _mm512_shuffle_epi32(y, (_MM_PERM_ENUM)_MM_SHUFFLE(3, 3, 1, 1)); - - __m512i x0y1 = _mm512_mul_epu32(x, y1); - __m512i x1y0 = _mm512_mul_epu32(x1, y); - __m512i x1y1 = _mm512_mul_epu32(x1, y1); - - __m512i mask = _mm512_set1_epi64(0xFFFFFFFF); - __m512i temp = _mm512_add_epi64(x1y0, x0y0_hi); - __m512i temp_lo = _mm512_and_si512(temp, mask); - __m512i temp_hi = _mm512_srli_epi64(temp, 32); - - temp_lo = _mm512_srli_epi64(_mm512_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm512_add_epi64(x1y1, temp_hi); - return _mm512_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. -static inline __m512i libdivide_mullhi_s64_vec512(__m512i x, __m512i y) { - __m512i p = libdivide_mullhi_u64_vec512(x, y); - __m512i t1 = _mm512_and_si512(libdivide_s64_signbits(x), y); - __m512i t2 = _mm512_and_si512(libdivide_s64_signbits(y), x); - p = _mm512_sub_epi64(p, t1); - p = _mm512_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m512i libdivide_u32_do_vec512(__m512i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm512_srli_epi32(numers, more); - } else { - __m512i q = libdivide_mullhi_u32_vec512(numers, _mm512_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); - return _mm512_srli_epi32(t, shift); - } else { - return _mm512_srli_epi32(q, more); - } - } -} - -__m512i libdivide_u32_branchfree_do_vec512( - __m512i numers, const struct libdivide_u32_branchfree_t *denom) { - __m512i q = libdivide_mullhi_u32_vec512(numers, _mm512_set1_epi32(denom->magic)); - __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); - return _mm512_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m512i libdivide_u64_do_vec512(__m512i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm512_srli_epi64(numers, more); - } else { - __m512i q = libdivide_mullhi_u64_vec512(numers, _mm512_set1_epi64(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); - return _mm512_srli_epi64(t, shift); - } else { - return _mm512_srli_epi64(q, more); - } - } -} - -__m512i libdivide_u64_branchfree_do_vec512( - __m512i numers, const struct libdivide_u64_branchfree_t *denom) { - __m512i q = libdivide_mullhi_u64_vec512(numers, _mm512_set1_epi64(denom->magic)); - __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); - return _mm512_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m512i libdivide_s32_do_vec512(__m512i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m512i roundToZeroTweak = _mm512_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m512i q = _mm512_add_epi32( - numers, _mm512_and_si512(_mm512_srai_epi32(numers, 31), roundToZeroTweak)); - q = 
_mm512_srai_epi32(q, shift); - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); - return q; - } else { - __m512i q = libdivide_mullhi_s32_vec512(numers, _mm512_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm512_add_epi32(q, _mm512_sub_epi32(_mm512_xor_si512(numers, sign), sign)); - } - // q >>= shift - q = _mm512_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm512_add_epi32(q, _mm512_srli_epi32(q, 31)); // q += (q < 0) - return q; - } -} - -__m512i libdivide_s32_branchfree_do_vec512( - __m512i numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - __m512i q = libdivide_mullhi_s32_vec512(numers, _mm512_set1_epi32(magic)); - q = _mm512_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m512i q_sign = _mm512_srai_epi32(q, 31); // q_sign = q >> 31 - __m512i mask = _mm512_set1_epi32((1U << shift) - is_power_of_2); - q = _mm512_add_epi32(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm512_srai_epi32(q, shift); // q >>= shift - q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m512i libdivide_s64_do_vec512(__m512i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m512i roundToZeroTweak = _mm512_set1_epi64(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m512i q = _mm512_add_epi64( - numers, _mm512_and_si512(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec512(q, shift); - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); - return q; - } else { - __m512i q = libdivide_mullhi_s64_vec512(numers, _mm512_set1_epi64(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm512_add_epi64(q, _mm512_sub_epi64(_mm512_xor_si512(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec512(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm512_add_epi64(q, _mm512_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m512i libdivide_s64_branchfree_do_vec512( - __m512i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m512i q = libdivide_mullhi_s64_vec512(numers, _mm512_set1_epi64(magic)); - q = _mm512_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. 
- // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. - uint32_t is_power_of_2 = (magic == 0); - __m512i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - __m512i mask = _mm512_set1_epi64((1ULL << shift) - is_power_of_2); - q = _mm512_add_epi64(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_shift_right_vec512(q, shift); // q >>= shift - q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_AVX2) - -static inline __m256i libdivide_u32_do_vec256(__m256i numers, const struct libdivide_u32_t *denom); -static inline __m256i libdivide_s32_do_vec256(__m256i numers, const struct libdivide_s32_t *denom); -static inline __m256i libdivide_u64_do_vec256(__m256i numers, const struct libdivide_u64_t *denom); -static inline __m256i libdivide_s64_do_vec256(__m256i numers, const struct libdivide_s64_t *denom); - -static inline __m256i libdivide_u32_branchfree_do_vec256( - __m256i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m256i libdivide_s32_branchfree_do_vec256( - __m256i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m256i libdivide_u64_branchfree_do_vec256( - __m256i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m256i libdivide_s64_branchfree_do_vec256( - __m256i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -// Implementation of _mm256_srai_epi64(v, 63) (from AVX512). -static inline __m256i libdivide_s64_signbits(__m256i v) { - __m256i hiBitsDuped = _mm256_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); - __m256i signBits = _mm256_srai_epi32(hiBitsDuped, 31); - return signBits; -} - -// Implementation of _mm256_srai_epi64 (from AVX512). -static inline __m256i libdivide_s64_shift_right_vec256(__m256i v, int amt) { - const int b = 64 - amt; - __m256i m = _mm256_set1_epi64x(1ULL << (b - 1)); - __m256i x = _mm256_srli_epi64(v, amt); - __m256i result = _mm256_sub_epi64(_mm256_xor_si256(x, m), m); - return result; -} - -// Here, b is assumed to contain one 32-bit value repeated. -static inline __m256i libdivide_mullhi_u32_vec256(__m256i a, __m256i b) { - __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epu32(a, b), 32); - __m256i a1X3X = _mm256_srli_epi64(a, 32); - __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); - __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epu32(a1X3X, b), mask); - return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// b is one 32-bit value repeated. -static inline __m256i libdivide_mullhi_s32_vec256(__m256i a, __m256i b) { - __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epi32(a, b), 32); - __m256i a1X3X = _mm256_srli_epi64(a, 32); - __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); - __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epi32(a1X3X, b), mask); - return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m256i libdivide_mullhi_u64_vec256(__m256i x, __m256i y) { - // see m128i variant for comments. 
- __m256i x0y0 = _mm256_mul_epu32(x, y); - __m256i x0y0_hi = _mm256_srli_epi64(x0y0, 32); - - __m256i x1 = _mm256_shuffle_epi32(x, _MM_SHUFFLE(3, 3, 1, 1)); - __m256i y1 = _mm256_shuffle_epi32(y, _MM_SHUFFLE(3, 3, 1, 1)); - - __m256i x0y1 = _mm256_mul_epu32(x, y1); - __m256i x1y0 = _mm256_mul_epu32(x1, y); - __m256i x1y1 = _mm256_mul_epu32(x1, y1); - - __m256i mask = _mm256_set1_epi64x(0xFFFFFFFF); - __m256i temp = _mm256_add_epi64(x1y0, x0y0_hi); - __m256i temp_lo = _mm256_and_si256(temp, mask); - __m256i temp_hi = _mm256_srli_epi64(temp, 32); - - temp_lo = _mm256_srli_epi64(_mm256_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm256_add_epi64(x1y1, temp_hi); - return _mm256_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. -static inline __m256i libdivide_mullhi_s64_vec256(__m256i x, __m256i y) { - __m256i p = libdivide_mullhi_u64_vec256(x, y); - __m256i t1 = _mm256_and_si256(libdivide_s64_signbits(x), y); - __m256i t2 = _mm256_and_si256(libdivide_s64_signbits(y), x); - p = _mm256_sub_epi64(p, t1); - p = _mm256_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m256i libdivide_u32_do_vec256(__m256i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm256_srli_epi32(numers, more); - } else { - __m256i q = libdivide_mullhi_u32_vec256(numers, _mm256_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q); - return _mm256_srli_epi32(t, shift); - } else { - return _mm256_srli_epi32(q, more); - } - } -} - -__m256i libdivide_u32_branchfree_do_vec256( - __m256i numers, const struct libdivide_u32_branchfree_t *denom) { - __m256i q = libdivide_mullhi_u32_vec256(numers, _mm256_set1_epi32(denom->magic)); - __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q); - return _mm256_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m256i libdivide_u64_do_vec256(__m256i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm256_srli_epi64(numers, more); - } else { - __m256i q = libdivide_mullhi_u64_vec256(numers, _mm256_set1_epi64x(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q); - return _mm256_srli_epi64(t, shift); - } else { - return _mm256_srli_epi64(q, more); - } - } -} - -__m256i libdivide_u64_branchfree_do_vec256( - __m256i numers, const struct libdivide_u64_branchfree_t *denom) { - __m256i q = libdivide_mullhi_u64_vec256(numers, _mm256_set1_epi64x(denom->magic)); - __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q); - return _mm256_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m256i libdivide_s32_do_vec256(__m256i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m256i roundToZeroTweak = _mm256_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m256i q = _mm256_add_epi32( - numers, _mm256_and_si256(_mm256_srai_epi32(numers, 31), roundToZeroTweak)); - q = _mm256_srai_epi32(q, shift); - __m256i sign 
= _mm256_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); - return q; - } else { - __m256i q = libdivide_mullhi_s32_vec256(numers, _mm256_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm256_add_epi32(q, _mm256_sub_epi32(_mm256_xor_si256(numers, sign), sign)); - } - // q >>= shift - q = _mm256_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm256_add_epi32(q, _mm256_srli_epi32(q, 31)); // q += (q < 0) - return q; - } -} - -__m256i libdivide_s32_branchfree_do_vec256( - __m256i numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - __m256i q = libdivide_mullhi_s32_vec256(numers, _mm256_set1_epi32(magic)); - q = _mm256_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m256i q_sign = _mm256_srai_epi32(q, 31); // q_sign = q >> 31 - __m256i mask = _mm256_set1_epi32((1U << shift) - is_power_of_2); - q = _mm256_add_epi32(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm256_srai_epi32(q, shift); // q >>= shift - q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m256i libdivide_s64_do_vec256(__m256i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m256i roundToZeroTweak = _mm256_set1_epi64x(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m256i q = _mm256_add_epi64( - numers, _mm256_and_si256(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec256(q, shift); - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); - return q; - } else { - __m256i q = libdivide_mullhi_s64_vec256(numers, _mm256_set1_epi64x(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm256_add_epi64(q, _mm256_sub_epi64(_mm256_xor_si256(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec256(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm256_add_epi64(q, _mm256_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m256i libdivide_s64_branchfree_do_vec256( - __m256i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m256i q = libdivide_mullhi_s64_vec256(numers, _mm256_set1_epi64x(magic)); - q = _mm256_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. 
- // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. - uint32_t is_power_of_2 = (magic == 0); - __m256i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - __m256i mask = _mm256_set1_epi64x((1ULL << shift) - is_power_of_2); - q = _mm256_add_epi64(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_shift_right_vec256(q, shift); // q >>= shift - q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_SSE2) - -static inline __m128i libdivide_u32_do_vec128(__m128i numers, const struct libdivide_u32_t *denom); -static inline __m128i libdivide_s32_do_vec128(__m128i numers, const struct libdivide_s32_t *denom); -static inline __m128i libdivide_u64_do_vec128(__m128i numers, const struct libdivide_u64_t *denom); -static inline __m128i libdivide_s64_do_vec128(__m128i numers, const struct libdivide_s64_t *denom); - -static inline __m128i libdivide_u32_branchfree_do_vec128( - __m128i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m128i libdivide_s32_branchfree_do_vec128( - __m128i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m128i libdivide_u64_branchfree_do_vec128( - __m128i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m128i libdivide_s64_branchfree_do_vec128( - __m128i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -// Implementation of _mm_srai_epi64(v, 63) (from AVX512). -static inline __m128i libdivide_s64_signbits(__m128i v) { - __m128i hiBitsDuped = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); - __m128i signBits = _mm_srai_epi32(hiBitsDuped, 31); - return signBits; -} - -// Implementation of _mm_srai_epi64 (from AVX512). -static inline __m128i libdivide_s64_shift_right_vec128(__m128i v, int amt) { - const int b = 64 - amt; - __m128i m = _mm_set1_epi64x(1ULL << (b - 1)); - __m128i x = _mm_srli_epi64(v, amt); - __m128i result = _mm_sub_epi64(_mm_xor_si128(x, m), m); - return result; -} - -// Here, b is assumed to contain one 32-bit value repeated. -static inline __m128i libdivide_mullhi_u32_vec128(__m128i a, __m128i b) { - __m128i hi_product_0Z2Z = _mm_srli_epi64(_mm_mul_epu32(a, b), 32); - __m128i a1X3X = _mm_srli_epi64(a, 32); - __m128i mask = _mm_set_epi32(-1, 0, -1, 0); - __m128i hi_product_Z1Z3 = _mm_and_si128(_mm_mul_epu32(a1X3X, b), mask); - return _mm_or_si128(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// SSE2 does not have a signed multiplication instruction, but we can convert -// unsigned to signed pretty efficiently. Again, b is just a 32 bit value -// repeated four times. -static inline __m128i libdivide_mullhi_s32_vec128(__m128i a, __m128i b) { - __m128i p = libdivide_mullhi_u32_vec128(a, b); - // t1 = (a >> 31) & y, arithmetic shift - __m128i t1 = _mm_and_si128(_mm_srai_epi32(a, 31), b); - __m128i t2 = _mm_and_si128(_mm_srai_epi32(b, 31), a); - p = _mm_sub_epi32(p, t1); - p = _mm_sub_epi32(p, t2); - return p; -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m128i libdivide_mullhi_u64_vec128(__m128i x, __m128i y) { - // full 128 bits product is: - // x0*y0 + (x0*y1 << 32) + (x1*y0 << 32) + (x1*y1 << 64) - // Note x0,y0,x1,y1 are all conceptually uint32, products are 32x32->64. - - // Compute x0*y0. - // Note x1, y1 are ignored by mul_epu32. 
- __m128i x0y0 = _mm_mul_epu32(x, y); - __m128i x0y0_hi = _mm_srli_epi64(x0y0, 32); - - // Get x1, y1 in the low bits. - // We could shuffle or right shift. Shuffles are preferred as they preserve - // the source register for the next computation. - __m128i x1 = _mm_shuffle_epi32(x, _MM_SHUFFLE(3, 3, 1, 1)); - __m128i y1 = _mm_shuffle_epi32(y, _MM_SHUFFLE(3, 3, 1, 1)); - - // No need to mask off top 32 bits for mul_epu32. - __m128i x0y1 = _mm_mul_epu32(x, y1); - __m128i x1y0 = _mm_mul_epu32(x1, y); - __m128i x1y1 = _mm_mul_epu32(x1, y1); - - // Mask here selects low bits only. - __m128i mask = _mm_set1_epi64x(0xFFFFFFFF); - __m128i temp = _mm_add_epi64(x1y0, x0y0_hi); - __m128i temp_lo = _mm_and_si128(temp, mask); - __m128i temp_hi = _mm_srli_epi64(temp, 32); - - temp_lo = _mm_srli_epi64(_mm_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm_add_epi64(x1y1, temp_hi); - return _mm_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. -static inline __m128i libdivide_mullhi_s64_vec128(__m128i x, __m128i y) { - __m128i p = libdivide_mullhi_u64_vec128(x, y); - __m128i t1 = _mm_and_si128(libdivide_s64_signbits(x), y); - __m128i t2 = _mm_and_si128(libdivide_s64_signbits(y), x); - p = _mm_sub_epi64(p, t1); - p = _mm_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m128i libdivide_u32_do_vec128(__m128i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm_srli_epi32(numers, more); - } else { - __m128i q = libdivide_mullhi_u32_vec128(numers, _mm_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); - return _mm_srli_epi32(t, shift); - } else { - return _mm_srli_epi32(q, more); - } - } -} - -__m128i libdivide_u32_branchfree_do_vec128( - __m128i numers, const struct libdivide_u32_branchfree_t *denom) { - __m128i q = libdivide_mullhi_u32_vec128(numers, _mm_set1_epi32(denom->magic)); - __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); - return _mm_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m128i libdivide_u64_do_vec128(__m128i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm_srli_epi64(numers, more); - } else { - __m128i q = libdivide_mullhi_u64_vec128(numers, _mm_set1_epi64x(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); - return _mm_srli_epi64(t, shift); - } else { - return _mm_srli_epi64(q, more); - } - } -} - -__m128i libdivide_u64_branchfree_do_vec128( - __m128i numers, const struct libdivide_u64_branchfree_t *denom) { - __m128i q = libdivide_mullhi_u64_vec128(numers, _mm_set1_epi64x(denom->magic)); - __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); - return _mm_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m128i libdivide_s32_do_vec128(__m128i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m128i roundToZeroTweak = _mm_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m128i q = - _mm_add_epi32(numers, 
_mm_and_si128(_mm_srai_epi32(numers, 31), roundToZeroTweak)); - q = _mm_srai_epi32(q, shift); - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); - return q; - } else { - __m128i q = libdivide_mullhi_s32_vec128(numers, _mm_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm_add_epi32(q, _mm_sub_epi32(_mm_xor_si128(numers, sign), sign)); - } - // q >>= shift - q = _mm_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); // q += (q < 0) - return q; - } -} - -__m128i libdivide_s32_branchfree_do_vec128( - __m128i numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - __m128i q = libdivide_mullhi_s32_vec128(numers, _mm_set1_epi32(magic)); - q = _mm_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m128i q_sign = _mm_srai_epi32(q, 31); // q_sign = q >> 31 - __m128i mask = _mm_set1_epi32((1U << shift) - is_power_of_2); - q = _mm_add_epi32(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm_srai_epi32(q, shift); // q >>= shift - q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m128i libdivide_s64_do_vec128(__m128i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m128i roundToZeroTweak = _mm_set1_epi64x(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m128i q = - _mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec128(q, shift); - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); - return q; - } else { - __m128i q = libdivide_mullhi_s64_vec128(numers, _mm_set1_epi64x(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm_add_epi64(q, _mm_sub_epi64(_mm_xor_si128(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec128(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm_add_epi64(q, _mm_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m128i libdivide_s64_branchfree_do_vec128( - __m128i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m128i q = libdivide_mullhi_s64_vec128(numers, _mm_set1_epi64x(magic)); - q = _mm_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. 
-    // If q is negative, we want to add either (2**shift)-1 if d is
-    // a power of 2, or (2**shift) if it is not a power of 2.
-    uint32_t is_power_of_2 = (magic == 0);
-    __m128i q_sign = libdivide_s64_signbits(q);  // q_sign = q >> 63
-    __m128i mask = _mm_set1_epi64x((1ULL << shift) - is_power_of_2);
-    q = _mm_add_epi64(q, _mm_and_si128(q_sign, mask));  // q = q + (q_sign & mask)
-    q = libdivide_s64_shift_right_vec128(q, shift);     // q >>= shift
-    q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign);    // q = (q ^ sign) - sign
-    return q;
-}
-
-#endif
-
-/////////// C++ stuff
-
-#ifdef __cplusplus
-
-enum Branching {
-    BRANCHFULL,  // use branching algorithms
-    BRANCHFREE   // use branchfree algorithms
-};
-
-#if defined(LIBDIVIDE_NEON)
-// Helper to deduce NEON vector type for integral type.
-template <typename T>
-struct NeonVecFor {};
-
-template <>
-struct NeonVecFor<uint32_t> {
-    typedef uint32x4_t type;
-};
-
-template <>
-struct NeonVecFor<int32_t> {
-    typedef int32x4_t type;
-};
-
-template <>
-struct NeonVecFor<uint64_t> {
-    typedef uint64x2_t type;
-};
-
-template <>
-struct NeonVecFor<int64_t> {
-    typedef int64x2_t type;
-};
-#endif
-
-// Versions of our algorithms for SIMD.
-#if defined(LIBDIVIDE_NEON)
-#define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE)                                                  \
-    typename NeonVecFor<INT_TYPE>::type divide(typename NeonVecFor<INT_TYPE>::type n) const { \
-        return libdivide_##ALGO##_do_vec128(n, &denom);                                       \
-    }
-#else
-#define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE)
-#endif
-#if defined(LIBDIVIDE_SSE2)
-#define LIBDIVIDE_DIVIDE_SSE2(ALGO) \
-    __m128i divide(__m128i n) const { return libdivide_##ALGO##_do_vec128(n, &denom); }
-#else
-#define LIBDIVIDE_DIVIDE_SSE2(ALGO)
-#endif
-
-#if defined(LIBDIVIDE_AVX2)
-#define LIBDIVIDE_DIVIDE_AVX2(ALGO) \
-    __m256i divide(__m256i n) const { return libdivide_##ALGO##_do_vec256(n, &denom); }
-#else
-#define LIBDIVIDE_DIVIDE_AVX2(ALGO)
-#endif
-
-#if defined(LIBDIVIDE_AVX512)
-#define LIBDIVIDE_DIVIDE_AVX512(ALGO) \
-    __m512i divide(__m512i n) const { return libdivide_##ALGO##_do_vec512(n, &denom); }
-#else
-#define LIBDIVIDE_DIVIDE_AVX512(ALGO)
-#endif
-
-// The DISPATCHER_GEN() macro generates C++ methods (for the given integer
-// and algorithm types) that redirect to libdivide's C API.
-#define DISPATCHER_GEN(T, ALGO)                                       \
-    libdivide_##ALGO##_t denom;                                       \
-    dispatcher() {}                                                   \
-    dispatcher(T d) : denom(libdivide_##ALGO##_gen(d)) {}             \
-    T divide(T n) const { return libdivide_##ALGO##_do(n, &denom); }  \
-    T recover() const { return libdivide_##ALGO##_recover(&denom); }  \
-    LIBDIVIDE_DIVIDE_NEON(ALGO, T)                                    \
-    LIBDIVIDE_DIVIDE_SSE2(ALGO)                                       \
-    LIBDIVIDE_DIVIDE_AVX2(ALGO)                                       \
-    LIBDIVIDE_DIVIDE_AVX512(ALGO)
-
-// The dispatcher selects a specific division algorithm for a given
-// type and ALGO using partial template specialization.
-template <bool IS_INTEGRAL, bool IS_SIGNED, int SIZEOF, Branching ALGO>
-struct dispatcher {};
-
-template <>
-struct dispatcher<true, true, sizeof(int32_t), BRANCHFULL> {
-    DISPATCHER_GEN(int32_t, s32)
-};
-template <>
-struct dispatcher<true, true, sizeof(int32_t), BRANCHFREE> {
-    DISPATCHER_GEN(int32_t, s32_branchfree)
-};
-template <>
-struct dispatcher<true, false, sizeof(uint32_t), BRANCHFULL> {
-    DISPATCHER_GEN(uint32_t, u32)
-};
-template <>
-struct dispatcher<true, false, sizeof(uint32_t), BRANCHFREE> {
-    DISPATCHER_GEN(uint32_t, u32_branchfree)
-};
-template <>
-struct dispatcher<true, true, sizeof(int64_t), BRANCHFULL> {
-    DISPATCHER_GEN(int64_t, s64)
-};
-template <>
-struct dispatcher<true, true, sizeof(int64_t), BRANCHFREE> {
-    DISPATCHER_GEN(int64_t, s64_branchfree)
-};
-template <>
-struct dispatcher<true, false, sizeof(uint64_t), BRANCHFULL> {
-    DISPATCHER_GEN(uint64_t, u64)
-};
-template <>
-struct dispatcher<true, false, sizeof(uint64_t), BRANCHFREE> {
-    DISPATCHER_GEN(uint64_t, u64_branchfree)
-};
-
-// This is the main divider class for use by the user (C++ API).
-// The actual division algorithm is selected using the dispatcher struct
-// based on the integer and algorithm template parameters.
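// How these pieces combine (an illustrative sketch, not part of the vendored
// header): divider<int32_t>, declared just below, instantiates
// dispatcher<true, true, sizeof(int32_t), BRANCHFULL>, whose
// DISPATCHER_GEN(int32_t, s32) body forwards to the C API above:
//
//     libdivide::divider<int32_t> d(7);  // libdivide_s32_gen(7)
//     int32_t q = d.divide(70);          // libdivide_s32_do(70, &denom) == 10
//     int32_t v = d.recover();           // libdivide_s32_recover(&denom) == 7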
-template <typename T, Branching ALGO = BRANCHFULL>
-class divider {
- public:
-    // We leave the default constructor empty so that creating
-    // an array of dividers and then initializing them
-    // later doesn't slow us down.
-    divider() {}
-
-    // Constructor that takes the divisor as a parameter
-    divider(T d) : div(d) {}
-
-    // Divides n by the divisor
-    T divide(T n) const { return div.divide(n); }
-
-    // Recovers the divisor, returns the value that was
-    // used to initialize this divider object.
-    T recover() const { return div.recover(); }
-
-    bool operator==(const divider<T, ALGO> &other) const {
-        return div.denom.magic == other.div.denom.magic && div.denom.more == other.div.denom.more;
-    }
-
-    bool operator!=(const divider<T, ALGO> &other) const { return !(*this == other); }
-
-    // Vector variants treat the input as packed integer values with the same type as the divider
-    // (e.g. s32, u32, s64, u64) and divide each of them by the divider, returning the packed
-    // quotients.
-#if defined(LIBDIVIDE_SSE2)
-    __m128i divide(__m128i n) const { return div.divide(n); }
-#endif
-#if defined(LIBDIVIDE_AVX2)
-    __m256i divide(__m256i n) const { return div.divide(n); }
-#endif
-#if defined(LIBDIVIDE_AVX512)
-    __m512i divide(__m512i n) const { return div.divide(n); }
-#endif
-#if defined(LIBDIVIDE_NEON)
-    typename NeonVecFor<T>::type divide(typename NeonVecFor<T>::type n) const {
-        return div.divide(n);
-    }
-#endif
-
- private:
-    // Storage for the actual divisor
-    dispatcher<std::is_integral<T>::value, std::is_signed<T>::value, sizeof(T), ALGO> div;
-};
-
-// Overload of operator / for scalar division
-template <typename T, Branching ALGO>
-T operator/(T n, const divider<T, ALGO> &div) {
-    return div.divide(n);
-}
-
-// Overload of operator /= for scalar division
-template <typename T, Branching ALGO>
-T &operator/=(T &n, const divider<T, ALGO> &div) {
-    n = div.divide(n);
-    return n;
-}
-
-// Overloads for vector types.
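// Usage sketch for the vector overloads that follow (illustrative, assuming an
// SSE2 build):
//
//     libdivide::divider<uint32_t> d(7);
//     __m128i x = _mm_set1_epi32(70);
//     x = x / d;  // four packed u32 divisions via libdivide_u32_do_vec128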
-#if defined(LIBDIVIDE_SSE2)
-template <typename T, Branching ALGO>
-__m128i operator/(__m128i n, const divider<T, ALGO> &div) {
-    return div.divide(n);
-}
-
-template <typename T, Branching ALGO>
-__m128i operator/=(__m128i &n, const divider<T, ALGO> &div) {
-    n = div.divide(n);
-    return n;
-}
-#endif
-#if defined(LIBDIVIDE_AVX2)
-template <typename T, Branching ALGO>
-__m256i operator/(__m256i n, const divider<T, ALGO> &div) {
-    return div.divide(n);
-}
-
-template <typename T, Branching ALGO>
-__m256i operator/=(__m256i &n, const divider<T, ALGO> &div) {
-    n = div.divide(n);
-    return n;
-}
-#endif
-#if defined(LIBDIVIDE_AVX512)
-template <typename T, Branching ALGO>
-__m512i operator/(__m512i n, const divider<T, ALGO> &div) {
-    return div.divide(n);
-}
-
-template <typename T, Branching ALGO>
-__m512i operator/=(__m512i &n, const divider<T, ALGO> &div) {
-    n = div.divide(n);
-    return n;
-}
-#endif
-
-#if defined(LIBDIVIDE_NEON)
-template <typename T, Branching ALGO>
-uint32x4_t operator/(uint32x4_t n, const divider<T, ALGO> &div) {
-    return div.divide(n);
-}
-
-template <typename T, Branching ALGO>
-int32x4_t operator/(int32x4_t n, const divider<T, ALGO> &div) {
-    return div.divide(n);
-}
-
-template <typename T, Branching ALGO>
-uint64x2_t operator/(uint64x2_t n, const divider<T, ALGO> &div) {
-    return div.divide(n);
-}
-
-template <typename T, Branching ALGO>
-int64x2_t operator/(int64x2_t n, const divider<T, ALGO> &div) {
-    return div.divide(n);
-}
-
-template <typename T, Branching ALGO>
-uint32x4_t operator/=(uint32x4_t &n, const divider<T, ALGO> &div) {
-    n = div.divide(n);
-    return n;
-}
-
-template <typename T, Branching ALGO>
-int32x4_t operator/=(int32x4_t &n, const divider<T, ALGO> &div) {
-    n = div.divide(n);
-    return n;
-}
-
-template <typename T, Branching ALGO>
-uint64x2_t operator/=(uint64x2_t &n, const divider<T, ALGO> &div) {
-    n = div.divide(n);
-    return n;
-}
-
-template <typename T, Branching ALGO>
-int64x2_t operator/=(int64x2_t &n, const divider<T, ALGO> &div) {
-    n = div.divide(n);
-    return n;
-}
-#endif
-
-#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900)
-// libdivide::branchfree_divider
-template <typename T>
-using branchfree_divider = divider<T, BRANCHFREE>;
-#endif
-
-}  // namespace libdivide
-
-#endif  // __cplusplus
-
-#endif  // LIBDIVIDE_H
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 7359e0a9402e..bf6f9bc9e499 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -116,6 +116,7 @@ function clone_submodules
         contrib/base64
         contrib/cctz
         contrib/libcpuid
+        contrib/libdivide
         contrib/double-conversion
         contrib/llvm-project
         contrib/lz4
diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py
index 6dc3d05d0512..2edf6ba3591d 100755
--- a/docker/test/style/process_style_check_result.py
+++ b/docker/test/style/process_style_check_result.py
@@ -19,6 +19,7 @@ def process_result(result_folder):
         "typos",
         "whitespaces",
         "workflows",
+        "submodules",
         "docs spelling",
     )
diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh
index 80911bf86273..315efb9e6c44 100755
--- a/docker/test/style/run.sh
+++ b/docker/test/style/run.sh
@@ -10,7 +10,7 @@ echo "Check style" | ts
 echo "Check python formatting with black" | ts
 ./check-black -n |& tee /test_output/black_output.txt
 echo "Check python type hinting with mypy" | ts
-./check-mypy -n |& tee /test_output/mypy_output.txt
+./check-mypy -n |& tee /test_output/mypy_output.txt
 echo "Check typos" | ts
 ./check-typos |& tee /test_output/typos_output.txt
 echo "Check docs spelling" | ts
@@ -19,6 +19,8 @@ echo "Check whitespaces" | ts
 ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
 echo "Check workflows" | ts
 ./check-workflows |& tee /test_output/workflows_output.txt
+echo "Check submodules" | ts
+./check-submodules |& tee /test_output/submodules_output.txt
 echo "Check shell scripts with shellcheck" | ts
 ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
 /process_style_check_result.py || echo -e "failure\tCannot parse
results" > /test_output/check_status.tsv diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7614a09c018b..23c14ab6c927 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -461,6 +461,9 @@ The `set` index can be used with all functions. Function subsets for other index | [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | | [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 6156a823d583..be8e26daa870 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1104,6 +1104,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %d | day of the month, zero-padded (01-31) | 02 | | %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 | | %e | day of the month, space-padded ( 1-31) |   2 | +| %f | fractional second from the fractional part of DateTime64 | 1234560 | | %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 | | %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 | | %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 | @@ -1143,6 +1144,20 @@ Result: └────────────────────────────────────────────┘ ``` +Query: + +``` sql +SELECT formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f') +``` + +Result: + +``` +┌─formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f')─┐ +│ 1234560 │ +└─────────────────────────────────────────────────────────────────────┘ +``` + ## dateName Returns specified part of date. 
diff --git a/programs/server/users.xml b/programs/server/users.xml
index 5e2ff51bf4de..9810feb9a53c 100644
--- a/programs/server/users.xml
+++ b/programs/server/users.xml
@@ -5,6 +5,15 @@
+
+
             random
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index bd8b221e2ba9..77a7b8d7352d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -611,6 +611,7 @@ if (ENABLE_TESTS)
     )
 
     target_link_libraries(unit_tests_dbms PRIVATE
+        ch_contrib::gmock_all
         ch_contrib::gtest_all
         clickhouse_functions
         clickhouse_aggregate_functions
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 09e44a3ac098..47b1bf10f8b5 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -320,7 +320,8 @@ void ClientBase::setupSignalHandler()
 
 ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const
 {
-    std::unique_ptr<IParserBase> parser;
+    ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert);
+    ParserKQLStatement kql_parser(end, global_context->getSettings().allow_settings_after_format_in_insert);
     ASTPtr res;
 
     const auto & settings = global_context->getSettingsRef();
@@ -331,15 +332,24 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
 
     const Dialect & dialect = settings.dialect;
 
-    if (dialect == Dialect::kusto)
-        parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert);
-    else
-        parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert);
+    auto begin = pos;
 
     if (is_interactive || ignore_error)
    {
         String message;
-        res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
+        if (dialect == Dialect::kusto)
+            res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
+        else if (dialect == Dialect::kusto_auto)
+        {
+            res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
+            if (!res)
+            {
+                pos = begin;
+                res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
+            }
+        }
+        else
+            res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
 
         if (!res)
         {
@@ -349,7 +359,22 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
     }
     else
     {
-        res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
+        if (dialect == Dialect::kusto)
+            res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
+        else if (dialect == Dialect::kusto_auto)
+        {
+            try
+            {
+                res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
+            }
+            catch (...)
+            {
+                pos = begin;
+                res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
+            }
+        }
+        else
+            res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
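// A condensed sketch of the kusto_auto control flow added above (illustrative;
// try_sql/try_kql are shorthand for the tryParseQuery/parseQueryAndMovePosition
// calls): parse with the SQL grammar first and, only if that fails, rewind the
// cursor and retry with the KQL grammar.
//
//     res = try_sql(pos);                              // may advance pos
//     if (!res) { pos = begin; res = try_kql(pos); }   // second pass from the start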
     }
 
     if (is_interactive)
@@ -2001,21 +2026,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
 
 bool ClientBase::processQueryText(const String & text)
 {
-    auto trimmed_input = trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; });
-
-    if (exit_strings.end() != exit_strings.find(trimmed_input))
+    if (exit_strings.end() != exit_strings.find(trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; })))
         return false;
 
-    if (trimmed_input.starts_with("\\i"))
-    {
-        size_t skip_prefix_size = std::strlen("\\i");
-        auto file_name = trim(
-            trimmed_input.substr(skip_prefix_size, trimmed_input.size() - skip_prefix_size),
-            [](char c) { return isWhitespaceASCII(c); });
-
-        return processMultiQueryFromFile(file_name);
-    }
-
     if (!is_multiquery)
     {
         assert(!query_fuzzer_runs);
@@ -2223,17 +2236,6 @@ void ClientBase::runInteractive()
 }
 
-bool ClientBase::processMultiQueryFromFile(const String & file_name)
-{
-    String queries_from_file;
-
-    ReadBufferFromFile in(file_name);
-    readStringUntilEOF(queries_from_file, in);
-
-    return executeMultiQuery(queries_from_file);
-}
-
-
 void ClientBase::runNonInteractive()
 {
     if (delayed_interactive)
@@ -2241,13 +2243,23 @@ void ClientBase::runNonInteractive()
 
     if (!queries_files.empty())
     {
+        auto process_multi_query_from_file = [&](const String & file)
+        {
+            String queries_from_file;
+
+            ReadBufferFromFile in(file);
+            readStringUntilEOF(queries_from_file, in);
+
+            return executeMultiQuery(queries_from_file);
+        };
+
         for (const auto & queries_file : queries_files)
         {
             for (const auto & interleave_file : interleave_queries_files)
-                if (!processMultiQueryFromFile(interleave_file))
+                if (!process_multi_query_from_file(interleave_file))
                     return;
 
-            if (!processMultiQueryFromFile(queries_file))
+            if (!process_multi_query_from_file(queries_file))
                 return;
         }
diff --git a/src/Columns/FilterDescription.cpp b/src/Columns/FilterDescription.cpp
index f8f4ee365ef0..4147801f21ee 100644
--- a/src/Columns/FilterDescription.cpp
+++ b/src/Columns/FilterDescription.cpp
@@ -78,7 +78,8 @@ FilterDescription::FilterDescription(const IColumn & column_)
     const NullMap & null_map = nullable_column->getNullMapData();
     IColumn::Filter & res = concrete_column->getData();
 
-    size_t size = res.size();
+    const auto size = res.size();
+    assert(size == null_map.size());
 
     for (size_t i = 0; i < size; ++i)
         res[i] = res[i] && !null_map[i];
diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp
index 7bac3f04fc66..fa1a10d22f20 100644
--- a/src/Core/Settings.cpp
+++ b/src/Core/Settings.cpp
@@ -148,31 +148,30 @@ std::vector<String> Settings::getAllRegisteredNames() const
 
 void Settings::set(std::string_view name, const Field & value)
 {
-    BaseSettings::set(name, value);
-
     if (name == "compatibility")
-        applyCompatibilitySetting();
+        applyCompatibilitySetting(value.get<String>());
     /// If we change setting that was changed by compatibility setting before
     /// we should remove it from settings_changed_by_compatibility_setting,
     /// otherwise the next time we will change compatibility setting
    /// this setting will be changed too (and we don't want it).
     else if (settings_changed_by_compatibility_setting.contains(name))
         settings_changed_by_compatibility_setting.erase(name);
+
+    BaseSettings::set(name, value);
 }
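// Sketch of the compatibility flow implemented by set()/applyCompatibilitySetting()
// (illustrative, with a hypothetical value): SET compatibility = '22.6' first
// reverts anything applied by a previous compatibility value, then walks
// settings_changes_history for versions above 22.6 in descending order,
// restoring each setting's pre-change value and recording its name in
// settings_changed_by_compatibility_setting, so that a later explicit change
// of that setting is not overridden.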
diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp
index 7bac3f04fc66..fa1a10d22f20 100644
--- a/src/Core/Settings.cpp
+++ b/src/Core/Settings.cpp
@@ -148,31 +148,30 @@ std::vector<String> Settings::getAllRegisteredNames() const
 
 void Settings::set(std::string_view name, const Field & value)
 {
-    BaseSettings::set(name, value);
-
     if (name == "compatibility")
-        applyCompatibilitySetting();
+        applyCompatibilitySetting(value.get<String>());
     /// If we change setting that was changed by compatibility setting before
     /// we should remove it from settings_changed_by_compatibility_setting,
     /// otherwise the next time we will change compatibility setting
     /// this setting will be changed too (and we don't want it).
     else if (settings_changed_by_compatibility_setting.contains(name))
         settings_changed_by_compatibility_setting.erase(name);
+
+    BaseSettings::set(name, value);
 }
 
-void Settings::applyCompatibilitySetting()
+void Settings::applyCompatibilitySetting(const String & compatibility_value)
 {
     /// First, revert all changes applied by previous compatibility setting
     for (const auto & setting_name : settings_changed_by_compatibility_setting)
         resetToDefault(setting_name);
 
     settings_changed_by_compatibility_setting.clear();
 
-    String compatibility = getString("compatibility");
     /// If setting value is empty, we don't need to change settings
-    if (compatibility.empty())
+    if (compatibility_value.empty())
         return;
 
-    ClickHouseVersion version(compatibility);
+    ClickHouseVersion version(compatibility_value);
     /// Iterate through ClickHouse version in descending order and apply reversed
     /// changes for each version that is higher that version from compatibility setting
     for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it)
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index acf11ca31481..f77651326bec 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -922,7 +922,7 @@ struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings>
     void setDefaultValue(const String & name) { resetToDefault(name); }
 
 private:
-    void applyCompatibilitySetting();
+    void applyCompatibilitySetting(const String & compatibility);
 
     std::unordered_set<std::string_view> settings_changed_by_compatibility_setting;
 };
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index ee378b295fa6..0c637c6d345d 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <IO/ReadBufferFromString.h>
 #include
 #include
 
@@ -28,7 +29,8 @@ class ClickHouseVersion
         for (const auto & split_element : split)
         {
             size_t component;
-            if (!tryParse(component, split_element))
+            ReadBufferFromString buf(split_element);
+            if (!tryReadIntText(component, buf) || !buf.eof())
                 throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version};
             components.push_back(component);
         }
@@ -78,41 +80,43 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChanges> settings_changes_history =
 {
-    {"22.12", {{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
-    {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
-    {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
-    {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
-    {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},
-    {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}},
-    {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"},
-    {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}},
-    {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"},
-    {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}},
-    {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}},
-    {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}},
-    {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}},
-    {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"},
-    {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}},
-    {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}},
-    {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}},
-    {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"},
-    {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"},
-    {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}},
-    {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}},
-    {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"},
-    {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"},
-    {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"},
-    {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}},
-    {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}},
-    {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}},
-    {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"},
-    {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}},
-    {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}},
-    {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}},
-    {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}},
-    {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}},
-    {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}},
-    {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}},
+    {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"},
+               {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"},
+               {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
+    {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
+    {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
+    {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
+              {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},
+              {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}},
+    {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"},
+              {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}},
+    {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"},
+              {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}},
+    {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interprets SETTINGS as some values, which is misleading"}}},
+    {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}},
+    {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}},
+    {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"},
+              {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}},
+    {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}},
+    {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}},
+    {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"},
+              {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that are semantically equal to count() as count() by default"},
+              {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}},
+    {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}},
+    {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"},
+              {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"},
+              {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"},
+              {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}},
+    {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to make the behaviour more like what users expect"}}},
+    {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}},
+    {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"},
+              {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}},
+    {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}},
+    {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}},
+    {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}},
+    {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}},
+    {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}},
+
{"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; } diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 632587106a1a..19d43727cabb 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -161,5 +161,7 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, - {"kusto", Dialect::kusto}}) + {"kusto", Dialect::kusto}, + {"kusto_auto", Dialect::kusto_auto}}) + } diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index abd3e3c85f82..fcc7c21b104e 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index 2634578f3252..07e3c70f41f8 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -157,6 +157,7 @@ inline bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) class DataTypeDate; class DataTypeDate32; +class DataTypeInterval; class DataTypeString; class DataTypeFixedString; class DataTypeUUID; @@ -199,6 +200,8 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... args) case TypeIndex::DateTime: return f(TypePair(), std::forward(args)...); case TypeIndex::DateTime64: return f(TypePair(), std::forward(args)...); + case TypeIndex::Interval: return f(TypePair(), std::forward(args)...); + case TypeIndex::String: return f(TypePair(), std::forward(args)...); case TypeIndex::FixedString: return f(TypePair(), std::forward(args)...); diff --git a/src/DataTypes/DataTypeInterval.h b/src/DataTypes/DataTypeInterval.h index 83d89a734606..d3d33f1280ba 100644 --- a/src/DataTypes/DataTypeInterval.h +++ b/src/DataTypes/DataTypeInterval.h @@ -25,7 +25,7 @@ class DataTypeInterval final : public DataTypeNumberBase IntervalKind getKind() const { return kind; } - explicit DataTypeInterval(IntervalKind kind_) : kind(kind_) {} + explicit DataTypeInterval(IntervalKind kind_ = {}) : kind(kind_) {} std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); } const char * getFamilyName() const override { return "Interval"; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 7440896b4c96..40972b0e8ef5 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -371,6 +371,7 @@ struct WhichDataType constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; } constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; } constexpr bool isDateOrDate32() const { return isDate() || isDate32(); } + constexpr bool isDateOrDate32OrDateTimeOrDateTime64() const { return isDate() || isDate32() || isDateTime() || isDateTime64(); } constexpr bool isString() const { return idx == TypeIndex::String; } constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; } @@ -547,6 +548,7 @@ inline bool isAggregateFunction(const DataTypePtr & data_type) template constexpr bool IsDataTypeDecimal = false; template constexpr bool IsDataTypeNumber = false; template constexpr bool IsDataTypeDateOrDateTime = false; +template constexpr bool IsDataTypeInterval = false; template constexpr bool IsDataTypeEnum = false; template constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal || IsDataTypeNumber; @@ -562,6 +564,8 @@ class DataTypeDate32; class DataTypeDateTime; class DataTypeDateTime64; 
+class DataTypeInterval; + template constexpr bool IsDataTypeDecimal> = true; template <> inline constexpr bool IsDataTypeDecimal = true; @@ -572,6 +576,8 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime = tru template <> inline constexpr bool IsDataTypeDateOrDateTime = true; template <> inline constexpr bool IsDataTypeDateOrDateTime = true; +template <> inline constexpr bool IsDataTypeInterval = true; + template class DataTypeEnum; diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 6d5f6c427993..bc3894f6e1a0 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -32,7 +31,7 @@ String getExceptionMessage( template std::conditional_t, T> -getArgument(const ASTPtr & arguments, size_t argument_index, const char * argument_name [[maybe_unused]], const std::string context_data_type_name) +getArgument(const ASTPtr & arguments, size_t argument_index, const char * argument_name [[maybe_unused]], const std::string & context_data_type_name) { using NearestResultType = NearestFieldType; const auto field_type = Field::TypeToEnum::value; diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 93374f933b79..20a14e60db53 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -103,6 +103,8 @@ endif() add_subdirectory(JSONPath) list (APPEND PRIVATE_LIBS clickhouse_functions_jsonpath) +add_subdirectory(Kusto) +list (APPEND OBJECT_LIBS $) # Signed integer overflow on user-provided data inside boost::geometry - ignore. set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow) diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index c8cef81333a7..de00e9397d6a 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -36,8 +36,12 @@ struct CountSubstringsImpl const ColumnString::Offsets & haystack_offsets, const std::string & needle, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + const UInt8 * const begin = haystack_data.data(); const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; @@ -104,8 +108,12 @@ struct CountSubstringsImpl std::string haystack, std::string needle, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + Impl::toLowerIfNeed(haystack); Impl::toLowerIfNeed(needle); @@ -138,8 +146,12 @@ struct CountSubstringsImpl const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
+ assert(!res_null); + ColumnString::Offset prev_haystack_offset = 0; ColumnString::Offset prev_needle_offset = 0; @@ -191,10 +203,13 @@ struct CountSubstringsImpl const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { - /// NOTE You could use haystack indexing. But this is a rare case. + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + /// NOTE You could use haystack indexing. But this is a rare case. ColumnString::Offset prev_needle_offset = 0; size_t size = needle_offsets.size(); diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index f8cffab9f486..e0a69290e7a9 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -70,6 +70,7 @@ struct DivideIntegralImpl { using ResultType = typename NumberTraits::ResultOfIntegerDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -125,6 +126,7 @@ struct ModuloImpl using IntegerBType = typename NumberTraits::ToInteger::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 0a79ac3b0d99..4f40402f986e 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -99,6 +99,9 @@ template constexpr bool IsIntegralOrExtendedOrDecimal = IsIntegralOrExtended || IsDataTypeDecimal; +template constexpr bool IsInterval = false; +template <> inline constexpr bool IsInterval = true; + template constexpr bool IsFloatingPoint = false; template <> inline constexpr bool IsFloatingPoint = true; template <> inline constexpr bool IsFloatingPoint = true; @@ -120,6 +123,7 @@ struct BinaryOperationTraits { using T0 = typename LeftDataType::FieldType; using T1 = typename RightDataType::FieldType; + private: /// it's not correct for Decimal using Op = Operation; @@ -199,8 +203,6 @@ template static void NO_INLINE process(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t size, const NullMap * right_nullmap = nullptr) @@ -633,6 +635,8 @@ class FunctionBinaryArithmetic : public IFunction { static constexpr const bool is_plus = IsOperation::plus; static constexpr const bool is_minus = IsOperation::minus; + static constexpr const bool is_modulo = IsOperation::modulo; + static constexpr const bool is_modulo_or_zero = IsOperation::modulo_or_zero; static constexpr const bool is_multiply = IsOperation::multiply; static constexpr const bool is_division = IsOperation::division; @@ -670,8 +674,31 @@ class FunctionBinaryArithmetic : public IFunction }); } + static ColumnsWithTypeAndName switchArgumentOrder(const ColumnsWithTypeAndName & arguments) + { + auto new_arguments = arguments; + + /// Interval argument must be second. 
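DivideIntegralImpl and ModuloImpl above (and GCDLCMImpl later in this diff) opt into interval arithmetic through a new allow_interval trait, checked the same way as the existing allow_fixed_string flag: the dispatcher consults it with if constexpr, so operations that opt out never instantiate the interval code path at all. The same convention in miniature, as shown in this sketch (toy operation structs with hypothetical names):

    #include <cassert>

    struct Interval { int ticks; };

    // Toy operations using the same opt-in convention as allow_fixed_string
    // and allow_interval: a compile-time trait per operation.
    struct PlusOp
    {
        static constexpr bool allow_interval = true;
        static int apply(int a, int b) { return a + b; }
    };

    struct BitAndOp
    {
        static constexpr bool allow_interval = false;   // opts out
        static int apply(int a, int b) { return a & b; }
    };

    // Dispatcher in the style of FunctionBinaryArithmetic: the trait is
    // consulted with `if constexpr`, so opted-out operations never compile
    // the interval branch.
    template <typename Op>
    bool tryExecuteInterval(Interval a, Interval b, Interval & out)
    {
        if constexpr (!Op::allow_interval)
            return false;
        else
        {
            out.ticks = Op::apply(a.ticks, b.ticks);
            return true;
        }
    }

    int main()
    {
        Interval out{};
        assert(tryExecuteInterval<PlusOp>({2}, {3}, out) && out.ticks == 5);
        assert(!tryExecuteInterval<BitAndOp>({2}, {3}, out));
    }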
+ if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) + std::swap(new_arguments[0], new_arguments[1]); + + /// Change interval argument type to its representation + if (WhichDataType(new_arguments[1].type).isInterval()) + new_arguments[1].type = std::make_shared>(); + + return new_arguments; + } + + static FunctionOverloadResolverPtr getFunctionForDateTimeArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) + { + if (isDateTime64(type0) && isDateTime64(type1) && is_minus) + return FunctionFactory::instance().get("dateTime64Diff", context); + + return {}; + } + static FunctionOverloadResolverPtr - getFunctionForIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) + getFunctionForDateTimeIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) { bool first_is_date_or_datetime = isDateOrDate32(type0) || isDateTime(type0) || isDateTime64(type0); bool second_is_date_or_datetime = isDateOrDate32(type1) || isDateTime(type1) || isDateTime64(type1); @@ -982,18 +1009,37 @@ class FunctionBinaryArithmetic : public IFunction ColumnPtr executeDateTimeIntervalPlusMinus(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FunctionOverloadResolverPtr & function_builder) const { - ColumnsWithTypeAndName new_arguments = arguments; + const auto new_arguments = switchArgumentOrder(arguments); + auto function = function_builder->build(new_arguments); + return function->execute(new_arguments, result_type, input_rows_count); + } - /// Interval argument must be second. - if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) - std::swap(new_arguments[0], new_arguments[1]); + ColumnPtr + executeInterval(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const + { + const auto convert_argument = [this, &input_rows_count](const ColumnWithTypeAndName & argument) + { + if (const WhichDataType which_data_type(*argument.type); which_data_type.isInterval()) + { + const ColumnsWithTypeAndName conversion_args{ + argument, + createConstColumnWithTypeAndName( + DataTypeNumber().getName(), "target_type")}; - /// Change interval argument type to its representation - if (WhichDataType(new_arguments[1].type).isInterval()) - new_arguments[1].type = std::make_shared>(); + const auto converted = executeFunctionCall(context, "cast", conversion_args, input_rows_count); + return asArgument(converted, argument.name); + } - auto function = function_builder->build(new_arguments); - return function->execute(new_arguments, result_type, input_rows_count); + return argument; + }; + + const ColumnsWithTypeAndName adjusted_args{convert_argument(arguments.front()), convert_argument(arguments.back())}; + const auto intermediate = executeFunctionCall(context, name, adjusted_args, input_rows_count); + + const ColumnsWithTypeAndName conversion_args + = {asArgument(intermediate, "intermediate"), + createConstColumnWithTypeAndName(result_type->getName(), "target_type")}; + return executeFunctionCall(context, "accurateCastOrNull", conversion_args, input_rows_count).first; } ColumnPtr executeDateTimeTupleOfIntervalsPlusMinus(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, @@ -1209,21 +1255,23 @@ class FunctionBinaryArithmetic : public IFunction return arguments[0]; } - /// Special case when the function is 
plus or minus, one of arguments is Date/DateTime and another is Interval. - if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0], arguments[1], context)) + if (auto function_builder = getFunctionForDateTimeArithmetic(arguments[0], arguments[1], context)) { ColumnsWithTypeAndName new_arguments(2); - for (size_t i = 0; i < 2; ++i) new_arguments[i].type = arguments[i]; - /// Interval argument must be second. - if (isDateOrDate32(new_arguments[1].type) || isDateTime(new_arguments[1].type) || isDateTime64(new_arguments[1].type)) - std::swap(new_arguments[0], new_arguments[1]); + return function_builder->build(new_arguments)->getResultType(); + } - /// Change interval argument to its representation - new_arguments[1].type = std::make_shared>(); + /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. + if (auto function_builder = getFunctionForDateTimeIntervalArithmetic(arguments[0], arguments[1], context)) + { + ColumnsWithTypeAndName new_arguments(2); + for (size_t i = 0; i < 2; ++i) + new_arguments[i].type = arguments[i]; + new_arguments = switchArgumentOrder(new_arguments); auto function = function_builder->build(new_arguments); return function->getResultType(); } @@ -1290,6 +1338,7 @@ class FunctionBinaryArithmetic : public IFunction { using LeftDataType = std::decay_t; using RightDataType = std::decay_t; + using ConcreteOp = Op; if constexpr ((std::is_same_v || std::is_same_v) || (std::is_same_v || std::is_same_v)) @@ -1297,7 +1346,7 @@ class FunctionBinaryArithmetic : public IFunction if constexpr (std::is_same_v && std::is_same_v) { - if constexpr (!Op::allow_fixed_string) + if constexpr (!ConcreteOp::allow_fixed_string) return false; else { @@ -1309,7 +1358,7 @@ class FunctionBinaryArithmetic : public IFunction } } - if constexpr (!Op::allow_string_integer) + if constexpr (!ConcreteOp::allow_string_integer) return false; else if constexpr (!IsIntegral) return false; @@ -1319,21 +1368,6 @@ class FunctionBinaryArithmetic : public IFunction type_res = std::make_shared(); return true; } - else if constexpr (std::is_same_v || std::is_same_v) - { - if constexpr (std::is_same_v && - std::is_same_v) - { - if constexpr (is_plus || is_minus) - { - if (left.getKind() == right.getKind()) - { - type_res = std::make_shared(left.getKind()); - return true; - } - } - } - } else { using ResultDataType = typename BinaryOperationTraits::ResultDataType; @@ -1380,6 +1414,42 @@ class FunctionBinaryArithmetic : public IFunction tz = &left; type_res = std::make_shared(*tz); } + else if constexpr (IsInterval || IsInterval) + { + if constexpr (!ConcreteOp::allow_interval) + return false; + + const auto nested_type = std::invoke( + [&]() -> std::shared_ptr + { + static constexpr auto is_left_interval = IsInterval; + static constexpr auto is_right_interval = IsInterval; + if constexpr ( + is_left_interval && !is_right_interval + && (is_division || is_modulo || is_modulo_or_zero || is_multiply)) + return std::make_shared(left.getKind()); + else if constexpr (!is_left_interval && is_right_interval && is_multiply) + return std::make_shared(right.getKind()); + else if constexpr ( + is_left_interval && is_right_interval + && (is_division || is_minus || is_modulo || is_modulo_or_zero || is_plus)) + { + if (left.getKind() != right.getKind()) + return {}; + else if constexpr (!is_division) + return std::make_shared(left.getKind()); + else + return std::make_shared(); + } + else + return {}; + }); + + if (nested_type) + type_res = 
makeNullable(nested_type); + + return static_cast(nested_type); + } else type_res = std::make_shared(); return true; @@ -1709,8 +1779,11 @@ class FunctionBinaryArithmetic : public IFunction return executeAggregateAddition(arguments, result_type, input_rows_count); } + if (auto function_builder = getFunctionForDateTimeArithmetic(arguments[0].type, arguments[1].type, context)) + return function_builder->build(arguments)->execute(arguments, result_type, input_rows_count); + /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. - if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0].type, arguments[1].type, context)) + if (auto function_builder = getFunctionForDateTimeIntervalArithmetic(arguments[0].type, arguments[1].type, context)) { return executeDateTimeIntervalPlusMinus(arguments, result_type, input_rows_count, function_builder); } @@ -1770,6 +1843,7 @@ class FunctionBinaryArithmetic : public IFunction { using LeftDataType = std::decay_t; using RightDataType = std::decay_t; + using ConcreteOp = Op; if constexpr ((std::is_same_v || std::is_same_v) || (std::is_same_v || std::is_same_v)) @@ -1777,13 +1851,13 @@ class FunctionBinaryArithmetic : public IFunction if constexpr (std::is_same_v && std::is_same_v) { - if constexpr (!Op::allow_fixed_string) + if constexpr (!ConcreteOp::allow_fixed_string) return false; else return (res = executeFixedString(arguments)) != nullptr; } - if constexpr (!Op::allow_string_integer) + if constexpr (!ConcreteOp::allow_string_integer) return false; else if constexpr (!IsIntegral) return false; @@ -1794,6 +1868,13 @@ class FunctionBinaryArithmetic : public IFunction else if constexpr (std::is_same_v) return (res = executeStringInteger(arguments, left, right)) != nullptr; } + else if constexpr (IsInterval || IsInterval) + { + if constexpr (!ConcreteOp::allow_interval) + return false; + + return (res = executeInterval(arguments, result_type, input_rows_count)) != nullptr; + } else return (res = executeNumeric(arguments, left, right, right_nullmap)) != nullptr; }); diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 74045cd9f12c..4fce4aa48565 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -1,3 +1,5 @@ +#include "FunctionFactory.h" + #include #include #include @@ -318,4 +320,16 @@ bool isDecimalOrNullableDecimal(const DataTypePtr & type) return isDecimal(assert_cast(type.get())->getNestedType()); } +std::pair executeFunctionCall( + const ContextPtr & context, const std::string & name, const ColumnsWithTypeAndName & arguments, const size_t input_rows_count) +{ + const auto function = FunctionFactory::instance().get(name, context)->build(arguments); + const auto & result_data_type = function->getResultType(); + return {function->execute(arguments, result_data_type, input_rows_count), result_data_type}; +} + +ColumnWithTypeAndName asArgument(const std::pair & column_with_type, const std::string_view name) +{ + return {column_with_type.first, column_with_type.second, std::string(name)}; +} } diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 18a4e5840809..670fa005f0f2 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -174,4 +175,15 @@ struct NullPresence NullPresence getNullPresense(const ColumnsWithTypeAndName & args); bool isDecimalOrNullableDecimal(const DataTypePtr & type); 
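The nested_type lambda above is effectively the whole type-rule table for interval arithmetic: an interval with a plain number admits *, / and % (plus number * interval), keeping the interval kind; two intervals admit +, -, / and % only when the kinds match, with division of like intervals degrading to a plain number; everything else is rejected, and because executeInterval routes results through accurateCastOrNull, type_res is wrapped in makeNullable. A runtime restatement of those rules (hypothetical enums standing in for IntervalKind and the operation traits):

    #include <cassert>
    #include <optional>

    enum class Kind { Second, Minute, Hour };               // stand-in for IntervalKind
    enum class Op { Plus, Minus, Multiply, Divide, Modulo };

    struct Result { bool is_interval; Kind kind; };         // plain number when !is_interval

    // nullopt means the combination is rejected, mirroring `return {};` above.
    std::optional<Result> intervalOpResult(std::optional<Kind> left, std::optional<Kind> right, Op op)
    {
        const bool l = left.has_value(), r = right.has_value();

        if (l && !r && (op == Op::Multiply || op == Op::Divide || op == Op::Modulo))
            return Result{true, *left};                     // Interval <op> Number keeps the kind
        if (!l && r && op == Op::Multiply)
            return Result{true, *right};                    // Number * Interval keeps the kind
        if (l && r && op != Op::Multiply)                   // two intervals: +, -, /, %
        {
            if (*left != *right)
                return std::nullopt;                        // mixed kinds are rejected
            if (op == Op::Divide)
                return Result{false, *left};                // ratio of like intervals: plain number
            return Result{true, *left};
        }
        return std::nullopt;
    }

    int main()
    {
        assert(!intervalOpResult(Kind::Second, Kind::Minute, Op::Plus));             // kind mismatch
        assert(!intervalOpResult(std::nullopt, Kind::Hour, Op::Divide));             // Number / Interval
        assert(!intervalOpResult(Kind::Hour, Kind::Hour, Op::Divide)->is_interval);  // Float64-like result
        assert(intervalOpResult(Kind::Hour, std::nullopt, Op::Modulo)->is_interval);
    }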
+ +template +ColumnWithTypeAndName createConstColumnWithTypeAndName(const typename T::FieldType & value, const std::string & name, Args&&... args) +{ + return {T().createColumnConst(1, toField(value)), std::make_shared(std::forward(args)...), name}; +} + +std::pair executeFunctionCall( + const ContextPtr & context, const std::string & name, const ColumnsWithTypeAndName & arguments, size_t input_rows_count); + +ColumnWithTypeAndName asArgument(const std::pair & column_with_type, std::string_view name); } diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index befab1e0c91e..a0ae5b63a6e0 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -213,11 +213,12 @@ class FunctionUnaryArithmetic : public IFunction return false; result = std::make_shared(type.getN()); } - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v && !is_sign_function) { - if constexpr (!IsUnaryOperation::negate) + if constexpr (!Op::allow_interval) return false; - result = std::make_shared(type.getKind()); + + result = std::make_shared(type.getKind()); } else { diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index d607af54fcd0..81a0a1a42bc1 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -111,17 +111,17 @@ REGISTER_FUNCTION(Conversion) factory.registerFunction(); factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index c9638ab95afd..68c19f6b8bb6 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2370,6 +2370,17 @@ using FunctionToDate32 = FunctionConvert; using FunctionToDateTime32 = FunctionConvert; using FunctionToDateTime64 = FunctionConvert; +using FunctionToIntervalNanosecond = FunctionConvert; +using FunctionToIntervalMicrosecond = FunctionConvert; +using FunctionToIntervalMillisecond = FunctionConvert; +using FunctionToIntervalSecond = FunctionConvert; +using FunctionToIntervalMinute = FunctionConvert; +using FunctionToIntervalHour = FunctionConvert; +using FunctionToIntervalDay = FunctionConvert; +using FunctionToIntervalWeek = FunctionConvert; +using FunctionToIntervalMonth = FunctionConvert; +using FunctionToIntervalQuarter = FunctionConvert; +using FunctionToIntervalYear = FunctionConvert; using FunctionToUUID = FunctionConvert>; using FunctionToString = FunctionConvert; using FunctionToUnixTimestamp = FunctionConvert>; @@ -2720,8 +2731,8 @@ class FunctionCast final : public FunctionCastBase { TypeIndex from_type_index = from_type->getTypeId(); WhichDataType which(from_type_index); - bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) - && (which.isInt() || which.isUInt() || 
which.isFloat()); + const bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) + && (which.isInt() || which.isUInt() || which.isFloat() || which.isInterval()); if (requested_result_is_nullable && checkAndGetDataType(from_type.get())) { @@ -2733,8 +2744,27 @@ class FunctionCast final : public FunctionCastBase } else if (!can_apply_accurate_cast) { - FunctionPtr function = FunctionTo::Type::create(); - return createFunctionAdaptor(function, from_type); + if constexpr (std::is_same_v) + { + const auto to_interval_function = std::invoke( + [interval_kind = to_type->getKind()] + { + switch (interval_kind) + { +#define DECLARE_CASE(NAME) \ + case IntervalKind::NAME: \ + return FunctionToInterval##NAME::create(); + FOR_EACH_INTERVAL_KIND(DECLARE_CASE) +#undef DECLARE_CASE + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected interval kind"); + }); + + return createFunctionAdaptor(to_interval_function, from_type); + } + else + return createFunctionAdaptor(FunctionTo::Type::create(), from_type); } auto wrapper_cast_type = cast_type; @@ -2748,7 +2778,8 @@ class FunctionCast final : public FunctionCastBase using LeftDataType = typename Types::LeftType; using RightDataType = typename Types::RightType; - if constexpr (IsDataTypeNumber && IsDataTypeNumber) + if constexpr ((IsDataTypeNumber || IsDataTypeInterval) + && (IsDataTypeNumber || IsDataTypeInterval)) { if (wrapper_cast_type == CastType::accurate) { @@ -3760,6 +3791,7 @@ class FunctionCast final : public FunctionCastBase std::is_same_v || std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v) { ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index d8da525e63a4..c9de29697bfc 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -1,10 +1,12 @@ #pragma once #include +#include #include #include #include #include +#include #include #include #include @@ -61,11 +63,18 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -template +enum class ExecutionErrorPolicy +{ + Null, + Throw +}; + +template class FunctionsStringSearch : public IFunction { public: static constexpr auto name = Impl::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } @@ -117,7 +126,11 @@ class FunctionsStringSearch : public IFunction arguments[2]->getName(), getName()); } - return std::make_shared>(); + auto return_type = std::make_shared>(); + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return makeNullable(return_type); + + return return_type; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override @@ -133,21 +146,31 @@ class FunctionsStringSearch : public IFunction const ColumnConst * col_needle_const = typeid_cast(&*column_needle); using ResultType = typename Impl::ResultType; + auto col_res = ColumnVector::create(); + auto & vec_res = col_res->getData(); + + const auto create_null_map = [&]() -> ColumnUInt8::MutablePtr + { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return ColumnUInt8::create(vec_res.size()); + + return {}; + }; if constexpr (!Impl::use_default_implementation_for_constants) { - bool is_col_start_pos_const = column_start_pos == nullptr || 
isColumnConst(*column_start_pos); if (col_haystack_const && col_needle_const) { - auto col_res = ColumnVector::create(); - typename ColumnVector::Container & vec_res = col_res->getData(); + const auto is_col_start_pos_const = !column_start_pos || isColumnConst(*column_start_pos); vec_res.resize(is_col_start_pos_const ? 1 : column_start_pos->size()); + const auto null_map = create_null_map(); Impl::constantConstant( col_haystack_const->getValue(), col_needle_const->getValue(), column_start_pos, - vec_res); + vec_res, + null_map.get()); if (is_col_start_pos_const) return result_type->createColumnConst(col_haystack_const->size(), toField(vec_res[0])); @@ -156,10 +179,8 @@ class FunctionsStringSearch : public IFunction } } - auto col_res = ColumnVector::create(); - - typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(column_haystack->size()); + auto null_map = create_null_map(); const ColumnString * col_haystack_vector = checkAndGetColumn(&*column_haystack); const ColumnFixedString * col_haystack_vector_fixed = checkAndGetColumn(&*column_haystack); @@ -172,14 +193,16 @@ class FunctionsStringSearch : public IFunction col_needle_vector->getChars(), col_needle_vector->getOffsets(), column_start_pos, - vec_res); + vec_res, + null_map.get()); else if (col_haystack_vector && col_needle_const) Impl::vectorConstant( col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getValue(), column_start_pos, - vec_res); + vec_res, + null_map.get()); else if (col_haystack_vector_fixed && col_needle_vector) Impl::vectorFixedVector( col_haystack_vector_fixed->getChars(), @@ -187,20 +210,23 @@ class FunctionsStringSearch : public IFunction col_needle_vector->getChars(), col_needle_vector->getOffsets(), column_start_pos, - vec_res); + vec_res, + null_map.get()); else if (col_haystack_vector_fixed && col_needle_const) Impl::vectorFixedConstant( col_haystack_vector_fixed->getChars(), col_haystack_vector_fixed->getN(), col_needle_const->getValue(), - vec_res); + vec_res, + null_map.get()); else if (col_haystack_const && col_needle_vector) Impl::constantVector( col_haystack_const->getValue(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), column_start_pos, - vec_res); + vec_res, + null_map.get()); else throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -209,6 +235,9 @@ class FunctionsStringSearch : public IFunction arguments[1].column->getName(), getName()); + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return ColumnNullable::create(std::move(col_res), std::move(null_map)); + return col_res; } }; diff --git a/src/Functions/FunctionsStringSearchToString.h b/src/Functions/FunctionsStringSearchToString.h index 6d2d5efdb3dd..44293f60c37a 100644 --- a/src/Functions/FunctionsStringSearchToString.h +++ b/src/Functions/FunctionsStringSearchToString.h @@ -86,4 +86,64 @@ class FunctionsStringSearchToString : public IFunction } }; + +template +class KqlStringSearchToString : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + 
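KqlStringSearchToString above is generic scaffolding for KQL's extract-style functions: argument 0 is the haystack column, argument 1 a constant pattern, argument 2 the capture-group index, and the Impl supplied elsewhere does the per-row work. A rough illustration of the intended per-row semantics using std::regex (an assumption for illustration; the actual Impl is not shown in this diff):

    #include <cassert>
    #include <regex>
    #include <string>

    // Per-row behaviour sketched: return the requested capture group,
    // or an empty string when the pattern does not match the haystack.
    std::string extractCapture(const std::string & haystack,
                               const std::string & pattern, unsigned capture)
    {
        std::smatch match;
        if (std::regex_search(haystack, match, std::regex(pattern)) && capture < match.size())
            return match[capture].str();
        return {};
    }

    int main()
    {
        assert(extractCapture("duration=123ms", "duration=(\\d+)", 1) == "123");
        assert(extractCapture("no match here", "duration=(\\d+)", 1).empty());
    }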
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception( + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!isString(arguments[1])) + throw Exception( + "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!isUnsignedInteger(arguments[2])) + throw Exception( + "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr column = arguments[0].column; + const ColumnPtr column_needle = arguments[1].column; + const auto capture = arguments[2].column->getUInt(0); + + const ColumnConst * col_needle = typeid_cast(&*column_needle); + if (!col_needle) + throw Exception("Second argument of function " + getName() + " must be constant string", ErrorCodes::ILLEGAL_COLUMN); + + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + Impl::vector(col->getChars(), col->getOffsets(), col_needle->getValue(), static_cast(capture), vec_res, offsets_res); + + return col_res; + } + else + throw Exception( + "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; } diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index 5f86923b0d10..5e13fbbad5ce 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -91,8 +92,12 @@ struct ExtractParamImpl const ColumnString::Offsets & haystack_offsets, std::string needle, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
+ assert(!res_null); + if (start_pos != nullptr) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' doesn't support start_pos argument", name); diff --git a/src/Functions/GCDLCMImpl.h b/src/Functions/GCDLCMImpl.h index fbe1f6a1a90b..7a4b2840d3a8 100644 --- a/src/Functions/GCDLCMImpl.h +++ b/src/Functions/GCDLCMImpl.h @@ -23,6 +23,7 @@ struct GCDLCMImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index 9328bd99139b..18e629b58fb2 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -24,14 +24,15 @@ struct HasTokenImpl static constexpr bool supports_start_pos = false; static constexpr auto name = Name::name; - static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2};} + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2}; } static void vectorConstant( const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const std::string & pattern, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + ColumnUInt8 * res_null) { if (start_pos != nullptr) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name); @@ -43,34 +44,51 @@ struct HasTokenImpl const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - /// The current index in the array of strings. - size_t i = 0; - - TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); - - /// We will search for the next occurrence in all rows at once. - while (pos < end && end != (pos = searcher.search(pos, end - pos))) + try { - /// Let's determine which index it refers to. - while (begin + haystack_offsets[i] <= pos) + /// Parameter `pattern` is supposed to be a literal of letters and/or numbers. + /// Otherwise, an exception from the constructor of `TokenSearcher` is thrown. + /// If no exception is thrown at that point, then no further error cases may occur. + TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); + if (res_null) + std::ranges::fill(res_null->getData(), false); + + /// The current index in the array of strings. + size_t i = 0; + /// We will search for the next occurrence in all rows at once. + while (pos < end && end != (pos = searcher.search(pos, end - pos))) { - res[i] = negate; + /// Let's determine which index it refers to. + while (begin + haystack_offsets[i] <= pos) + { + res[i] = negate; + ++i; + } + + /// We check that the entry does not pass through the boundaries of strings. + if (pos + pattern.size() < begin + haystack_offsets[i]) + res[i] = !negate; + else + res[i] = negate; + + pos = begin + haystack_offsets[i]; ++i; } - /// We check that the entry does not pass through the boundaries of strings. - if (pos + pattern.size() < begin + haystack_offsets[i]) - res[i] = !negate; + /// Tail, in which there can be no substring. + if (i < res.size()) + memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); + } + catch (...) + { + if (!res_null) + throw; else - res[i] = negate; - - pos = begin + haystack_offsets[i]; - ++i; + { + std::ranges::fill(res, 0); + std::ranges::fill(res_null->getData(), true); + } } - - /// Tail, in which there can be no substring. 
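The reworked HasTokenImpl above shows the OrNull conversion in full: TokenSearcher's constructor throws for a pattern that is not a plain token, and when res_null is present the catch block turns that failure into an all-NULL result column instead of a propagated exception. The same shape in a standalone sketch (toy searcher, hypothetical names):

    #include <algorithm>
    #include <cassert>
    #include <cctype>
    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Stand-in for TokenSearcher: the constructor validates the pattern and
    // throws for anything that is not letters/digits, like the real one.
    struct TokenSearcher
    {
        std::string token;
        explicit TokenSearcher(std::string token_) : token(std::move(token_))
        {
            for (char c : token)
                if (!std::isalnum(static_cast<unsigned char>(c)))
                    throw std::invalid_argument("not a token: " + token);
        }
        bool contains(const std::string & haystack) const
        {
            return haystack.find(token) != std::string::npos;
        }
    };

    // res_null == nullptr models plain hasToken; non-null models hasTokenOrNull.
    void hasToken(const std::vector<std::string> & rows, const std::string & pattern,
                  std::vector<std::uint8_t> & res, std::vector<std::uint8_t> * res_null)
    {
        res.assign(rows.size(), 0);
        if (res_null)
            res_null->assign(rows.size(), 0);
        try
        {
            TokenSearcher searcher(pattern);   // may throw before any row is read
            for (size_t i = 0; i < rows.size(); ++i)
                res[i] = searcher.contains(rows[i]);
        }
        catch (...)
        {
            if (!res_null)
                throw;                                               // plain variant keeps the error
            std::fill(res.begin(), res.end(), 0);
            std::fill(res_null->begin(), res_null->end(), 1);        // OrNull variant: every row NULL
        }
    }

    int main()
    {
        std::vector<std::string> rows{"a b", "b c"};
        std::vector<std::uint8_t> res, null_map;
        hasToken(rows, "b", res, &null_map);
        assert(res[0] == 1 && null_map[1] == 0);
        hasToken(rows, "b!", res, &null_map);    // invalid token: all rows become NULL
        assert(null_map[0] == 1 && null_map[1] == 1);
    }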
- if (i < res.size()) - memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); } template diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index 39f9114f5e09..830d77f58e35 100644 --- a/src/Functions/IsOperation.h +++ b/src/Functions/IsOperation.h @@ -15,6 +15,7 @@ template struct DivideIntegralOrZeroImpl; template struct LeastBaseImpl; template struct GreatestBaseImpl; template struct ModuloImpl; +template struct ModuloOrZeroImpl; template struct PositiveModuloImpl; template struct EqualsOp; template struct NotEqualsOp; @@ -54,6 +55,7 @@ struct IsOperation static constexpr bool div_int = IsSameOperation::value; static constexpr bool div_int_or_zero = IsSameOperation::value; static constexpr bool modulo = IsSameOperation::value; + static constexpr bool modulo_or_zero = IsSameOperation::value; static constexpr bool positive_modulo = IsSameOperation::value; static constexpr bool least = IsSameOperation::value; static constexpr bool greatest = IsSameOperation::value; diff --git a/src/Functions/Kusto/CMakeLists.txt b/src/Functions/Kusto/CMakeLists.txt new file mode 100644 index 000000000000..3c534905d227 --- /dev/null +++ b/src/Functions/Kusto/CMakeLists.txt @@ -0,0 +1,8 @@ +include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") +add_headers_and_sources(clickhouse_functions_kusto .) +add_library(clickhouse_functions_kusto OBJECT ${clickhouse_functions_kusto_sources} ${clickhouse_functions_kusto_headers}) +target_link_libraries(clickhouse_functions_kusto PRIVATE dbms clickhouse_functions_gatherutils) + +if (OMIT_HEAVY_DEBUG_SYMBOLS) + target_compile_options(clickhouse_functions_kusto PRIVATE "-g0") +endif() diff --git a/src/Functions/Kusto/KqlArrayIif.cpp b/src/Functions/Kusto/KqlArrayIif.cpp new file mode 100644 index 000000000000..588ea14fdaa2 --- /dev/null +++ b/src/Functions/Kusto/KqlArrayIif.cpp @@ -0,0 +1,146 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlArrayIif : public KqlFunctionBase +{ +public: + static constexpr auto name = "kql_ArrayIif"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + explicit FunctionKqlArrayIif(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlArrayIif() override = default; + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + + bool isDataTypeBoolORBoolConvertible(std::string_view datatype_name) const; + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + +private: + ContextPtr context; +}; + +bool FunctionKqlArrayIif::isDataTypeBoolORBoolConvertible(std::string_view datatype_name) const +{ + if(datatype_name.find("Int") != datatype_name.npos || datatype_name.find("Float") != datatype_name.npos + || datatype_name.find("Decimal") != datatype_name.npos || 
datatype_name.find("Bool") != datatype_name.npos) + return true; + return false; +} + +DataTypePtr FunctionKqlArrayIif::getReturnTypeImpl(const DataTypes & arguments) const +{ + const auto * array_type0 = typeid_cast(arguments[0].get()); + if (!array_type0) + throw Exception("First argument for function " + getName() + " must be an array but it has type " + + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + DataTypePtr nested_type1, nested_type2; + + const auto * array_type1 = typeid_cast(arguments[1].get()); + if (!array_type1) + nested_type1 = makeNullable(arguments[1]->getPtr()); + else + nested_type1 = makeNullable(array_type1->getNestedType()); + + const auto * array_type2 = typeid_cast(arguments[2].get()); + if (!array_type2) + nested_type2 = makeNullable(arguments[2]->getPtr()); + else + nested_type2 = makeNullable(array_type2->getNestedType()); + + if (nested_type1->getName() != nested_type2->getName()) + throw Exception("Last two arguments for function " + getName() + " must have same nested data type " + + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + DataTypes types = {nested_type1, nested_type2}; + + return std::make_shared(getLeastSupertype(types)); +} + +ColumnPtr FunctionKqlArrayIif::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const DataTypePtr & elem_type = static_cast(*result_type).getNestedType(); + auto out = ColumnArray::create(elem_type->createColumn()); + + if(input_rows_count == 0) + return out; + + IColumn & out_data = out->getData(); + IColumn::Offsets & out_offsets = out->getOffsets(); + size_t total_length = 0; + for(size_t i = 0; i < input_rows_count; i++) + { + Field array0; + arguments[0].column->get(i, array0); + total_length += array0.get().size(); + } + + out_data.reserve(total_length); + out_offsets.resize(input_rows_count); + IColumn::Offset current_offset = 0; + + for(size_t i = 0; i < input_rows_count; i++) + { + Field array0; + arguments[0].column->get(i, array0); + size_t len0 = array0.get().size(); + for(size_t k = 0; k < len0; k++) + { + if(!isDataTypeBoolORBoolConvertible(array0.get().at(k).getTypeName())) + out_data.insert(Field()); + else + { + Field temp; + std::string dump = array0.get().at(k).dump(); + dump = dump.substr(dump.find("_") + 1); + if(dump == "0" || dump == "-0") + arguments[2].column->get(i, temp); + else + arguments[1].column->get(i, temp); + if(temp.getTypeName() == "Array") + { + if(k < temp.get().size()) + out_data.insert(temp.get().at(k)); + else + out_data.insert(Field()); + } + else + out_data.insert(temp); + } + } + current_offset += len0; + out_offsets[i] = current_offset; + } + return out; +} + +REGISTER_FUNCTION(KqlArrayIif) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/KqlArraySort.cpp b/src/Functions/Kusto/KqlArraySort.cpp new file mode 100644 index 000000000000..1a8b17e099c5 --- /dev/null +++ b/src/Functions/Kusto/KqlArraySort.cpp @@ -0,0 +1,494 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +template +class FunctionKqlArraySort : public KqlFunctionBase +{ +public: + static constexpr auto name = Name::name; + explicit FunctionKqlArraySort(ContextPtr context_) : context(context_) { } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { 
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        if (arguments.empty())
+            throw Exception(
+                "Function " + getName() + " needs at least one argument; passed " + toString(arguments.size()) + ".",
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        auto array_count = arguments.size();
+
+        if (!isArray(arguments.at(array_count - 1).type))
+            --array_count;
+
+        DataTypes nested_types;
+        for (size_t index = 0; index < array_count; ++index)
+        {
+            const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[index].type.get());
+            if (!array_type)
+                throw Exception(
+                    "Argument " + toString(index + 1) + " of function " + getName() + " must be array. Found "
+                        + arguments[index].type->getName() + " instead.",
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            nested_types.emplace_back(array_type->getNestedType());
+        }
+
+        DataTypes data_types(array_count);
+
+        for (size_t i = 0; i < array_count; ++i)
+            data_types[i] = std::make_shared<DataTypeArray>(makeNullable(nested_types[i]));
+
+        return std::make_shared<DataTypeTuple>(data_types);
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    {
+        size_t array_count = arguments.size();
+        const auto & last_arg = arguments[array_count - 1];
+
+        size_t input_rows_count_ = input_rows_count;
+
+        bool null_last = true;
+        if (!isArray(last_arg.type))
+        {
+            --array_count;
+            null_last = check_condition(last_arg, context, input_rows_count_);
+        }
+
+        ColumnsWithTypeAndName new_args;
+        ColumnPtr first_array_column;
+        std::unordered_set<size_t> null_indices;
+        DataTypes nested_types;
+
+        String sort_function = is_desc ? "arrayReverseSort" : "arraySort";
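+
+        /// Approach: arrayZip the input arrays into one array of tuples, sort the
+        /// tuples with arraySort/arrayReverseSort (the first array is the sort key),
+        /// then split the result back out with tupleElement. Arrays whose offsets
+        /// do not match the first array cannot be zipped and come back as [NULL].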
"arrayReverseSort" : "arraySort"; + + for (size_t i = 0; i < array_count; ++i) + { + ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst(); + + const ColumnArray * column_array = checkAndGetColumn(holder.get()); + const DataTypeArray * array_type = checkAndGetDataType(arguments[i].type.get()); + + if (!column_array) + throw Exception( + "Argument " + toString(i + 1) + " of function " + getName() + + " must be array." + " Found column " + + holder->getName() + " instead.", + ErrorCodes::ILLEGAL_COLUMN); + + nested_types.emplace_back(makeNullable(array_type->getNestedType())); + if (i == 0) + { + first_array_column = holder; + new_args.push_back(arguments[i]); + } + else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) + { + null_indices.insert(i); + } + else + new_args.push_back(arguments[i]); + } + + auto zipped + = FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_); + + ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared(result_type), "zipped"}}); + auto sorted_tuple + = FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_); + + auto null_type = std::make_shared(std::make_shared()); + + Columns tuple_columns(array_count); + size_t sorted_index = 0; + for (size_t i = 0; i < array_count; ++i) + { + if (null_indices.contains(i)) + { + auto fun_array = FunctionFactory::instance().get("array", context); + + DataTypePtr arg_type + = std::make_shared(makeNullable(nested_types[i])); + + ColumnsWithTypeAndName null_array_arg({ + {null_type->createColumnConstWithDefaultValue(input_rows_count_), null_type, "NULL"}, + }); + + tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count_); + tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst(); + } + else + { + ColumnsWithTypeAndName untuple_args( + {{ColumnWithTypeAndName(sorted_tuple, std::make_shared(result_type), "sorted")}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(sorted_index + 1))), std::make_shared(), ""}}); + auto tuple_coulmn = FunctionFactory::instance() + .get("tupleElement", context) + ->build(untuple_args) + ->execute(untuple_args, result_type, input_rows_count_); + + auto out_tmp = ColumnArray::create(nested_types[i]->createColumn()); + + size_t array_size = tuple_coulmn->size(); + auto * arr = checkAndGetColumn(tuple_coulmn.get()); + + for (size_t j = 0; j < array_size; ++j) + { + Field arr_field; + arr->get(j, arr_field); + out_tmp->insert(arr_field); + } + + tuple_columns[i] = std::move(out_tmp); + + ++sorted_index; + } + } + + if (!null_last) + { + Columns adjusted_columns(array_count); + + ColumnWithTypeAndName arg_of_index{nullptr, std::make_shared(nested_types[0]), "array"}; + arg_of_index.column = tuple_columns[0]; + + auto inside_null_type = nested_types[0]; + ColumnsWithTypeAndName indexof_args({ + arg_of_index, + {inside_null_type->createColumnConstWithDefaultValue(input_rows_count_), inside_null_type, "NULL"}, + }); + + auto null_index_datetype = std::make_shared(); + + ColumnWithTypeAndName slice_index{nullptr, null_index_datetype, ""}; + slice_index.column = FunctionFactory::instance() + .get("indexOf", context) + ->build(indexof_args) + ->execute(indexof_args, result_type, input_rows_count_); + + auto null_index_in_array = slice_index.column->get64(0); + if (null_index_in_array > 0) + { + ColumnWithTypeAndName slice_index_len{nullptr, null_index_datetype, ""}; + 
slice_index_len.column = DataTypeUInt64().createColumnConst(1, toField(UInt64(null_index_in_array - 1))); + + auto fun_slice = FunctionFactory::instance().get("arraySlice", context); + + for (size_t i = 0; i < array_count; ++i) + { + if (null_indices.contains(i)) + { + adjusted_columns[i] = std::move(tuple_columns[i]); + } + else + { + DataTypePtr arg_type = std::make_shared(nested_types[i]); + + ColumnsWithTypeAndName slice_args_left( + {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(1))), std::make_shared(), ""}, + slice_index_len}); + + ColumnsWithTypeAndName slice_args_right( + {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index}); + ColumnWithTypeAndName arr_left{ + fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_), arg_type, ""}; + ColumnWithTypeAndName arr_right{ + fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_), arg_type, ""}; + + ColumnsWithTypeAndName arr_cancat({arr_right, arr_left}); + auto out_tmp = FunctionFactory::instance() + .get("arrayConcat", context) + ->build(arr_cancat) + ->execute(arr_cancat, arg_type, input_rows_count_); + adjusted_columns[i] = std::move(out_tmp); + } + } + return ColumnTuple::create(adjusted_columns); + } + } + return ColumnTuple::create(tuple_columns); + } + +private: + ContextPtr context; +}; + +struct NameKqlArraySortAsc +{ + static constexpr auto name = "kql_array_sort_asc"; +}; + +struct NameKqlArraySortDesc +{ + static constexpr auto name = "kql_array_sort_desc"; +}; + +using FunctionKqlArraySortAsc = FunctionKqlArraySort; +using FunctionKqlArraySortDesc = FunctionKqlArraySort; + +REGISTER_FUNCTION(KqlArraySort) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/KqlFunctionBase.h b/src/Functions/Kusto/KqlFunctionBase.h new file mode 100644 index 000000000000..efdf8982f4b1 --- /dev/null +++ b/src/Functions/Kusto/KqlFunctionBase.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Functions/array/FunctionArrayMapped.h" + +namespace DB +{ + +class KqlFunctionBase : public IFunction +{ +public: + static bool check_condition (const ColumnWithTypeAndName & condition, ContextPtr context, size_t input_rows_count) + { + ColumnsWithTypeAndName if_columns( + { + condition, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(1))), std::make_shared(), ""}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(2))), std::make_shared(), ""} + }); + auto if_res = FunctionFactory::instance().get("if", context)->build(if_columns)->execute(if_columns, std::make_shared(), input_rows_count); + auto result = if_res->getUInt(0); + return (result == 1); + } +}; + +} diff --git a/src/Functions/Kusto/KqlIndexOf.cpp b/src/Functions/Kusto/KqlIndexOf.cpp new file mode 100644 index 000000000000..aec179333206 --- /dev/null +++ b/src/Functions/Kusto/KqlIndexOf.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class FunctionKqlIndexOf : public KqlFunctionBase +{ +public: + static constexpr auto name = "kql_indexof"; + explicit FunctionKqlIndexOf(ContextPtr context_) : context(context_) { } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t 
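+
+    /// kql_indexof(source, lookup [, start [, length [, occurrence]]]): returns the
+    /// zero-based position of the occurrence-th match of lookup inside source,
+    /// -1 when there is no match, and NULL for invalid arguments (e.g. length < -1).
+    /// For example (assuming the KQL semantics this function models):
+    ///   kql_indexof('abcdefg', 'cde') -> 2
+    ///   kql_indexof('abcdefg', 'xyz') -> -1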
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        if (arguments.size() < 2 || 5 < arguments.size())
+            throw Exception(
+                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+                    + ", should be 2 to 5.",
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        for (size_t i = 2; i < arguments.size(); ++i)
+            if (!isInteger(arguments.at(i).type))
+                throw Exception(
+                    "Illegal type of argument " + toString(i + 1) + " of function " + getName(),
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>());
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    {
+        int64_t occurrence = 1;
+
+        auto null_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
+        auto null_column = null_type->createColumnConstWithDefaultValue(1);
+        auto not_found_column = DataTypeUInt64().createColumnConst(1, toField(UInt64(0)));
+
+        ColumnPtr column_source = arguments[0].column;
+        ColumnPtr column_lookup = arguments[1].column;
+        ColumnPtr column_start_pos = DataTypeUInt64().createColumnConst(input_rows_count, toField(UInt64(1)));
+        ColumnPtr column_length = DataTypeInt64().createColumnConst(input_rows_count, toField(Int64(-1)));
+
+        if (!isString(arguments[0].type))
+            column_source = FunctionFactory::instance()
+                                .get("toString", context)
+                                ->build({arguments[0]})
+                                ->execute({arguments[0]}, std::make_shared<DataTypeString>(), input_rows_count);
+
+        if (!isString(arguments[1].type))
+            column_lookup = FunctionFactory::instance()
+                                .get("toString", context)
+                                ->build({arguments[1]})
+                                ->execute({arguments[1]}, std::make_shared<DataTypeString>(), input_rows_count);
+
+        if (arguments.size() >= 3)
+        {
+            auto input_start_column = ColumnUInt64::create();
+            for (size_t j = 0; j < input_rows_count; ++j)
+            {
+                StringRef source = column_source->getDataAt(j);
+                auto start_pos = arguments[2].column->getInt(j);
+                /// A negative start counts from the end of the source string.
+                if (start_pos < 0)
+                {
+                    start_pos = source.size + start_pos;
+                    if (start_pos < 0)
+                        start_pos = 0;
+                }
+                ++start_pos; /// "position" is 1-based.
+                input_start_column->insertValue(start_pos);
+            }
+            column_start_pos = std::move(input_start_column);
+        }
+
+        if (arguments.size() >= 4)
+            column_length = arguments[3].column;
+
+        if (arguments.size() == 5)
+            occurrence = arguments[4].column->getInt(0); /// Must be a constant.
+
+        if (occurrence < 0)
+            return null_column;
+
+        ColumnPtr last_pos = not_found_column;
+        for (int64_t i = 0; i < occurrence; ++i)
+        {
+            ColumnsWithTypeAndName position_args(
+                {{ColumnWithTypeAndName(column_source, std::make_shared<DataTypeString>(), "source")},
+                 {ColumnWithTypeAndName(column_lookup, std::make_shared<DataTypeString>(), "lookup")},
+                 {ColumnWithTypeAndName(column_start_pos, std::make_shared<DataTypeUInt64>(), "start_pos")}});
+            auto pos = FunctionFactory::instance()
+                           .get("position", context)
+                           ->build(position_args)
+                           ->execute(position_args, result_type, input_rows_count);
+            last_pos = pos;
+
+            auto new_pos_column = ColumnUInt64::create();
+            for (size_t j = 0; j < input_rows_count; ++j)
+                new_pos_column->insertValue(pos->getInt(j) + 1);
+            column_start_pos = std::move(new_pos_column);
+        }
+
+        auto null_map = ColumnUInt8::create(input_rows_count);
+        auto result_column = ColumnInt64::create();
+        for (size_t i = 0; i < input_rows_count; ++i)
+        {
+            auto length = column_length->getInt(i);
+            null_map->getData()[i] = length < -1;
+
+            auto pos_val = last_pos->get64(i);
+            if (length > -1 && pos_val > UInt64(length) + 1)
+                pos_val = 0;
+            result_column->insertValue(Int64(pos_val) - 1); /// KQL indexes are 0-based, so shift the 1-based position down.
+        }
+        return ColumnNullable::create(std::move(result_column), std::move(null_map));
+    }
+
+private:
+    ContextPtr context;
+};
+
+REGISTER_FUNCTION(KqlIndexOf)
+{
+    factory.registerFunction<FunctionKqlIndexOf>();
+}
+
+}
diff --git a/src/Functions/Kusto/KqlRange.cpp b/src/Functions/Kusto/KqlRange.cpp
new file mode 100644
index 000000000000..4470ee1d316a
--- /dev/null
+++ b/src/Functions/Kusto/KqlRange.cpp
@@ -0,0 +1,825 @@
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnDecimal.h>
+#include <Columns/ColumnNullable.h>
+#include <Columns/ColumnsNumber.h>
+#include <Core/Settings.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeDateTime64.h>
+#include <DataTypes/DataTypeInterval.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/getLeastSupertype.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunction.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/castColumn.h>
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int TYPE_MISMATCH;
+}
+
+static constexpr size_t max_array_size_as_field = 1000000; /// The value from ColumnArray.cpp.
+
+class FunctionKqlRange : public IFunction
+{
+public:
+    static constexpr auto name = "kql_range";
+
+    const size_t max_elements;
+    static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionKqlRange>(std::move(context_)); }
+    explicit FunctionKqlRange(ContextPtr context) : max_elements(context->getSettingsRef().function_range_max_elements_in_block) { }
+
+private:
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 0; }
+    bool isVariadic() const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+    bool useDefaultImplementationForNulls() const override { return false; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
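+
+    /// kql_range(start, stop [, step]): generates the inclusive arithmetic sequence
+    /// start, start + step, ... up to and including stop; step defaults to 1.
+    /// KQL caps the result at 1,048,576 (2^20) values, further limited here by the
+    /// function_range_max_elements_in_block setting and max_array_size_as_field.
+    /// For example (assuming the KQL semantics this function models):
+    ///   kql_range(1, 8, 3) -> [1, 4, 7]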
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (arguments.size() < 2 || 3 < arguments.size())
+            throw Exception(
+                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+                    + ", should be 2 or 3.",
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        const auto & start = arguments[0];
+        const auto & end = arguments[1];
+
+        WhichDataType start_type(*start);
+        WhichDataType end_type(*end);
+
+        auto return_type = start;
+        if (start_type.isNullable())
+        {
+            const auto * nullable_type = checkAndGetDataType<DataTypeNullable>(start.get());
+            if (nullable_type)
+            {
+                return_type = nullable_type->getNestedType();
+                start_type = WhichDataType(nullable_type->getNestedType());
+            }
+        }
+        if (end_type.isNullable())
+        {
+            const auto * nullable_type = checkAndGetDataType<DataTypeNullable>(end.get());
+            if (nullable_type)
+                end_type = WhichDataType(nullable_type->getNestedType());
+        }
+        if (!start_type.isDateTime64() && !start_type.isInterval() && !isNumber(start_type))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type of first argument of function {}, expected DateTime64, Interval or Number",
+                getName());
+
+        if (start_type.isDateTime64() != end_type.isDateTime64())
+            throw Exception(ErrorCodes::TYPE_MISMATCH, "Types of the first and second arguments of function {} do not match", getName());
+
+        if (start_type.isInterval() != end_type.isInterval())
+            throw Exception(ErrorCodes::TYPE_MISMATCH, "Types of the first and second arguments of function {} do not match", getName());
+
+        if (arguments.size() == 3)
+        {
+            const auto & step = arguments[2];
+            const WhichDataType step_type(*step);
+            if (!isNumber(step_type) && !step_type.isInterval())
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Illegal type of third argument of function {}, expected Interval or Number",
+                    getName());
+        }
+
+        if (start_type.isDateTime64() || start_type.isInterval())
+            return std::make_shared<DataTypeArray>(return_type);
+        DataTypePtr common_type = getLeastSupertype(arguments);
+        return std::make_shared<DataTypeArray>(common_type);
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    {
+        DataTypePtr elem_type = checkAndGetDataType<DataTypeArray>(result_type.get())->getNestedType();
+        WhichDataType which(elem_type);
+        ColumnPtr res;
+        const auto & start = arguments[0];
+        WhichDataType start_type(*start.type);
+
+        ColumnsWithTypeAndName new_args;
+
+        for (size_t i = 0; i < arguments.size(); ++i)
+        {
+            const auto & arg_col = arguments[i];
+            WhichDataType arg_type(*arg_col.type);
+
+            if (arg_type.isNullable())
+            {
+                const auto * nullable_type = checkAndGetDataType<DataTypeNullable>(arg_col.type.get());
+                const auto & nested_type = nullable_type->getNestedType();
+                const auto * nullable_column = checkAndGetColumn<ColumnNullable>(*arguments[i].column);
+                ColumnPtr nested_column = nullable_column->getNestedColumnPtr();
+                new_args.push_back(ColumnWithTypeAndName{nested_column, nested_type, "new_arg"});
+
+                if (i == 0)
+                    start_type = WhichDataType(nested_type);
+            }
+            else
+                new_args.push_back(arguments[i]);
+        }
+
+        if (start_type.isDateTime64())
+            return executeDateTime64(new_args, result_type, input_rows_count);
+
+        if (start_type.isInterval())
+            return executeInterval(new_args, result_type, input_rows_count);
+
+        Columns columns_holder(3);
+        ColumnRawPtrs column_ptrs(3);
+
+        for (size_t i = 0; i < new_args.size(); ++i)
+        {
+            if (i <= 1)
+                columns_holder[i] = castColumn(new_args[i], elem_type)->convertToFullColumnIfConst();
+            else
+                columns_holder[i] = castColumn(new_args[i], elem_type);
+
+            column_ptrs[i] = columns_holder[i].get();
+        }
+
+        /// Step is one by default.
+        if (new_args.size() == 2)
+        {
+            /// Convert a column with constant 1 to the result type.
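+            /// (e.g. kql_range(1, 5) with no explicit step yields [1, 2, 3, 4, 5]).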
+ if (start_type.isFloat32()) + columns_holder[2] = castColumn( + {DataTypeFloat32().createColumnConst(input_rows_count, 1.0), std::make_shared(), {}}, elem_type); + else if (start_type.isFloat64()) + columns_holder[2] = castColumn( + {DataTypeFloat64().createColumnConst(input_rows_count, 1.0), std::make_shared(), {}}, elem_type); + else if (start_type.isUInt8() || start_type.isUInt16() || start_type.isUInt32() || start_type.isUInt64()) + columns_holder[2] = castColumn( + {DataTypeUInt8().createColumnConst(input_rows_count, 1), std::make_shared(), {}}, elem_type); + else + columns_holder[2] + = castColumn({DataTypeInt8().createColumnConst(input_rows_count, 1), std::make_shared(), {}}, elem_type); + + column_ptrs[2] = columns_holder[2].get(); + } + + bool is_start_const = isColumnConst(*column_ptrs[0]); + bool is_step_const = isColumnConst(*column_ptrs[2]); + + if (is_start_const && is_step_const) + { + UInt64 start_uint = assert_cast(*column_ptrs[0]).getUInt(0); + UInt64 step_uint = assert_cast(*column_ptrs[2]).getUInt(0); + Int64 start_int = assert_cast(*column_ptrs[0]).getInt(0); + Int64 step_int = assert_cast(*column_ptrs[2]).getInt(0); + Float32 start_float32 = assert_cast(*column_ptrs[0]).getFloat32(0); + Float32 step_float32 = assert_cast(*column_ptrs[2]).getFloat32(0); + Float64 start_float64 = assert_cast(*column_ptrs[0]).getFloat64(0); + Float64 step_float64 = assert_cast(*column_ptrs[2]).getFloat64(0); + + if ((res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep( + column_ptrs[1], static_cast(start_uint), static_cast(step_uint), input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep( + column_ptrs[1], static_cast(start_uint), static_cast(step_uint), input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_float32, step_float32, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_float64, step_float64, input_rows_count))) + { + } + } + else if (is_start_const && !is_step_const) + { + UInt64 start_uint = assert_cast(*column_ptrs[0]).getUInt(0); + Int64 start_int = assert_cast(*column_ptrs[0]).getInt(0); + Float32 start_float32 = assert_cast(*column_ptrs[0]).getFloat32(0); + Float64 start_float64 = assert_cast(*column_ptrs[0]).getFloat64(0); + + if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start_uint), input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_int, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_int, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start_uint), input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_int, input_rows_count)) + || (res = 
executeConstStart(column_ptrs[1], column_ptrs[2], start_float32, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_float64, input_rows_count))) + { + } + } + else if (!is_start_const && is_step_const) + { + UInt64 step_uint = assert_cast(*column_ptrs[2]).getUInt(0); + Int64 step_int = assert_cast(*column_ptrs[2]).getInt(0); + Float32 step_float32 = assert_cast(*column_ptrs[2]).getFloat32(0); + Float64 step_float64 = assert_cast(*column_ptrs[2]).getFloat64(0); + + if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step_uint), input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step_uint), input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_float32, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_float64, input_rows_count))) + { + } + } + else + { + if ((res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))) + { + } + } + + if (!res) + { + throw Exception{ + "Illegal columns " + column_ptrs[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + } + + return res; + } + + template + ColumnPtr executeConstStartStep(const IColumn * end_arg, const T start, const T step, const size_t input_rows_count) const + { + auto end_column = checkAndGetColumn>(end_arg); + if (!end_column) + return nullptr; + + const auto & end_data = end_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (start < end_data[row_idx] && step == 0) + throw Exception{ + "A call to function " + getName() + " overflows, the 3rd argument step can't be zero", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + if (step > 0 && start <= end_data[row_idx]) + { + pre_values += start >= end_data[row_idx] ? 
0 + : static_cast((end_data[row_idx] - start) / (step) + 1); + } + + if (step < 0 && start >= end_data[row_idx]) + { + pre_values += start <= end_data[row_idx] ? 0 + : static_cast((start - end_data[row_idx]) / (-step) + 1); + } + + if (pre_values < total_values) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (step > 0) + { + for (T st = start, ed = end_data[row_idx]; st <= ed; st += step) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + else + { + for (T st = start, ed = end_data[row_idx]; st >= ed; st += step) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr executeConstStep(const IColumn * start_arg, const IColumn * end_arg, const T step, const size_t input_rows_count) const + { + auto start_column = checkAndGetColumn>(start_arg); + auto end_column = checkAndGetColumn>(end_arg); + if (!end_column || !start_column) + return nullptr; + + const auto & start_data = start_column->getData(); + const auto & end_data = end_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (start_data[row_idx] < end_data[row_idx] && step == 0) + throw Exception{ + "A call to function " + getName() + " overflows, the 3rd argument step can't be zero", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + if (step > 0 && start_data[row_idx] <= end_data[row_idx]) + { + pre_values += start_data[row_idx] >= end_data[row_idx] ? 0 + : static_cast((end_data[row_idx] - start_data[row_idx]) / (step) + 1); + } + + if (step < 0 && start_data[row_idx] >= end_data[row_idx]) + { + pre_values += start_data[row_idx] <= end_data[row_idx] ? 
0 + : static_cast((start_data[row_idx] - end_data[row_idx]) / (-step) + 1); + } + + if (pre_values < total_values) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (step > 0) + { + for (T st = start_data[row_idx], ed = end_data[row_idx]; st <= ed; st += step) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + else + { + for (T st = start_data[row_idx], ed = end_data[row_idx]; st >= ed; st += step) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr executeConstStart(const IColumn * end_arg, const IColumn * step_arg, const T start, const size_t input_rows_count) const + { + auto end_column = checkAndGetColumn>(end_arg); + auto step_column = checkAndGetColumn>(step_arg); + if (!end_column || !step_column) + return nullptr; + + const auto & end_data = end_column->getData(); + const auto & step_data = step_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (start < end_data[row_idx] && step_data[row_idx] == 0) + throw Exception{ + "A call to function " + getName() + " overflows, the 3rd argument step can't be zero", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + if (step_data[row_idx] > 0 && start <= end_data[row_idx]) + { + pre_values += start >= end_data[row_idx] ? 0 + : static_cast((end_data[row_idx] - start) / (step_data[row_idx]) + 1); + } + + if (step_data[row_idx] < 0 && start >= end_data[row_idx]) + { + pre_values += start <= end_data[row_idx] ? 
0 + : static_cast((start - end_data[row_idx]) / (-step_data[row_idx]) + 1); + } + + if (pre_values < total_values) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (step_data[row_idx] > 0) + { + for (T st = start, ed = end_data[row_idx]; st <= ed; st += step_data[row_idx]) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step_data[row_idx]) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + else + { + for (T st = start, ed = end_data[row_idx]; st >= ed; st += step_data[row_idx]) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step_data[row_idx]) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr + executeGeneric(const IColumn * start_col, const IColumn * end_col, const IColumn * step_col, const size_t input_rows_count) const + { + auto start_column = checkAndGetColumn>(start_col); + auto end_column = checkAndGetColumn>(end_col); + auto step_column = checkAndGetColumn>(step_col); + + if (!start_column || !end_column || !step_column) + return nullptr; + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + const auto & start_data = start_column->getData(); + const auto & end_start = end_column->getData(); + const auto & step_data = step_column->getData(); + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (start_data[row_idx] < end_start[row_idx] && step_data[row_idx] == 0) + throw Exception{ + "A call to function " + getName() + " overflows, the 3rd argument step can't be zero", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + if (step_data[row_idx] > 0 && start_data[row_idx] <= end_start[row_idx]) + { + pre_values += start_data[row_idx] >= end_start[row_idx] ? 0 + : static_cast((end_start[row_idx] - start_data[row_idx]) / (step_data[row_idx]) + 1); + } + + if (step_data[row_idx] < 0 && start_data[row_idx] >= end_start[row_idx]) + { + pre_values += start_data[row_idx] <= end_start[row_idx] ? 
0 + : static_cast((start_data[row_idx] - end_start[row_idx]) / (-step_data[row_idx]) + 1); + } + + if (pre_values < total_values) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (step_data[row_idx] > 0) + { + for (auto st = start_data[row_idx], ed = end_start[row_idx]; st <= ed; st += step_data[row_idx]) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step_data[row_idx]) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + else + { + for (auto st = start_data[row_idx], ed = end_start[row_idx]; st >= ed; st += step_data[row_idx]) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step_data[row_idx]) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + ColumnPtr executeDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); + const auto & start_col = arguments[0].column; + const auto & end_col = arguments[1].column; + Int64 step_value = 3600000000000; + + auto & start_data = typeid_cast &>(*start_col).getData(); + auto & end_data = typeid_cast &>(*end_col).getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (arguments.size() > 2) + step_value = arguments[2].column->getInt(row_idx); + + if (start_data[row_idx] < end_data[row_idx] && step_value == 0) + throw Exception{ + "A call to function " + getName() + " overflows, the 3rd argument step can't be zero", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + if (step_value > 0 && start_data[row_idx] <= end_data[row_idx]) + { + pre_values += start_data[row_idx] >= end_data[row_idx] ? 0 + : static_cast((end_data[row_idx] - start_data[row_idx]) / (step_value) + 1); + } + + if (step_value < 0 && start_data[row_idx] >= end_data[row_idx]) + { + pre_values += start_data[row_idx] <= end_data[row_idx] ? 
0 + : static_cast((start_data[row_idx] - end_data[row_idx]) / (-step_value) + 1); + } + + if (pre_values < total_values) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnDecimal::create(total_values, 9); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_col->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (arguments.size() > 2) + step_value = arguments[2].column->getInt(row_idx); + if (step_value > 0) + { + for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st <= ed; st += step_value) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step_value) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + else + { + for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st >= ed; st += step_value) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step_value) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + out_offsets[row_idx] = offset; + } + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + ColumnPtr executeInterval(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); + const auto & start_col = arguments[0].column; + const auto & end_col = arguments[1].column; + Int64 step_value = 3600000000000; + + auto & start_data = typeid_cast &>(*start_col).getData(); + auto & end_data = typeid_cast &>(*end_col).getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (arguments.size() > 2) + step_value = arguments[2].column->getInt(row_idx); + + if (start_data[row_idx] < end_data[row_idx] && step_value == 0) + throw Exception{ + "A call to function " + getName() + " overflows, the 3rd argument step can't be zero", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + if (step_value > 0 && start_data[row_idx] <= end_data[row_idx]) + { + pre_values += start_data[row_idx] >= end_data[row_idx] ? 0 + : static_cast((end_data[row_idx] - start_data[row_idx]) / (step_value) + 1); + } + + if (step_value < 0 && start_data[row_idx] >= end_data[row_idx]) + { + pre_values += start_data[row_idx] <= end_data[row_idx] ? 
0 + : static_cast((start_data[row_idx] - end_data[row_idx]) / (-step_value) + 1); + } + if (pre_values < total_values) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto out = ColumnArray::create(std::make_shared(IntervalKind::Nanosecond)->createColumn()); + IColumn & out_data = out->getData(); + IColumn::Offsets & out_offsets = out->getOffsets(); + + out_data.reserve(input_rows_count * total_values); + out_offsets.resize(input_rows_count); + IColumn::Offset current_offset = 0; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (arguments.size() > 2) + step_value = arguments[2].column->getInt(row_idx); + if (step_value > 0) + { + for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st <= ed; st += step_value) + { + out_data.insert(Field(st)); + current_offset++; + if (current_offset >= total_values) + break; + if (st > st + step_value) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + else + { + for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st >= ed; st += step_value) + { + out_data.insert(Field(st)); + current_offset++; + if (current_offset >= total_values) + break; + if (st < st + step_value) + throw Exception{ + "A call to function " + getName() + " overflows, investigate the values of arguments you are passing", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + } + } + out_offsets[row_idx] = current_offset; + } + + return out; + } +}; + +REGISTER_FUNCTION(KqlRange) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/kqlBin.cpp b/src/Functions/Kusto/kqlBin.cpp new file mode 100644 index 000000000000..400ab106441e --- /dev/null +++ b/src/Functions/Kusto/kqlBin.cpp @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include + +namespace +{ +DB::ColumnWithTypeAndName +interpretAsInterval(const DB::ContextPtr & context, const DB::ColumnWithTypeAndName & argument, const size_t input_rows_count) +{ + static constexpr auto NANOSECONDS_PER_SECOND = 1'000'000'000U; + + const DB::ColumnsWithTypeAndName multiply_args{ + argument, DB::createConstColumnWithTypeAndName(NANOSECONDS_PER_SECOND, argument.name)}; + const auto product = executeFunctionCall(context, "multiply", multiply_args, input_rows_count); + + const DB::ColumnsWithTypeAndName to_interval_args{asArgument(product, argument.name)}; + const auto interval = executeFunctionCall(context, "toIntervalNanosecond", to_interval_args, input_rows_count); + + return asArgument(interval, argument.name); +} +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlBin : public IFunction +{ +public: + static constexpr auto name = "kql_bin"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlBin(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlBin() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const 
override;
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+
+private:
+    ContextPtr context;
+};
+
+ColumnPtr
+FunctionKqlBin::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const
+{
+    const auto intermediate = std::invoke(
+        [this, &arguments, &input_rows_count]
+        {
+            const auto & round_to_argument = arguments.back();
+            const auto & value_argument = arguments.front();
+            const WhichDataType round_to_which_data_type(*round_to_argument.type);
+            const WhichDataType value_which_data_type(*value_argument.type);
+
+            /// A numeric round-to is interpreted as seconds when the binned value is a date/time or interval.
+            const auto & adjusted_round_to
+                = (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64() || value_which_data_type.isInterval())
+                        && !round_to_which_data_type.isInterval()
+                    ? interpretAsInterval(context, round_to_argument, input_rows_count)
+                    : round_to_argument;
+
+            if (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64())
+            {
+                const auto adjusted_args = std::invoke(
+                    [this, &adjusted_round_to, &input_rows_count, &value_argument, &value_which_data_type]() -> ColumnsWithTypeAndName
+                    {
+                        if (value_which_data_type.isDateTime64())
+                            return {value_argument, adjusted_round_to};
+
+                        const ColumnsWithTypeAndName to_datetime64_args{
+                            value_argument,
+                            createConstColumnWithTypeAndName(9, "scale"),
+                            createConstColumnWithTypeAndName("UTC", "timezone")};
+
+                        const auto as_datetime64 = executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count);
+                        return {asArgument(as_datetime64, "as_datetime64"), adjusted_round_to};
+                    });
+
+                return executeFunctionCall(context, "toStartOfIntervalOrNull", adjusted_args, input_rows_count);
+            }
+
+            /// For plain numbers: bin(value, round_to) = floor(value / round_to) * round_to.
+            const ColumnsWithTypeAndName adjusted_args{value_argument, adjusted_round_to};
+            const auto quotient = executeFunctionCall(context, "divide", adjusted_args, input_rows_count);
+
+            const ColumnsWithTypeAndName floor_args{asArgument(quotient, adjusted_round_to.name)};
+            const auto floored = executeFunctionCall(context, "floor", floor_args, input_rows_count);
+
+            const ColumnsWithTypeAndName multiply_args{asArgument(floored, adjusted_round_to.name), adjusted_round_to};
+            return executeFunctionCall(context, "multiply", multiply_args, input_rows_count);
+        });
+
+    const ColumnsWithTypeAndName conversion_args{
+        asArgument(intermediate, "intermediate"), createConstColumnWithTypeAndName(result_type->getName(), "target_type")};
+    return executeFunctionCall(context, "accurateCastOrNull", conversion_args, input_rows_count).first;
+}
+
+DataTypePtr FunctionKqlBin::getReturnTypeImpl(const DataTypes & arguments) const
+{
+    const auto nested_type = std::invoke(
+        [this, &arguments]() -> DataTypePtr
+        {
+            const auto & value_argument = arguments.front();
+            const auto & round_to_argument = arguments.back();
+            if (const WhichDataType value_which_data_type(*value_argument);
+                value_which_data_type.isInterval() || isNumber(value_which_data_type))
+            {
+                const WhichDataType round_to_which_data_type(*round_to_argument);
+                return isNumber(value_which_data_type) && (round_to_which_data_type.isFloat() || round_to_which_data_type.isDecimal())
+                    ? round_to_argument
+                    : value_argument;
+            }
+            else if (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64())
+                return std::make_shared<DataTypeDateTime64>(9, "UTC");
+
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of first argument of function {}, expected Date, Date32, DateTime, DateTime64, Interval or Number",
+                value_argument->getName(),
+                getName());
+        });
+
+    return makeNullable(nested_type);
+}
+
+REGISTER_FUNCTION(KqlBin)
+{
+    factory.registerFunction<FunctionKqlBin>();
+}
+}
diff --git a/src/Functions/Kusto/kqlBinAt.cpp b/src/Functions/Kusto/kqlBinAt.cpp
new file mode 100644
index 000000000000..55c386e3429e
--- /dev/null
+++ b/src/Functions/Kusto/kqlBinAt.cpp
@@ -0,0 +1,98 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+class FunctionKqlBinAt : public IFunction
+{
+public:
+    static constexpr auto name = "kql_bin_at";
+    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionKqlBinAt>(std::move(context)); }
+
+    explicit FunctionKqlBinAt(ContextPtr context_) : context(std::move(context_)) { }
+    ~FunctionKqlBinAt() override = default;
+
+    ColumnPtr
+    executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override;
+    String getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 3; }
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+
+private:
+    ContextPtr context;
+};
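+
+/// kql_bin_at(value, round_to, fixed_point) rounds value down to a multiple of
+/// round_to aligned to fixed_point; assuming the KQL semantics this function
+/// models, it is equivalent to fixed_point + kql_bin(value - fixed_point, round_to),
+/// e.g. kql_bin_at(6.5, 2.5, 7) -> 4.5.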
+
+ColumnPtr FunctionKqlBinAt::executeImpl(
+    const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const
+{
+    const auto get_or_convert_argument = [this, &input_rows_count](const ColumnWithTypeAndName & argument)
+    {
+        if (const WhichDataType which_data_type(*argument.type); which_data_type.isDateOrDate32() || which_data_type.isDateTime())
+        {
+            const ColumnsWithTypeAndName to_datetime64_args{
+                argument,
+                createConstColumnWithTypeAndName(9, "scale"),
+                createConstColumnWithTypeAndName("UTC", "timezone")};
+
+            return asArgument(executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count), argument.name);
+        }
+
+        return argument;
+    };
+
+    const auto & value_argument = get_or_convert_argument(arguments.front());
+    const auto & round_to_argument = arguments[1];
+    const auto & offset_argument = get_or_convert_argument(arguments.back());
+
+    const ColumnsWithTypeAndName subtraction_args{value_argument, offset_argument};
+    const auto difference = executeFunctionCall(context, "minus", subtraction_args, input_rows_count);
+
+    const ColumnsWithTypeAndName bin_args{asArgument(difference, "difference"), round_to_argument};
+    const auto bin_result = executeFunctionCall(context, "kql_bin", bin_args, input_rows_count);
+
+    const ColumnsWithTypeAndName addition_args{offset_argument, asArgument(bin_result, "bin_result")};
+    const auto sum = executeFunctionCall(context, "plus", addition_args, input_rows_count);
+
+    const ColumnsWithTypeAndName cast_args{
+        asArgument(sum, "sum"), createConstColumnWithTypeAndName(result_type->getName(), "type")};
+    return executeFunctionCall(context, "cast", cast_args, input_rows_count).first;
+}
+
+DataTypePtr FunctionKqlBinAt::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
+{
+    const auto & value_type =
*arguments.front().type; + const auto & offset_type = *arguments.back().type; + + WhichDataType value_which_data_type(value_type); + WhichDataType offset_which_data_type(offset_type); + if ((value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64() && offset_which_data_type.isDateOrDate32OrDateTimeOrDateTime64()) + || (value_which_data_type.isInterval() && offset_which_data_type.isInterval()) + || (isNumber(value_which_data_type) && isNumber(offset_which_data_type))) + { + const ColumnsWithTypeAndName bin_args{arguments.front(), arguments[1]}; + return FunctionFactory::instance().get("kql_bin", context)->build(bin_args)->getResultType(); + } + + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}, expected {}", + offset_type.getName(), + getName(), + value_type.getFamilyName()); +} + +REGISTER_FUNCTION(KqlBinAt) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlDateTime.cpp b/src/Functions/Kusto/kqlDateTime.cpp new file mode 100644 index 000000000000..6b7f7da0231e --- /dev/null +++ b/src/Functions/Kusto/kqlDateTime.cpp @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +namespace +{ +enum class InputPolicy +{ + Arbitrary, + Constant +}; + +constexpr const char * getDateTimeParsingFunction(const InputPolicy input_policy) +{ + if (input_policy == InputPolicy::Arbitrary) + return "parseDateTime64BestEffortOrNull"; + else if (input_policy == InputPolicy::Constant) + return "parseDateTime64BestEffort"; + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unhandled input policy: {}", magic_enum::enum_name(input_policy)); +} + +constexpr const char * getFunctionName(const InputPolicy input_policy) +{ + if (input_policy == InputPolicy::Arbitrary) + return "kql_todatetime"; + else if (input_policy == InputPolicy::Constant) + return "kql_datetime"; + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unhandled input policy: {}", magic_enum::enum_name(input_policy)); +} +} + +namespace DB +{ +template +class FunctionKqlDateTime : public IFunction +{ +public: + static constexpr auto name = getFunctionName(input_policy); + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlDateTime(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlDateTime() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return makeNullable(std::make_shared(9, "UTC")); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +template +ColumnPtr FunctionKqlDateTime::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + const ColumnsWithTypeAndName conversion_args{ + argument, + createConstColumnWithTypeAndName(9, "scale"), + createConstColumnWithTypeAndName("UTC", "timezone")}; + + const auto conversion_function + = WhichDataType(*argument.type).isStringOrFixedString() ? 
getDateTimeParsingFunction(input_policy) : "toDateTime64"; + const auto converted = executeFunctionCall(context, conversion_function, conversion_args, input_rows_count); + + const ColumnsWithTypeAndName addition_args{ + asArgument(converted, "converted"), + createConstColumnWithTypeAndName(50, "interval_50", IntervalKind::Nanosecond)}; + const auto sum = executeFunctionCall(context, "plus", addition_args, input_rows_count); + + const ColumnsWithTypeAndName to_start_of_interval_args{ + asArgument(sum, "sum"), createConstColumnWithTypeAndName(100, "interval_100", IntervalKind::Nanosecond)}; + const auto [rounded_column, _] = executeFunctionCall(context, "toStartOfInterval", to_start_of_interval_args, input_rows_count); + + return wrapInNullable(rounded_column, conversion_args, result_type, input_rows_count); +} + +REGISTER_FUNCTION(KqlDateTime) +{ + factory.registerFunction>(); + factory.registerFunction>(); +} +} diff --git a/src/Functions/Kusto/kqlToString.cpp b/src/Functions/Kusto/kqlToString.cpp new file mode 100644 index 000000000000..9295a5047f59 --- /dev/null +++ b/src/Functions/Kusto/kqlToString.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +class FunctionKqlToString : public IFunction +{ +public: + static constexpr auto name = "kql_tostring"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlToString(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlToString() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr +FunctionKqlToString::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + if (WhichDataType which_data_type(*argument.type); which_data_type.isInterval()) + { + static const auto TICKS_PER_DAY = ParserKQLTimespan::parse("1d").value(); + static const auto TICKS_PER_HOUR = ParserKQLTimespan::parse("1h").value(); + static const auto TICKS_PER_MINUTE = ParserKQLTimespan::parse("1m").value(); + static const auto TICKS_PER_SECOND = ParserKQLTimespan::parse("1s").value(); + + const auto & in_column = *argument.column; + auto out_column = ColumnString::create(); + auto & chars = out_column->getChars(); + auto & offsets = out_column->getOffsets(); + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto value = in_column.getInt(i); + const auto abs_ticks = std::abs(value / 100); + + std::string timespan_as_string = value < 0 ? 
"-" : ""; + if (abs_ticks >= TICKS_PER_DAY) + timespan_as_string.append(std::format("{}.", abs_ticks / TICKS_PER_DAY)); + + timespan_as_string.append(std::format( + "{:02}:{:02}:{:02}", + (abs_ticks / TICKS_PER_HOUR) % 24, + (abs_ticks / TICKS_PER_MINUTE) % 60, + (abs_ticks / TICKS_PER_SECOND) % 60)); + + if (const auto fractional_second = abs_ticks % TICKS_PER_SECOND) + timespan_as_string.append(std::format(".{:07}", fractional_second)); + + const auto chars_old_length = chars.size(); + const auto str_length_with_terminator = timespan_as_string.length() + 1; + chars.resize(chars.size() + str_length_with_terminator); + std::copy(timespan_as_string.c_str(), timespan_as_string.c_str() + str_length_with_terminator, chars.data() + chars_old_length); + offsets.push_back(chars.size()); + } + + return out_column; + } + else if (which_data_type.isDateOrDate32() || which_data_type.isDateTime() || which_data_type.isDateTime64()) + { + const ColumnsWithTypeAndName to_datetime64_args{argument, createConstColumnWithTypeAndName(7, "scale")}; + const auto as_datetime64 = executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count); + + const ColumnsWithTypeAndName format_datetime_args{ + asArgument(as_datetime64, "as_datetime64"), createConstColumnWithTypeAndName("%FT%T.%fZ", "format_string")}; + return executeFunctionCall(context, "formatDateTime", format_datetime_args, input_rows_count).first; + } + + return executeFunctionCall(context, "toString", arguments, input_rows_count).first; +} + +REGISTER_FUNCTION(KqlToString) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlToTimespan.cpp b/src/Functions/Kusto/kqlToTimespan.cpp new file mode 100644 index 000000000000..39923de9d187 --- /dev/null +++ b/src/Functions/Kusto/kqlToTimespan.cpp @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlToTimespan : public IFunction +{ +public: + static constexpr auto name = "kql_totimespan"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlToTimespan(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlToTimespan() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionKqlToTimespan::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + if (WhichDataType(*argument.type).isInterval()) + return wrapInNullable(argument.column, arguments, result_type, input_rows_count); + + const auto * in_column = typeid_cast(argument.column.get()); + if (!in_column) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected String", + argument.type->getName(), + getName()); + + auto out_column = result_type->createColumn(); + auto & 
out_column_as_nullable = assert_cast(*out_column); + auto & out_nested_column = assert_cast(out_column_as_nullable.getNestedColumn()); + + const auto size = in_column->size(); + auto & out_data = out_nested_column.getData(); + auto & out_null_map = out_column_as_nullable.getNullMapData(); + out_data.resize(size); + out_null_map.resize(size); + + const auto & in_chars = in_column->getChars(); + const auto & in_offsets = in_column->getOffsets(); + const auto * in_chars_data = reinterpret_cast(in_chars.data()); + size_t start = 0; + for (size_t i = 0; i < size; ++i) + { + const auto & offset = in_offsets[i]; + std::optional ticks; + const auto success = ParserKQLTimespan::tryParse({in_chars_data + start, offset - start - 1}, ticks); + out_data[i] = ticks.value_or(0) * 100; + out_null_map[i] = !ticks.has_value() || !success; + + start = offset; + } + + return out_column; +} + +DataTypePtr FunctionKqlToTimespan::getReturnTypeImpl(const DataTypes &) const +{ + return makeNullable(std::make_shared(IntervalKind::Nanosecond)); +} + +REGISTER_FUNCTION(KqlToTimespan) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index edb0df3ae34b..744f902158cc 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "Regexps.h" @@ -108,8 +109,12 @@ struct MatchImpl const ColumnString::Offsets & haystack_offsets, const String & needle, [[maybe_unused]] const ColumnPtr & start_pos_, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + const size_t haystack_size = haystack_offsets.size(); assert(haystack_size == res.size()); @@ -258,8 +263,12 @@ struct MatchImpl const ColumnString::Chars & haystack, size_t N, const String & needle, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + const size_t haystack_size = haystack.size() / N; assert(haystack_size == res.size()); @@ -417,8 +426,12 @@ struct MatchImpl const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offset, [[maybe_unused]] const ColumnPtr & start_pos_, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + const size_t haystack_size = haystack_offsets.size(); assert(haystack_size == needle_offset.size()); @@ -522,8 +535,12 @@ struct MatchImpl const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offset, [[maybe_unused]] const ColumnPtr & start_pos_, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
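+    /// An OrNull variant (cf. hasTokenOrNull later in this patch) would be expected to
+    /// size `res_null` like `res` and set res_null[i] = 1 for rows it cannot evaluate.
+    /// None of the Match overloads populate it yet, hence the assert that follows.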
+ assert(!res_null); + const size_t haystack_size = haystack.size()/N; assert(haystack_size == needle_offset.size()); diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 4b02e78dc252..53873c18233e 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -156,7 +156,7 @@ struct MultiMatchAnyImpl memset(accum.data(), 0, accum.size()); for (size_t j = 0; j < needles.size(); ++j) { - MatchImpl::vectorConstant(haystack_data, haystack_offsets, String(needles[j].data(), needles[j].size()), nullptr, accum); + MatchImpl::vectorConstant(haystack_data, haystack_offsets, String(needles[j].data(), needles[j].size()), nullptr, accum, nullptr); for (size_t i = 0; i < res.size(); ++i) { if constexpr (FindAny) diff --git a/src/Functions/PositionImpl.h b/src/Functions/PositionImpl.h index 76f10373a581..ae7d95bcf61d 100644 --- a/src/Functions/PositionImpl.h +++ b/src/Functions/PositionImpl.h @@ -192,8 +192,12 @@ struct PositionImpl const ColumnString::Offsets & haystack_offsets, const std::string & needle, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + const UInt8 * const begin = haystack_data.data(); const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; @@ -269,8 +273,12 @@ struct PositionImpl std::string data, std::string needle, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + Impl::toLowerIfNeed(data); Impl::toLowerIfNeed(needle); @@ -303,8 +311,12 @@ struct PositionImpl const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + ColumnString::Offset prev_haystack_offset = 0; ColumnString::Offset prev_needle_offset = 0; @@ -363,10 +375,13 @@ struct PositionImpl const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { - /// NOTE You could use haystack indexing. But this is a rare case. + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + /// NOTE You could use haystack indexing. But this is a rare case. 
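+    /// ("Haystack indexing" would mean preprocessing the single constant haystack into a
+    /// substring index once and probing each needle against it; the per-needle scan below
+    /// is kept because a constant haystack with a column of needles is a rare call pattern.)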
ColumnString::Offset prev_needle_offset = 0; size_t size = needle_offsets.size(); diff --git a/src/Functions/abs.cpp b/src/Functions/abs.cpp index a7afdbacad6f..84d6aae9b955 100644 --- a/src/Functions/abs.cpp +++ b/src/Functions/abs.cpp @@ -11,6 +11,7 @@ struct AbsImpl { using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfAbs::Type>; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index 2e3b79c67101..c5b22a79ecd0 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -16,7 +16,8 @@ template struct BitAndImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; - static constexpr const bool allow_fixed_string = true; + static const constexpr bool allow_fixed_string = true; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 2a0735e5ac80..94d02e65dc29 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -22,6 +22,7 @@ struct BitBoolMaskAndImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index e0acde17a62a..c66ee61ac2e4 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -22,6 +22,7 @@ struct BitBoolMaskOrImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index d425dd1dca2b..6b5ac8cbf3a4 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -13,7 +13,8 @@ template struct BitCountImpl { using ResultType = UInt8; - static constexpr bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply(A a) diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index 75928c2a8af7..eb4060301054 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -9,6 +9,7 @@ struct BitHammingDistanceImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index b13becedc312..2399efb9fab1 100644 --- a/src/Functions/bitNot.cpp +++ b/src/Functions/bitNot.cpp @@ -18,6 +18,7 @@ struct BitNotImpl { using ResultType = typename NumberTraits::ResultOfBitNot::Type; static const constexpr bool allow_fixed_string = true; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply(A a) diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index 40d5f41884e9..1f1933803dcf 100644 --- 
a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -15,7 +15,8 @@ template struct BitOrImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; - static constexpr const bool allow_fixed_string = true; + static const constexpr bool allow_fixed_string = true; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp index 8b99d45d9f01..33746c79348e 100644 --- a/src/Functions/bitRotateLeft.cpp +++ b/src/Functions/bitRotateLeft.cpp @@ -17,6 +17,7 @@ struct BitRotateLeftImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp index 0d84fbd9f64e..aca67ec9235c 100644 --- a/src/Functions/bitRotateRight.cpp +++ b/src/Functions/bitRotateRight.cpp @@ -17,6 +17,7 @@ struct BitRotateRightImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index b53c2b05da0a..72b2c9ca8012 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -17,6 +17,7 @@ struct BitShiftLeftImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = true; template diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 8134a64ac535..736bfc968982 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -18,6 +18,7 @@ struct BitShiftRightImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = true; template diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index e7f071606931..98a586831d03 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -19,7 +19,8 @@ template struct BitSwapLastTwoImpl { using ResultType = UInt8; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index ac21423ced66..1fb3bf550b72 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -18,6 +18,7 @@ struct BitTestImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index 83c89c753fc8..0f19de1f8846 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -19,7 +19,8 @@ template struct 
BitWrapperFuncImpl
 {
     using ResultType = UInt8;
-    static constexpr const bool allow_fixed_string = false;
+    static const constexpr bool allow_fixed_string = false;
+    static const constexpr bool allow_interval = false;
     static const constexpr bool allow_string_integer = false;
 
     static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]])
diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp
index 89aaf5eadd16..3bcfda084314 100644
--- a/src/Functions/bitXor.cpp
+++ b/src/Functions/bitXor.cpp
@@ -15,7 +15,8 @@ template <typename A, typename B>
 struct BitXorImpl
 {
     using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
-    static constexpr bool allow_fixed_string = true;
+    static const constexpr bool allow_fixed_string = true;
+    static const constexpr bool allow_interval = false;
     static const constexpr bool allow_string_integer = false;
 
     template <typename Result = ResultType>
diff --git a/src/Functions/dateTime64Diff.cpp b/src/Functions/dateTime64Diff.cpp
new file mode 100644
index 000000000000..665eaf915576
--- /dev/null
+++ b/src/Functions/dateTime64Diff.cpp
@@ -0,0 +1,96 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+class FunctionDateTime64Diff : public IFunction
+{
+    using ColumnDateTime64 = ColumnDecimal<DateTime64>;
+
+public:
+    static constexpr auto name = "dateTime64Diff";
+    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionDateTime64Diff>(std::move(context)); }
+
+    explicit FunctionDateTime64Diff(ContextPtr context_) : context(std::move(context_)) { }
+    ~FunctionDateTime64Diff() override = default;
+
+    ColumnPtr
+    executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override;
+    String getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 2; }
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+
+private:
+    ContextPtr context;
+};
+
+ColumnPtr FunctionDateTime64Diff::executeImpl(
+    const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const
+{
+    const auto & lhs_arg = arguments.front();
+    const auto & rhs_arg = arguments.back();
+    const auto * lhs_type = checkAndGetDataType<DataTypeDateTime64>(lhs_arg.type.get());
+    const auto * rhs_type = checkAndGetDataType<DataTypeDateTime64>(rhs_arg.type.get());
+    if (!lhs_type || !rhs_type)
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unexpected arguments of function {}", getName());
+
+    const auto common_scale = std::max(lhs_type->getScale(), rhs_type->getScale());
+    const auto scale_arg = createConstColumnWithTypeAndName(common_scale, "scale");
+    const auto convert_to_decimal = [this, &input_rows_count, &scale_arg](const ColumnWithTypeAndName & argument)
+    {
+        const ColumnsWithTypeAndName cast_args{argument, scale_arg};
+        return executeFunctionCall(context, "toDecimal64", cast_args, input_rows_count);
+    };
+
+    const auto lhs_arg_as_decimal = convert_to_decimal(lhs_arg);
+    const auto rhs_arg_as_decimal = convert_to_decimal(rhs_arg);
+    const ColumnsWithTypeAndName subtraction_args{asArgument(lhs_arg_as_decimal, "lhs"), asArgument(rhs_arg_as_decimal, "rhs")};
+    const auto difference = executeFunctionCall(context, "minus", subtraction_args, input_rows_count);
+
+    const ColumnsWithTypeAndName to_decimal128_args{asArgument(difference, "difference"), scale_arg};
+    const auto as_decimal128 = executeFunctionCall(context, "toDecimal128", to_decimal128_args,
input_rows_count); + + const ColumnsWithTypeAndName scale_args{ + asArgument(as_decimal128, "difference"), createConstColumnWithTypeAndName(1'000'000'000, "multiplier")}; + const auto scaled = executeFunctionCall(context, "multiply", scale_args, input_rows_count); + + const ColumnsWithTypeAndName to_int64_args{asArgument(scaled, "scaled")}; + const auto as_int64 = executeFunctionCall(context, "toInt64", to_int64_args, input_rows_count); + + const ColumnsWithTypeAndName cast_args{ + asArgument(as_int64, "as_int64"), createConstColumnWithTypeAndName(result_type->getName(), "target_type")}; + return executeFunctionCall(context, "cast", cast_args, input_rows_count).first; +} + +DataTypePtr FunctionDateTime64Diff::getReturnTypeImpl(const DataTypes & arguments) const +{ + const auto & lhs = arguments.front(); + const auto & rhs = arguments.back(); + if (!WhichDataType(*lhs).isDateTime64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected DateTime64", + lhs->getName(), + getName()); + + if (!WhichDataType(*rhs).isDateTime64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected DateTime64", + rhs->getName(), + getName()); + + return std::make_shared(IntervalKind::Nanosecond); +} + +REGISTER_FUNCTION(DateTime64Diff) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/divide.cpp b/src/Functions/divide.cpp index 1d042e19b9fe..2f8751590901 100644 --- a/src/Functions/divide.cpp +++ b/src/Functions/divide.cpp @@ -13,6 +13,7 @@ struct DivideFloatingImpl { using ResultType = typename NumberTraits::ResultOfFloatingPointDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/divide/divideImpl.cpp b/src/Functions/divide/divideImpl.cpp index 940f4b35df9e..966d5777c1df 100644 --- a/src/Functions/divide/divideImpl.cpp +++ b/src/Functions/divide/divideImpl.cpp @@ -1,6 +1,7 @@ /// This translation unit should be compiled multiple times /// with different values of NAMESPACE and machine flags (sse2, avx2). 
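+/// In practice the build emits one copy of this division kernel per instruction set,
+/// each under its own NAMESPACE, and the calling code selects the widest variant the
+/// CPU supports at run time.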
+/// See also #if defined(__AVX2__) #define REG_SIZE 32 #define LIBDIVIDE_AVX2 diff --git a/src/Functions/extract.cpp b/src/Functions/extract.cpp index 5d539e03daea..617594cad8dc 100644 --- a/src/Functions/extract.cpp +++ b/src/Functions/extract.cpp @@ -70,4 +70,63 @@ REGISTER_FUNCTION(Extract) factory.registerFunction(); } +struct KqlExtractImpl +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + const std::string & pattern, + unsigned capture, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + res_data.reserve(data.size() / 5); + res_offsets.resize(offsets.size()); + + const Regexps::Regexp regexp = Regexps::createRegexp(pattern); + + OptimizedRegularExpression::MatchVec matches; + matches.reserve(capture + 1); + size_t prev_offset = 0; + size_t res_offset = 0; + + for (size_t i = 0; i < offsets.size(); ++i) + { + size_t cur_offset = offsets[i]; + + unsigned count + = regexp.match(reinterpret_cast(&data[prev_offset]), cur_offset - prev_offset - 1, matches, capture + 1); + if (count > capture && matches[capture].offset != std::string::npos) + { + const auto & match = matches[capture]; + res_data.resize(res_offset + match.length + 1); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset + match.offset], match.length); + res_offset += match.length; + } + else + { + res_data.resize(res_offset + 1); + } + + res_data[res_offset] = 0; + ++res_offset; + res_offsets[i] = res_offset; + + prev_offset = cur_offset; + } + } +}; + +struct NameKqlExtract +{ + static constexpr auto name = "kql_extract"; +}; + +using FunctionKqlExtract = KqlStringSearchToString; + +REGISTER_FUNCTION(KqlExtract) +{ + factory.registerFunction(); +} + } diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index 4e96391bccda..b1ea9a7fb9a5 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -18,6 +18,7 @@ struct FactorialImpl using ResultType = UInt64; static const constexpr bool allow_decimal = false; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 4db04d61d847..4c24239a06c9 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -48,7 +48,6 @@ template <> struct ActionValueTypeMap { using ActionValueTyp template <> struct ActionValueTypeMap { using ActionValueType = UInt16; }; template <> struct ActionValueTypeMap { using ActionValueType = Int32; }; template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -// TODO(vnemkov): to add sub-second format instruction, make that DateTime64 and do some math in Action. 
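+// Sub-second support: Action::perform now carries the DateTime64 fractional part and
+// scale alongside the whole-second value, and the new %f instruction prints it, e.g.
+// (illustrative):
+//     formatDateTime(toDateTime64('2010-01-04 12:34:56.123', 3), '%S.%f') -> '56.123'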
template <> struct ActionValueTypeMap { using ActionValueType = Int64; }; @@ -113,16 +112,16 @@ class FunctionFormatDateTimeImpl : public IFunction class Action { public: - using Func = void (*)(char *, Time, const DateLUTImpl &); + using Func = void (*)(char *, Time, UInt64, UInt32, const DateLUTImpl &); Func func; size_t shift; explicit Action(Func func_, size_t shift_ = 0) : func(func_), shift(shift_) {} - void perform(char *& target, Time source, const DateLUTImpl & timezone) + void perform(char *& target, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone) { - func(target, source, timezone); + func(target, source, fractional_second, scale, timezone); target += shift; } @@ -148,30 +147,30 @@ class FunctionFormatDateTimeImpl : public IFunction } public: - static void noop(char *, Time, const DateLUTImpl &) + static void noop(char *, Time, UInt64 , UInt32 , const DateLUTImpl &) { } - static void century(char * target, Time source, const DateLUTImpl & timezone) + static void century(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto year = ToYearImpl::execute(source, timezone); auto century = year / 100; writeNumber2(target, century); } - static void dayOfMonth(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfMonth(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToDayOfMonthImpl::execute(source, timezone)); } - static void americanDate(char * target, Time source, const DateLUTImpl & timezone) + static void americanDate(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToMonthImpl::execute(source, timezone)); writeNumber2(target + 3, ToDayOfMonthImpl::execute(source, timezone)); writeNumber2(target + 6, ToYearImpl::execute(source, timezone) % 100); } - static void dayOfMonthSpacePadded(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfMonthSpacePadded(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto day = ToDayOfMonthImpl::execute(source, timezone); if (day < 10) @@ -180,101 +179,107 @@ class FunctionFormatDateTimeImpl : public IFunction writeNumber2(target, day); } - static void ISO8601Date(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void ISO8601Date(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber4(target, ToYearImpl::execute(source, timezone)); writeNumber2(target + 5, ToMonthImpl::execute(source, timezone)); writeNumber2(target + 8, ToDayOfMonthImpl::execute(source, timezone)); } - static void dayOfYear(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfYear(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber3(target, ToDayOfYearImpl::execute(source, timezone)); } - static void month(char * target, Time source, const DateLUTImpl & timezone) + static void month(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToMonthImpl::execute(source, timezone)); } - static void dayOfWeek(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfWeek(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const 
DateLUTImpl & timezone) { *target += ToDayOfWeekImpl::execute(source, timezone); } - static void dayOfWeek0To6(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfWeek0To6(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto day = ToDayOfWeekImpl::execute(source, timezone); *target += (day == 7 ? 0 : day); } - static void ISO8601Week(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void ISO8601Week(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber2(target, ToISOWeekImpl::execute(source, timezone)); } - static void ISO8601Year2(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void ISO8601Year2(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber2(target, ToISOYearImpl::execute(source, timezone) % 100); } - static void ISO8601Year4(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void ISO8601Year4(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber4(target, ToISOYearImpl::execute(source, timezone)); } - static void year2(char * target, Time source, const DateLUTImpl & timezone) + static void year2(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToYearImpl::execute(source, timezone) % 100); } - static void year4(char * target, Time source, const DateLUTImpl & timezone) + static void year4(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber4(target, ToYearImpl::execute(source, timezone)); } - static void hour24(char * target, Time source, const DateLUTImpl & timezone) + static void hour24(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToHourImpl::execute(source, timezone)); } - static void hour12(char * target, Time source, const DateLUTImpl & timezone) + static void hour12(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto x = ToHourImpl::execute(source, timezone); writeNumber2(target, x == 0 ? 12 : (x > 12 ? 
x - 12 : x)); } - static void minute(char * target, Time source, const DateLUTImpl & timezone) + static void minute(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToMinuteImpl::execute(source, timezone)); } - static void AMPM(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void AMPM(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { auto hour = ToHourImpl::execute(source, timezone); if (hour >= 12) *target = 'P'; } - static void hhmm24(char * target, Time source, const DateLUTImpl & timezone) + static void hhmm24(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToHourImpl::execute(source, timezone)); writeNumber2(target + 3, ToMinuteImpl::execute(source, timezone)); } - static void second(char * target, Time source, const DateLUTImpl & timezone) + static void second(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToSecondImpl::execute(source, timezone)); } - static void ISO8601Time(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void fractionalSecond(char * target, Time /*source*/, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & /*timezone*/) + { + for (Int64 i = scale, value = fractional_second; i > 0; --i, value /= 10) + target[i - 1] += value % 10; + } + + static void ISO8601Time(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber2(target, ToHourImpl::execute(source, timezone)); writeNumber2(target + 3, ToMinuteImpl::execute(source, timezone)); writeNumber2(target + 6, ToSecondImpl::execute(source, timezone)); } - static void timezoneOffset(char * target, Time source, const DateLUTImpl & timezone) + static void timezoneOffset(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto offset = TimezoneOffsetImpl::execute(source, timezone); if (offset < 0) @@ -287,7 +292,7 @@ class FunctionFormatDateTimeImpl : public IFunction writeNumber2(target + 3, offset % 3600 / 60); } - static void quarter(char * target, Time source, const DateLUTImpl & timezone) + static void quarter(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { *target += ToQuarterImpl::execute(source, timezone); } @@ -426,9 +431,15 @@ class FunctionFormatDateTimeImpl : public IFunction String pattern = pattern_column->getValue(); + UInt32 scale [[maybe_unused]] = 0; + if constexpr (std::is_same_v) + { + scale = times->getScale(); + } + using T = typename ActionValueTypeMap::ActionValueType; std::vector> instructions; - String pattern_to_fill = parsePattern(pattern, instructions); + String pattern_to_fill = parsePattern(pattern, instructions, scale); size_t result_size = pattern_to_fill.size(); const DateLUTImpl * time_zone_tmp = nullptr; @@ -444,12 +455,6 @@ class FunctionFormatDateTimeImpl : public IFunction const DateLUTImpl & time_zone = *time_zone_tmp; const auto & vec = times->getData(); - UInt32 scale [[maybe_unused]] = 0; - if constexpr (std::is_same_v) - { - scale = times->getScale(); - } - auto col_res = ColumnString::create(); auto & dst_data = col_res->getChars(); auto & dst_offsets = col_res->getOffsets(); @@ -484,16 +489,16 @@ class 
FunctionFormatDateTimeImpl : public IFunction { if constexpr (std::is_same_v) { + const auto c = DecimalUtils::split(vec[i], scale); for (auto & instruction : instructions) { - const auto c = DecimalUtils::split(vec[i], scale); - instruction.perform(pos, static_cast(c.whole), time_zone); + instruction.perform(pos, static_cast(c.whole), c.fractional, scale, time_zone); } } else { for (auto & instruction : instructions) - instruction.perform(pos, static_cast(vec[i]), time_zone); + instruction.perform(pos, static_cast(vec[i]), 0, 0, time_zone); } dst_offsets[i] = pos - begin; @@ -504,7 +509,7 @@ class FunctionFormatDateTimeImpl : public IFunction } template - String parsePattern(const String & pattern, std::vector> & instructions) const + String parsePattern(const String & pattern, std::vector> & instructions, UInt32 scale) const { String result; @@ -573,6 +578,16 @@ class FunctionFormatDateTimeImpl : public IFunction result.append(" 0"); break; + // Fractional seconds + case 'f': + { + /// If the time data type has no fractional part, then we print '0' as the fractional part. + const auto actual_scale = std::max(1, scale); + instructions.emplace_back(&Action::fractionalSecond, actual_scale); + result.append(actual_scale, '0'); + break; + } + // Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23 case 'F': instructions.emplace_back(&Action::ISO8601Date, 10); diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 93fd7e24853f..a9d4637884af 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -12,6 +12,7 @@ struct GreatestBaseImpl { using ResultType = NumberTraits::ResultOfGreatest; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -43,6 +44,7 @@ struct GreatestSpecialImpl { using ResultType = make_unsigned_t; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index 646ff0b54f73..ba1ff9b3ab59 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -1,26 +1,32 @@ +#include "FunctionFactory.h" #include "FunctionsStringSearch.h" -#include #include "HasTokenImpl.h" -#include +#include -namespace DB -{ namespace { - struct NameHasToken { static constexpr auto name = "hasToken"; }; -using FunctionHasToken = FunctionsStringSearch>; +struct NameHasTokenOrNull +{ + static constexpr auto name = "hasTokenOrNull"; +}; +using FunctionHasToken = DB::FunctionsStringSearch>; +using FunctionHasTokenOrNull = DB:: + FunctionsStringSearch, DB::ExecutionErrorPolicy::Null>; } REGISTER_FUNCTION(HasToken) { - factory.registerFunction(); -} + factory.registerFunction( + {"Performs lookup of needle in haystack using tokenbf_v1 index."}, DB::FunctionFactory::CaseSensitive); + factory.registerFunction( + {"Performs lookup of needle in haystack using tokenbf_v1 index. 
Returns null if needle is ill-formed."}, + DB::FunctionFactory::CaseSensitive); } diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index 0012ea3e1483..4fc19ca07844 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -1,27 +1,34 @@ +#include "FunctionFactory.h" #include "FunctionsStringSearch.h" -#include #include "HasTokenImpl.h" -#include +#include -namespace DB -{ namespace { - struct NameHasTokenCaseInsensitive { static constexpr auto name = "hasTokenCaseInsensitive"; }; -using FunctionHasTokenCaseInsensitive - = FunctionsStringSearch>; +struct NameHasTokenCaseInsensitiveOrNull +{ + static constexpr auto name = "hasTokenCaseInsensitiveOrNull"; +}; +using FunctionHasTokenCaseInsensitive + = DB::FunctionsStringSearch>; +using FunctionHasTokenCaseInsensitiveOrNull = DB::FunctionsStringSearch< + DB::HasTokenImpl, + DB::ExecutionErrorPolicy::Null>; } REGISTER_FUNCTION(HasTokenCaseInsensitive) { - factory.registerFunction(); -} + factory.registerFunction( + {"Performs case insensitive lookup of needle in haystack using tokenbf_v1 index."}, DB::FunctionFactory::CaseInsensitive); + factory.registerFunction( + {"Performs case insensitive lookup of needle in haystack using tokenbf_v1 index. Returns null if needle is ill-formed."}, + DB::FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/intDivOrZero.cpp b/src/Functions/intDivOrZero.cpp index 96ff6ea80fc4..708186dadfb4 100644 --- a/src/Functions/intDivOrZero.cpp +++ b/src/Functions/intDivOrZero.cpp @@ -10,6 +10,7 @@ struct DivideIntegralOrZeroImpl { using ResultType = typename NumberTraits::ResultOfIntegerDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 6e59a41c50be..31d4c584bf7d 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -17,7 +17,8 @@ template struct IntExp10Impl { using ResultType = UInt64; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply([[maybe_unused]] A a) diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index e39647b6c38e..35f646ef233b 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -18,7 +18,8 @@ template struct IntExp2Impl { using ResultType = UInt64; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply([[maybe_unused]] A a) diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index f5680d4d468f..2de1805dc2cc 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -12,6 +12,7 @@ struct LeastBaseImpl { using ResultType = NumberTraits::ResultOfLeast; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -43,6 +44,7 @@ struct LeastSpecialImpl { using ResultType = std::make_signed_t; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval 
= true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index 04877a42b18d..32e8e053df67 100644 --- a/src/Functions/minus.cpp +++ b/src/Functions/minus.cpp @@ -10,6 +10,7 @@ struct MinusImpl { using ResultType = typename NumberTraits::ResultOfSubtraction::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index be052b25af4f..be6cb989d10d 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -1,16 +1,7 @@ #include #include -#if defined(__SSE2__) -# define LIBDIVIDE_SSE2 -#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__) -# define LIBDIVIDE_AVX512 -#elif defined(__AVX2__) -# define LIBDIVIDE_AVX2 -#elif defined(__aarch64__) && defined(__ARM_NEON) -# define LIBDIVIDE_NEON -#endif - +#include #include diff --git a/src/Functions/moduloOrZero.cpp b/src/Functions/moduloOrZero.cpp index 3551ae74c5f1..dae58a7f7dd1 100644 --- a/src/Functions/moduloOrZero.cpp +++ b/src/Functions/moduloOrZero.cpp @@ -4,14 +4,12 @@ namespace DB { -namespace -{ - template struct ModuloOrZeroImpl { using ResultType = typename NumberTraits::ResultOfModulo::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -39,8 +37,6 @@ struct ModuloOrZeroImpl struct NameModuloOrZero { static constexpr auto name = "moduloOrZero"; }; using FunctionModuloOrZero = BinaryArithmeticOverloadResolver; -} - REGISTER_FUNCTION(ModuloOrZero) { factory.registerFunction(); diff --git a/src/Functions/multiply.cpp b/src/Functions/multiply.cpp index 4dc8cd10f317..535c0a1758de 100644 --- a/src/Functions/multiply.cpp +++ b/src/Functions/multiply.cpp @@ -11,6 +11,7 @@ struct MultiplyImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/negate.cpp b/src/Functions/negate.cpp index 731c0766e9db..8ae3eb08ac57 100644 --- a/src/Functions/negate.cpp +++ b/src/Functions/negate.cpp @@ -9,7 +9,8 @@ template struct NegateImpl { using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfNegate::Type>; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index cd9cf6cec5c3..4f792e569ce8 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -10,6 +10,7 @@ struct PlusImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static const constexpr bool is_commutative = true; diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp index d2503bb6938a..aaa1131da639 100644 --- a/src/Functions/roundAge.cpp +++ b/src/Functions/roundAge.cpp @@ -10,7 +10,8 @@ template struct 
RoundAgeImpl { using ResultType = UInt8; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply(A x) diff --git a/src/Functions/roundDuration.cpp b/src/Functions/roundDuration.cpp index 62d35ea194d2..4fa78ceec835 100644 --- a/src/Functions/roundDuration.cpp +++ b/src/Functions/roundDuration.cpp @@ -10,7 +10,8 @@ template struct RoundDurationImpl { using ResultType = UInt16; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply(A x) diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index 7893773fb617..67762ab6cccd 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -63,7 +63,8 @@ template struct RoundToExp2Impl { using ResultType = T; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline T apply(T x) diff --git a/src/Functions/sign.cpp b/src/Functions/sign.cpp index 60ad6ba53654..63a9f31af396 100644 --- a/src/Functions/sign.cpp +++ b/src/Functions/sign.cpp @@ -10,6 +10,7 @@ struct SignImpl { using ResultType = Int8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 3054cf280d9b..f116f89afd1d 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -28,7 +29,11 @@ namespace ErrorCodes namespace { - constexpr auto function_name = "toStartOfInterval"; + enum class ExecutionErrorPolicy + { + Null, + Throw + }; template struct Transform; @@ -36,22 +41,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(DayNum(d), years); } - static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); } - static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); } @@ -60,22 +65,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + 
static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); } - static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); } - static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); } @@ -84,22 +89,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMonthInterval(DayNum(d), months); } - static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); } - static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); } @@ -108,22 +113,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfWeekInterval(DayNum(d), weeks); } - static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); } - static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); } @@ -132,22 +137,22 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64, const char*) { return 
static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); } - static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64, const char*) { return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); } - static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64, const char*) { return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); } - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } @@ -156,16 +161,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfHourInterval(t, hours); } - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); } @@ -174,16 +179,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMinuteInterval(t, minutes); } - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } @@ -192,16 +197,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const 
DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfSecondInterval(t, seconds); } - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } @@ -210,13 +215,13 @@ namespace template <> struct Transform<IntervalKind::Millisecond> { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000) { @@ -247,13 +252,13 @@ namespace template <> struct Transform<IntervalKind::Microsecond> { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000000) { @@ -284,13 +289,13 @@ namespace template <> struct Transform<IntervalKind::Nanosecond> { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { 
throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000000000) { @@ -310,12 +315,23 @@ namespace } }; +template <ExecutionErrorPolicy execution_error_policy> class FunctionToStartOfInterval : public IFunction { public: static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToStartOfInterval>(); } - static constexpr auto name = function_name; + static constexpr auto name = std::invoke( + [] + { + if (execution_error_policy == ExecutionErrorPolicy::Null) + return "toStartOfIntervalOrNull"; + else if (execution_error_policy == ExecutionErrorPolicy::Throw) + return "toStartOfInterval"; + + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unhandled execution policy"); + }); + String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -387,36 +403,43 @@ class FunctionToStartOfInterval : public IFunction ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } - if (result_type_is_date) - return std::make_shared<DataTypeDate>(); - else if (result_type_is_datetime) - return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - else - { - auto scale = 0; + auto return_type = std::invoke( + [&arguments, &interval_type, &result_type_is_date, &result_type_is_datetime]() -> std::shared_ptr<IDataType> + { + if (result_type_is_date) + return std::make_shared<DataTypeDate>(); + else if (result_type_is_datetime) + return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + else + { + auto scale = 0; - if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9; - else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6; - else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3; + if (interval_type->getKind() == IntervalKind::Nanosecond) + scale = 9; + else if (interval_type->getKind() == IntervalKind::Microsecond) + scale = 6; + else if (interval_type->getKind() == IntervalKind::Millisecond) + scale = 3; - return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - } + return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + }); + + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return makeNullable(return_type); + return return_type; } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone); - return result_column; + return dispatchForColumns(time_column, interval_column, result_type, time_zone); } bool hasInformationAboutMonotonicity() const override @@ -481,10 +504,8 @@ class FunctionToStartOfInterval : public IFunction if (!interval_column_const_int64) throw Exception( "Illegal column for second argument of function " + getName() + ", must be a const interval of time.", ErrorCodes::ILLEGAL_COLUMN); - Int64 num_units = 
interval_column_const_int64->getValue<Int64>(); - if (num_units <= 0) - throw Exception("Value for second argument of function " + getName() + " must be positive.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + const auto num_units = interval_column_const_int64->getValue<Int64>(); switch (interval_type->getKind()) { case IntervalKind::Nanosecond: @@ -521,18 +542,54 @@ class FunctionToStartOfInterval : public IFunction using ToFieldType = typename ToDataType::FieldType; const auto & time_data = time_column_type.getData(); - size_t size = time_data.size(); + const auto size = time_data.size(); auto result_col = result_type->createColumn(); - auto *col_to = assert_cast<typename ToDataType::ColumnType *>(result_col.get()); - auto & result_data = col_to->getData(); - result_data.resize(size); + auto [result_null_map_data, result_value_data] = std::invoke( + [&result_col]() -> std::pair<NullMap *, typename ToDataType::ColumnType::Container &> + { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + { + auto & nullable_column = assert_cast<ColumnNullable &>(*result_col); + auto & nested_column = assert_cast<typename ToDataType::ColumnType &>(nullable_column.getNestedColumn()); + return {&nullable_column.getNullMapData(), nested_column.getData()}; + } + else if constexpr (execution_error_policy == ExecutionErrorPolicy::Throw) + { + auto & target_column = assert_cast<typename ToDataType::ColumnType &>(*result_col); + return {nullptr, target_column.getData()}; + } + }); + + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + result_null_map_data->resize(size, true); + + result_value_data.resize(size); + if (num_units <= 0) + { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return result_col; + else if constexpr (execution_error_policy == ExecutionErrorPolicy::Throw) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); + } - Int64 scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale); + const auto scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale); for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast<ToFieldType>( - Transform<unit>::execute(time_data[i], num_units, time_zone, scale_multiplier)); + { + try + { + result_value_data[i] + = static_cast<ToFieldType>(Transform<unit>::execute(time_data[i], num_units, time_zone, scale_multiplier, name)); + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + (*result_null_map_data)[i] = false; + } + catch (...) 
+ { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Throw) + throw; + } + } return result_col; } @@ -542,7 +599,8 @@ class FunctionToStartOfInterval : public IFunction REGISTER_FUNCTION(ToStartOfInterval) { - factory.registerFunction<FunctionToStartOfInterval>(); + factory.registerFunction<FunctionToStartOfInterval<ExecutionErrorPolicy::Throw>>(); + factory.registerFunction<FunctionToStartOfInterval<ExecutionErrorPolicy::Null>>(); } } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 22229c0d6c21..bc93abff5345 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include +#include #include #include @@ -1831,7 +1833,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ssize_t where_step_num = -1; ssize_t having_step_num = -1; - auto finalize_chain = [&](ExpressionActionsChain & chain) + auto finalize_chain = [&](ExpressionActionsChain & chain) -> ColumnsWithTypeAndName { if (prewhere_step_num >= 0) { @@ -1852,7 +1854,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); + auto res = chain.getLastStep().getResultColumns(); chain.clear(); + return res; }; { @@ -1970,7 +1974,55 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (settings.group_by_use_nulls) query_analyzer.appendGroupByModifiers(before_aggregation, chain, only_types); - finalize_chain(chain); + auto columns_before_aggregation = finalize_chain(chain); + + /// Here we want to check that columns after aggregation have the same type as + /// were promised in query_analyzer.aggregated_columns + /// Ideally, they should be equal. In practice, this may be not true. + /// As an example, we don't build sets for IN inside ExpressionAnalysis::analyzeAggregation, + /// so that constant folding for expression (1 in 1) will not work. This may change the return type + /// for functions with LowCardinality argument: function "substr(toLowCardinality('abc'), 1 IN 1)" + /// should usually return LowCardinality(String) when (1 IN 1) is constant, but without built set + /// for (1 IN 1) constant is not propagated and "substr" returns String type. + /// See 02503_in_lc_const_args_bug.sql + /// + /// As a temporary solution, we add converting actions to the next chain. 
+ /// Hopefully, later we can + /// * use a new analyzer where this issue is absent + /// * or remove ExpressionActionsChain completely and re-implement its logic on top of the query plan + { + for (auto & col : columns_before_aggregation) + if (!col.column) + col.column = col.type->createColumn(); + + Block header_before_aggregation(std::move(columns_before_aggregation)); + + auto keys = query_analyzer.aggregationKeys().getNames(); + const auto & aggregates = query_analyzer.aggregates(); + + bool has_grouping = query_analyzer.group_by_kind != GroupByKind::ORDINARY; + auto actual_header = Aggregator::Params::getHeader( + header_before_aggregation, /*only_merge*/ false, keys, aggregates, /*final*/ true); + actual_header = AggregatingStep::appendGroupingColumn( + std::move(actual_header), keys, has_grouping, settings.group_by_use_nulls); + + Block expected_header; + for (const auto & expected : query_analyzer.aggregated_columns) + expected_header.insert(ColumnWithTypeAndName(expected.type, expected.name)); + + if (!blocksHaveEqualStructure(actual_header, expected_header)) + { + auto converting = ActionsDAG::makeConvertingActions( + actual_header.getColumnsWithTypeAndName(), + expected_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name, + true); + + auto & step = chain.lastStep(query_analyzer.aggregated_columns); + auto & actions = step.actions(); + actions = ActionsDAG::merge(std::move(*actions), std::move(*converting)); + } + } if (query_analyzer.appendHaving(chain, only_types || !second_stage)) { diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index b5b8ae81366a..720a179a9d47 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes { extern const int TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } @@ -58,8 +59,7 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto merge_tree = std::dynamic_pointer_cast<MergeTreeData>(table); - if (!merge_tree) + if (table->supportsDelete()) { /// Convert to MutationCommand MutationCommands mutation_commands; @@ -75,39 +75,45 @@ BlockIO InterpreterDeleteQuery::execute() table->mutate(mutation_commands, getContext()); return {}; } + else if (table->supportsLightweightDelete()) + { + if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. 
Set `allow_experimental_lightweight_delete` setting to enable it"); + + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + /// Build "UPDATE _row_exists = 0 WHERE predicate" query + mut_command.type = MutationCommand::Type::UPDATE; + mut_command.predicate = delete_query.predicate; + + auto command = std::make_shared<ASTAlterCommand>(); + command->type = ASTAlterCommand::UPDATE; + command->predicate = delete_query.predicate; + command->update_assignments = std::make_shared<ASTExpressionList>(); + auto set_row_does_not_exist = std::make_shared<ASTAssignment>(); + set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; + auto zero_value = std::make_shared<ASTLiteral>(DB::Field(UInt8(0))); + set_row_does_not_exist->children.push_back(zero_value); + command->update_assignments->children.push_back(set_row_does_not_exist); + command->children.push_back(command->predicate); + command->children.push_back(command->update_assignments); + mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; + mut_command.ast = command->ptr(); + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); + table->mutate(mutation_commands, getContext()); - if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it"); - - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - /// Build "UPDATE _row_exists = 0 WHERE predicate" query - mut_command.type = MutationCommand::Type::UPDATE; - mut_command.predicate = delete_query.predicate; - - auto command = std::make_shared<ASTAlterCommand>(); - command->type = ASTAlterCommand::UPDATE; - command->predicate = delete_query.predicate; - command->update_assignments = std::make_shared<ASTExpressionList>(); - auto set_row_does_not_exist = std::make_shared<ASTAssignment>(); - set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; - auto zero_value = std::make_shared<ASTLiteral>(DB::Field(UInt8(0))); - set_row_does_not_exist->children.push_back(zero_value); - command->update_assignments->children.push_back(set_row_does_not_exist); - command->children.push_back(command->predicate); - command->children.push_back(command->update_assignments); - mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; - mut_command.ast = command->ptr(); - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); - table->mutate(mutation_commands, getContext()); - - return {}; + return {}; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table"); + } } } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 26b8bce1f4a7..e363366b1a8b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -434,6 +434,13 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) const ProjectionsDescription & projections_desc = metadata_snapshot->getProjections(); NamesAndTypesList all_columns = columns_desc.getAllPhysical(); + /// Add _row_exists column if 
it is physically present in the part + if (auto part_storage = dynamic_pointer_cast<StorageFromMergeTreeDataPart>(storage)) + { + if (part_storage->hasLightweightDeletedMask()) + all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); + } + NameSet updated_columns; bool materialize_ttl_recalculate_only = materializeTTLRecalculateOnly(storage); diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index fce9833ddfbe..659fc483373b 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -5,16 +5,7 @@ #include -#if defined(__SSE2__) -# define LIBDIVIDE_SSE2 -#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__) -# define LIBDIVIDE_AVX512 -#elif defined(__AVX2__) -# define LIBDIVIDE_AVX2 -#elif defined(__aarch64__) && defined(__ARM_NEON) -# define LIBDIVIDE_NEON -#endif - +#include #include diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 2bd204a0d424..d26bd1fa3b33 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -357,12 +357,26 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl( /// Parse the query from string. try { - if (settings.dialect == Dialect::kusto && !internal) + const Dialect & dialect = settings.dialect; + + if (dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else if (dialect == Dialect::kusto_auto && !internal) + { + try { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + catch (...) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } } else { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73d46593e042..1648abdbf55d 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,6 +4,7 @@ add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) +add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 74d142924596..153046a610d9 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -46,7 +46,7 @@ #include #include - +#include namespace DB { @@ -109,30 +109,38 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserSelectWithUnionQuery select; ParserExplainQuery explain; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - ASTPtr result_node = nullptr; + ParserKeyword s_kql("KQL"); - if (ASTPtr select_node; select.parse(pos, select_node, expected)) + if (s_kql.ignore(pos, expected)) { - result_node = std::move(select_node); + if (!ParserKQLTableFunction().parse(pos, result_node, expected)) + return false; } - else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + else { - /// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...) - result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node)); - } - else - { - return false; - } + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; + if (ASTPtr select_node; select.parse(pos, select_node, expected)) + { + result_node = std::move(select_node); + } + else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + { + /// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...) 
+ result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node)); + } + else + { + return false; + } + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + } node = std::make_shared<ASTSubquery>(); node->children.push_back(result_node); @@ -142,6 +150,16 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + /// 'kql(' is used for subquery in Kusto, so kql should not be treated as an identifier if it is followed by ( + ParserKeyword s_kql("KQL"); + if (s_kql.ignore(pos, expected)) + { + if (pos->type == TokenType::OpeningRoundBracket) + { + --pos; + return false; + } + --pos; + } /// Identifier in backquotes or in double quotes if (pos->type == TokenType::QuotedIdentifier) { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index cbbee4a04e3f..d726d77239ef 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -662,6 +662,26 @@ class Layer } else { + /// enable using subscript operator for kql_array_sort + if (cur_op.function_name == "arrayElement" && !operands.empty()) + { + auto* first_arg_as_node = operands.front()->as<ASTFunction>(); + if (first_arg_as_node) + { + if (first_arg_as_node->name == "kql_array_sort_asc" || first_arg_as_node->name == "kql_array_sort_desc") + { + cur_op.function_name = "tupleElement"; + cur_op.type = OperatorType::TupleElement; + } + else if (first_arg_as_node->name == "arrayElement" && !first_arg_as_node->arguments->children.empty()) + { + auto arg_inside = first_arg_as_node->arguments->children[0]->as<ASTFunction>(); + if (arg_inside && (arg_inside->name == "kql_array_sort_asc" || arg_inside->name == "kql_array_sort_desc")) + first_arg_as_node->name = "tupleElement"; + } + } + } + function = makeASTFunction(cur_op); if (!popLastNOperands(function->children[0]->children, cur_op.arity)) @@ -2449,8 +2469,9 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { layers.back()->pushOperand(std::move(tmp)); } - else if (pos->type == TokenType::OpeningRoundBracket) + else if (pos->type == TokenType::OpeningRoundBracket || String(pos->begin, pos->end) == "kql") { + if (subquery_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md new file mode 100644 index 000000000000..a42dec76aeaf --- /dev/null +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -0,0 +1,1371 @@ +## KQL implemented features + +# January XX, 2023 +## Functions +- [range()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/rangefunction) +Differences from ADX: + Returns an empty array [] if the range is empty, while ADX returns NULL + The maximum number of array elements is 1,000,000 (a ClickHouse limitation); in ADX it is 1,048,576 +``` +print '-- range function int, int, int --'; +print range(1, 10, 2); +print '-- range function int, int --'; +print range(1, 10); +print '-- range function float, float, float --'; +print range(1.2, 10.3, 2.2); +print '-- range function positive float, float, int --'; +print range(1.2, 10.3, 2); +print '-- range function positive float, int, float --'; +print range(1.2, 10, 2.2); +print '-- range function positive integer, int, float --'; +print range(1, 10, 2.2); +print '-- range function positive integer, float, float --'; +print range(1, 10.5, 2.2); +print '-- range function positive float, int, int --'; +print range(1.2, 10, 2); +print 
'-- range function positive int, int, negative int --'; +print range(12, 3, -2); +print '-- range function positive float, int, negative float --'; +print range(12.8, 3, -2.3); +print '-- range function datetime, datetime, timespan --'; +print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h); +print '-- range function datetime, datetime, negative timespan --'; +print range(datetime('2001-01-03'), datetime('2001-01-02'), -5h); +print '-- range function datetime, datetime --'; +print range(datetime('2001-01-01'), datetime('2001-01-02')); +print '-- range function timespan, timespan, timespan --'; +print range(1h, 5h, 2h); +print '-- range function timespan, timespan --'; +print range(1h, 5h); +print '-- range function timespan, timespan, negative timespan --'; +print range(11h, 5h, -2h); +print '-- range function float timespan, timespan, timespan --'; +print range(1.5h, 5h, 2h); +print '-- range function endofday, endofday, timespan --'; +print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d); +``` + +## Case Insensitive Operators +- [in~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/inoperator) + `print t = 'a' in~ ('A', 'b', 'c')` + `Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter'))` +- [!in~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/not-in-operator) + `print t = 'a' !in~ (dynamic(['A', 'b', 'c']))` + `Customers | where FirstName !in~ ('peter', 'apple')` +- [=~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/equals-operator) + `Customers | where FirstName =~ 'peter' and LastName =~ 'naRA'` +- [!~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/not-equals-operator) + `Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA'` +## Aggregate Functions +- [take_any()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/take-any-aggfunction) + ``` + Note: * is not currently a supported argument. 
+ ``` +- [take_anyif()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/take-anyif-aggfunction) +## Operator +- [range](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/rangeoperator) + `range LastWeek from ago(7d) to now() step 1d` + `range Steps from 1 to 8 step 3` +- [top-nested](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/topnestedoperator) + + ``` + CREATE TABLE sales + (salesdate String,salesperson String,region String,amount UInt32) ENGINE = Memory; + + INSERT INTO sales VALUES ( '12/31/1995','Robert','ON-Ontario',1); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','ON-Ontario',2); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','QC-Quebec',3); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','MA-Manitoba',4); + INSERT INTO sales VALUES ( '12/31/1995','Steven','QC-Quebec',5); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON-Ontario',6); + INSERT INTO sales VALUES ( '03/29/1996','Robert','QC-Quebec',7); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON-Ontario',8); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','BC-British Columbia',9); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','QC-Quebec',10); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','MA-Manitoba',11); + INSERT INTO sales VALUES ( '03/29/1996','Steven','ON-Ontario',12); + INSERT INTO sales VALUES ( '03/29/1996','Steven','QC-Quebec',13); + INSERT INTO sales VALUES ( '03/29/1996','Steven','MA-Manitoba',14); + INSERT INTO sales VALUES ( '03/30/1996','Robert','ON-Ontario',15); + INSERT INTO sales VALUES ( '03/30/1996','Robert','QC-Quebec',16); + INSERT INTO sales VALUES ( '03/30/1996','Robert','MA-Manitoba',17); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','ON-Ontario',18); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','BC-British Columbia',19); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','QC-Quebec',20); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','MA-Manitoba',21); + INSERT INTO sales VALUES ( '03/30/1996','Steven','ON-Ontario',22); + INSERT INTO sales VALUES ( '03/30/1996','Steven','QC-Quebec',23); + INSERT INTO sales VALUES ( '03/30/1996','Steven','MA-Manitoba',24); + INSERT INTO sales VALUES ( '03/31/1996','Robert','MA-Manitoba',25); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','ON-Ontario',26); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','BC-British Columbia',27); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','QC-Quebec',28); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','MA-Manitoba',29); + INSERT INTO sales VALUES ( '03/31/1996','Steven','ON-Ontario',30); + + print '-- top 3 regions by sales--'; + sales | top-nested 3 of region by sum(amount); + + print '-- top 2 salespeople in each of these regions?--'; + sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount); + + print '--top 3 and other regions by sales--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount); + + print '--top 3 and other regions by sales and top 2 and other salespeople in each of these regions--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount); + + print '--top 3 and other regions by sales and top 2 salespeople in each of these regions--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount); + + print '--top 3 regions by sales and top 2 and other salespeople in each of 
these regions--'; + sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount); + + print '--top 3 regions by difference between max sales and min sales--'; + sales | top-nested 3 of region by sum(amount) - min(amount); + + print '-- top 3 regions using abbreviations by sales--'; + sales | top-nested 3 of substring(region, 0, 2) by sum(amount); + + print '-- all top regions by sales--'; + sales | top-nested of region by sum(amount); + ``` + +## Bugs +- [KQL Phase 2 - base64_encode_fromguid encodes strings as opposed to binary] +- [KQL Phase 2: summarize with bin and format_datetime] +- [make_datetime creates wrong date time] +- [KQL Phase 2: summarize using bin has different result than Azure Data Explorer using the same sample data] +- [KQL Phase 3: datetime should be rounded in certain cases] +- [kql_bin does not accept DateTime type] +- [KQL Phase 2 - totimespan should return null when convertion fails.] +- [reverse() with datetime and timespan arguments needs to be improved.] +- [String operator has throws exception when needle has white space or separator characters] + + +# December 7, 2022 + +## Functions +- [count_distinct](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/count-distinct-aggfunction) + `Customers | summarize count_distinct(Education);` +- [count_distinctif](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/count-distinctif-aggfunction) + `Customers | summarize count_distinctif(Education, Age > 30);` +- [iff](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/ifffunction) + `Customers | extend t = iff(Age <= 10, "smaller", "bigger");` +- [iif](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/iiffunction) + `Customers | extend t = iif(Age <= 10, "smaller", "bigger");` +## Bugs fixed +- [indexOf function doesn't work for extended parameters] +- [Create generic function for time arithmetic] +- [KQL Phase 2: tolong should return the number of ticks when supplied with a timespan] + +# November 23, 2022 + +## Operator +- [join](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/joinoperator?pivots=azuredataexplorer) + ``` + CREATE TABLE X (Key String, Value1 Int64) ENGINE = Memory; + INSERT INTO X VALUES ('a',1); + INSERT INTO X VALUES ('b',2); + INSERT INTO X VALUES ('b',3); + INSERT INTO X VALUES ('c',4); + + CREATE TABLE Y (Key String, Value2 Int64) ENGINE = Memory; + INSERT INTO Y VALUES ('b',10); + INSERT INTO Y VALUES ('c',20); + INSERT INTO Y VALUES ('c',30); + INSERT INTO Y VALUES ('d',40); + + Join flavor : + + Default join is innerunique + X | join Y on $left.Key == $right.Key ; + X | join kind=innerunique Y on Key ; + + Inner-join + X | join kind=inner Y on Key ; + + Left outer-join + X | join kind=leftouter Y on Key ; + + Right outer-join + X | join kind=rightouter Y on Key ; + + Full outer-join + X | join kind=fullouter Y on Key ; + + Left anti-join + X | join kind=leftanti Y on Key ; + + Right anti-join + X | join kind=rightanti Y on Key ; + + Left semi-join + X | join kind=leftsemi Y on Key ; + + Right semi-join + X | join kind=rightsemi Y on Key ; + ``` + **Deviation from ADX** + Because of the differences between KQL and SQL, 
the results may differ from ADX (KQL-CH takes the result of ClickHouse). + - columns + ADX : common columns are duplicated in the output + KQL-CH : only one column for common columns + - column name + ADX : a column with the same name (not common) -> column1 + KQL-CH : a column with the same name (not common) -> right_.column + - filters + ADX: Kusto is optimized to push filters that come after the join towards the appropriate join side, left or right, when possible + KQL-CH: in the KQL domain the schema of the tables is not known, so the push-down needs to be done manually by the user, like: + ``` + t1|join kind = innerunique t2 on key | where value == 'val1.2' + ``` + which needs to be changed by the user (if desired) to: + ``` + t1| where value == 'val1.2' | join kind = innerunique t2 on key + ``` + - semi join flavor + ADX : only returns left side or right side columns + KQL-CH : returns columns from both sides + - Join hints : not supported yet +- [lookup](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/lookupoperator) + lookup is a subset of join and only supports kind=leftouter and kind=inner; if kind is unspecified, kind=leftouter is used + ``` + DROP TABLE IF EXISTS FactTable; + CREATE TABLE FactTable (Row String, Personal String, Family String) ENGINE = Memory; + INSERT INTO FactTable VALUES ('1', 'Bill', 'Gates'); + INSERT INTO FactTable VALUES ('2', 'Bill', 'Clinton'); + INSERT INTO FactTable VALUES ('3', 'Bill', 'Clinton'); + INSERT INTO FactTable VALUES ('4', 'Steve', 'Ballmer'); + INSERT INTO FactTable VALUES ('5', 'Tim', 'Cook'); + + DROP TABLE IF EXISTS DimTable; + CREATE TABLE DimTable (Personal String, Family String, Alias String) ENGINE = Memory; + INSERT INTO DimTable VALUES ('Bill', 'Gates', 'billg'); + INSERT INTO DimTable VALUES ('Bill', 'Clinton', 'billc'); + INSERT INTO DimTable VALUES ('Steve', 'Ballmer', 'steveb'); + INSERT INTO DimTable VALUES ('Tim', 'Cook', 'timc'); + + FactTable | lookup kind=leftouter DimTable on Personal, Family + + FactTable | lookup kind=inner DimTable on Personal, Family + ``` + +## Bugs fixed + - [Incorrect Regx convertion] + - [KQL phase 2 - timespan calculation results in exception] + - [KQL phase 2 - format_timespan returns incorrect results] + - [Bin function should support time intervals less than 1 second] + - [KQL Phase 2: datetime subtraction results in exception] + - [Timespan() doesn't parse bareword arguments.] + - [KQL-phase2 distinct operator does not support alias] + +# November 7, 2022 +## Improvement +- [array_sort_asc](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortascfunction) and [array_sort_desc](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortdescfunction) + ``` + Returns the same number of arrays as in the input, with the first array sorted in ascending order, and the remaining arrays ordered to match the reordered first array. + + null will be returned for every array that differs in length from the first one. 
+ ``` + Because arrays in ClickHouse are not nullable, an array with a single NULL (`[NULL]`) is returned instead of null for every array that differs in length from the first one: + ``` + array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) -> [1,2,3,NULL],[10,20,30,40],[NULL] + ``` + The result can be used as a condition: + ``` + DROP TABLE IF EXISTS visit; + CREATE TABLE visit(pageid UInt8, ip_country Array(Nullable(String)), hit Array(Int64),duration Array(Int64)) ENGINE = Memory; + INSERT INTO visit VALUES (1,['CA', 'US','FR','Eng'], [11,16,12,20],[100,500,300,200]); + INSERT INTO visit VALUES (2,['Japan', 'Gem','FR','Eng'], [31,22,33,10],[510,410,310,210]); + INSERT INTO visit VALUES (3,['CA', 'Gem','Japan','Eng'], [25,10,23,11],[120,110,130]); + INSERT INTO visit VALUES (4,['CA', 'Gem',null,'Eng'], [5,10,3,2],[220,320,310,150]); + INSERT INTO visit VALUES (5,['FR', null,'US','Eng'], [16,12,23,10],[210,250,110,260]); + + visit | project *, array_sort_asc(ip_country, hit, duration) + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────────┐ + │ 2 │ ['Japan','Gem','FR','Eng'] │ [31,22,33,10] │ [510,410,310,210] │ (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) │ + └────────┴────────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────┐ + │ 1 │ ['CA','US','FR','Eng'] │ [11,16,12,20] │ [100,500,300,200] │ (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) │ + └────────┴────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country──────────────┬─hit────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)──────────┐ + │ 4 │ ['CA','Gem',NULL,'Eng'] │ [5,10,3,2] │ [220,320,310,150] │ (['CA','Eng','Gem',NULL],[5,2,10,3],[220,150,320,310]) │ + └────────┴─────────────────────────┴────────────┴───────────────────┴────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────┬─kql_array_sort_asc(ip_country, hit, duration)─────┐ + │ 3 │ ['CA','Gem','Japan','Eng'] │ [25,10,23,11] │ [120,110,130] │ (['CA','Eng','Gem','Japan'],[25,11,10,23],[NULL]) │ + └────────┴────────────────────────────┴───────────────┴───────────────┴───────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────┐ + │ 5 │ ['FR',NULL,'US','Eng'] │ [16,12,23,10] │ [210,250,110,260] │ (['Eng','FR','US',NULL],[10,16,23,12],[260,210,110,250]) │ + └────────┴────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────┘ + + visit | where isnull((array_sort_asc(ip_country, hit, duration))[2][0]) + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────┐ + │ 3 │ ['CA','Gem','Japan','Eng'] │ [25,10,23,11] │ [120,110,130] │ + └────────┴────────────────────────────┴───────────────┴───────────────┘ + ``` + + The following behaviors are the same as in Azure Data Explorer: + if no alias is specified, the function returns a single tuple that includes the arrays; array subscripting can be used to access the elements inside. 
For example: + ``` + print array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200]))[0] -> [1,2,3] + ``` + If a single alias is used, the first array is returned as a column: + ``` + print t = array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) + ┌─t───────┐ + │ [1,2,3] │ + └─────────┘ + ``` + If n aliases are used, the first n arrays are returned as columns: + ``` + print 5, (t,w) = array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) + ┌─5─┬─t───────┬─w──────────┐ + │ 5 │ [1,2,3] │ [40,20,30] │ + └───┴─────────┴────────────┘ + ``` +## New Functions +- [case](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/casefunction) + `Customers | extend t = case(Age <= 10, "A", Age <= 20, "B", Age <= 30, "C", "D");` +## Bugs fixed +- [summarize crash if aggregation function is missing] + ``` + Fixed by throwing an exception: + + Exception on client: + Code: 62. DB::Exception: Syntax error near keyword "by". (SYNTAX_ERROR) + ``` +- [make_datetime creates wrong date time] + +- [todecimal() doesn't work with column arguments] + + + +# October 25, 2022 +## New Operators +- [count](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/countoperator) +`Customers | count;` +`Customers | where Age< 30 | count;` +`Customers | where Age< 30 | limit 2 | count;` +`Customers | where Age< 30 | limit 2 | count | project Count;` + +- [top](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/topoperator) +`Customers | top 3 by Age;` +`Customers | top 3 by Age desc;` +`Customers | top 3 by Age asc | order by FirstName;` +`Customers | top 3 by FirstName desc nulls first;` +`Customers | top 3 by FirstName desc nulls last;` +`Customers | top 3 by Age | top 2 by FirstName;` + +- [top-hitters](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/tophittersoperator) +`Customers | top-hitters a = 2 of Age by extra;` +`Customers | top-hitters 2 of Age;` +`Customers | top-hitters 2 of Age by extra | top-hitters 2 of Age | order by Age;` +`Customers | top-hitters 2 of Age by extra | where Age > 30;` +`Customers | top-hitters 2 of Age by extra | where approximate_sum_extra < 200;` +`Customers | top-hitters 2 of Age | where approximate_count_Age > 2;` + +## Bugs fixed +- [parse_version needs to return null when parameter is empty string] +- [Different expressions with the same alias in function substring] +- [parse_url() output mismatch for empty string] +- [array_sum and array_length return incorrect results] + +# October 9, 2022 + +## Operator +- [distinct](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/distinctoperator) + `Customers | distinct *` + `Customers | distinct Occupation` + `Customers | distinct Occupation, Education` + `Customers | where Age <30 | distinct Occupation, Education` + `Customers | where Age <30 | order by Age| distinct Occupation, Education` + +## String functions +- [reverse](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/reversefunction) + `print reverse(123)` + `print reverse(123.34)` + `print reverse('clickhouse')` + `print reverse(3h)` + `print reverse(datetime(2017-1-1 12:23:34))` + +- [parse_command_line](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-command-line) + `print parse_command_line('echo \"hello world!\" print$?', \"Windows\")` + +- [parse_csv](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parsecsvfunction) + 
`print result=parse_csv('aa,b,cc')` + `print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z')` + +- [parse_json](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parsejsonfunction) + `print parse_json( dynamic([1, 2, 3]))` + `print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}')` + +- [extract_json](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) + `print extract_json( "$.a" , '{"a":123, "b":"{\\"c\\":456}"}' , typeof(int))` + +- [parse_version](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-versionfunction) + `print parse_version('1')` + `print parse_version('1.2.3.40')` + +## Bugs fixed +- [correct array index in expression] + array indexes should start with 0 +- [Summarize should generate alias or use correct columns] + - if bin is used, the column should be in the select list if no alias is included + - if no column is included in the aggregate function (like count()), the alias is the function name + '_', e.g. count_ + - if a column name is included in the aggregate function, the alias is the function name + "_" + the column name, e.g. count(Age) -> count_Age + - if the argument of an aggregate function is an expression, Columns1 ... ColumnsN are used as aliases + ``` + Customers | summarize count() by bin(Age, 10) + ┌─Age─┬─count_─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └─────┴────────┘ + Customers | summarize count(Age) by bin(Age, 10) + ┌─Age─┬─count_Age─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └─────┴───────────┘ + Customers | summarize count(Age+1) by bin(Age+1, 10) + ┌─Columns1─┬─count_─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └──────────┴────────┘ + ``` +- [extend doesn't replace existing columns] + +- [throw exception if use quoted string as alias] + +- [repeat() doesn't work with count argument as negative value] + +- [substring() doesn't work right with negative offsets] +- [endofmonth() doesn't return correct result] + +- [split() outputs array instead of string] + +- [split() returns empty string when arg goes out of bound] + +- [split() doesn't work with negative index] + + +# September 26, 2022 +## Bugs fixed: +["select * from kql" results in syntax error] +[Parsing ipv4 with arrayStringConcat throws exception] +[CH Client crashes on invalid function name] +[extract() doesn't work right with 4th argument i.e typeof()] +[parse_ipv6_mask return incorrect results] +[timespan returns wrong output in seconds] +[timespan doesn't work for nanoseconds and tick] +[totimespan() doesn't work for nanoseconds and tick timespan unit] +[data types should throw exception in certain cases] +[decimal does not support scientific notation] +[extend statement causes client core dumping] +[extend crashes with array sorting] +[Core dump happens when WHERE keyword doesn't follow field name] +[Null values are missing in the result of `make_list_with_nulls'] +[trim functions use non-unique aliases] +[format_ipv4_mask returns incorrect mask value] + +# September 12, 2022 +## Extend operator +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extendoperator +`T | extend duration = endTime - startTime` +`T | project endTime, startTime | extend duration = endTime - startTime` +## Array functions +- [array_reverse](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array-reverse-function) + `print array_reverse(dynamic(["this", "is", "an", "example"])) == dynamic(["example","an","is","this"])` + +- 
[array_rotate_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_rotate_leftfunction) + `print array_rotate_left(dynamic([1,2,3,4,5]), 2) == dynamic([3,4,5,1,2])` + `print array_rotate_left(dynamic([1,2,3,4,5]), -2) == dynamic([4,5,1,2,3])` + +- [array_rotate_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_rotate_rightfunction) + `print array_rotate_right(dynamic([1,2,3,4,5]), -2) == dynamic([3,4,5,1,2])` + `print array_rotate_right(dynamic([1,2,3,4,5]), 2) == dynamic([4,5,1,2,3])` + +- [array_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_shift_leftfunction) + `print array_shift_left(dynamic([1,2,3,4,5]), 2) == dynamic([3,4,5,null,null])` + `print array_shift_left(dynamic([1,2,3,4,5]), -2) == dynamic([null,null,1,2,3])` + `print array_shift_left(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3,4,5,-1,-1])` + `print array_shift_left(dynamic(['a', 'b', 'c']), 2) == dynamic(['c','',''])` + +- [array_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_shift_rightfunction) + `print array_shift_right(dynamic([1,2,3,4,5]), -2) == dynamic([3,4,5,null,null])` + `print array_shift_right(dynamic([1,2,3,4,5]), 2) == dynamic([null,null,1,2,3])` + `print array_shift_right(dynamic([1,2,3,4,5]), -2, -1) == dynamic([3,4,5,-1,-1])` + `print array_shift_right(dynamic(['a', 'b', 'c']), -2) == dynamic(['c','',''])` + +- [pack_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/packarrayfunction) + `print x = 1, y = x * 2, z = y * 2, pack_array(x,y,z)` + + Please note that only arrays of elements of the same type may be created at this time. The underlying reasons are explained under the release note section of the `dynamic` data type. + +- [repeat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/repeatfunction) + `print repeat(1, 0) == dynamic([])` + `print repeat(1, 3) == dynamic([1, 1, 1])` + `print repeat("asd", 3) == dynamic(['asd', 'asd', 'asd'])` + `print repeat(timespan(1d), 3) == dynamic([86400, 86400, 86400])` + `print repeat(true, 3) == dynamic([true, true, true])` + +- [zip](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/zipfunction) + `print zip(dynamic([1,3,5]), dynamic([2,4,6]))` + + Please note that only arrays of the same type are supported in our current implementation. The underlying reasons are explained under the release note section of the `dynamic` data type. + +## Data types + - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) + `print isnull(dynamic(null))` + `print dynamic(1) == 1` + `print dynamic(timespan(1d)) == 86400` + `print dynamic([1, 2, 3])` + `print dynamic([[1], [2], [3]])` + `print dynamic(['a', "b", 'c'])` + + According to the KQL specifications `dynamic` is a literal, which means that no function calls are permitted. Expressions producing literals such as `datetime` and `timespan` and their aliases (i.e. `date` and `time`, respectively) along with nested `dynamic` literals are allowed. + + Please note that our current implementation supports only scalars and arrays made up of elements of the same type. Support for mixed types and property bags is deferred for now, based on our understanding of the required effort and discussion with representatives of the QRadar team. 
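+ + A short, hedged recap of the `dynamic` rules above (these statements are illustrative only and are not taken from the changeset): + ``` + print dynamic(1) == 1 // scalar literal: allowed + print dynamic([1, 2, 3]) // array of same-type elements: allowed + print dynamic([timespan(1d), time(2d)]) // timespan literals and the 'time' alias: allowed + // print dynamic([1, 'a']) -- mixed-type arrays are not supported by this implementation + // print dynamic(now()) -- function calls are not permitted inside a dynamic literal + ```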
+ +## Mathematical functions + - [isnan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/isnanfunction) + `print isnan(double(nan)) == true` + `print isnan(4.2) == false` + `print isnan(4) == false` + `print isnan(real(+inf)) == false` + +## Set functions +Please note that functions returning arrays with set semantics may return them in no particular order, which may be subject to change in the future. + + - [jaccard_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/jaccard-index-function) + `print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])) == 0.75` + `print jaccard_index(dynamic([1, 2, 3]), dynamic([])) == 0` + `print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])) == 0` + `print isnan(jaccard_index(dynamic([]), dynamic([])))` + `print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])) == 0` + `print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])) == 0.75` + `print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])) == 0.25` + + - [set_difference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setdifferencefunction) + `print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])) == dynamic([])` + `print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([4, 5, 6])` + `print set_difference(dynamic([4]), dynamic([1, 2, 3])) == dynamic([4])` + `print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[1] == dynamic([1, 3])` + `print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[1] == dynamic(['d', 's'])` + `print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[1] == dynamic(['Chewbacca', 'Han Solo'])` + + - [set_has_element](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/sethaselementfunction) + `print set_has_element(dynamic(["this", "is", "an", "example"]), "example") == true` + `print set_has_element(dynamic(["this", "is", "an", "example"]), "examplee") == false` + `print set_has_element(dynamic([1, 2, 3]), 2) == true` + `print set_has_element(dynamic([1, 2, 3, 4.2]), 4) == false` + + - [set_intersect](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setintersectfunction) + `print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print set_intersect(dynamic([4]), dynamic([1, 2, 3])) == dynamic([])` + `print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])) == dynamic([5])` + `print set_intersect(dynamic([1, 2, 3]), dynamic([])) == dynamic([])` + `print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])) == dynamic(['a'])` + `print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])) == dynamic(['Darth Vader'])` + + - [set_union](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setunionfunction) + `print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3, 4, 
5, 6])` + `print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3, 4])` + `print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[1] == dynamic([1, 2, 3, 4, 5])` + `print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[1] == dynamic(['a', 'd', 'f', 's'])` + `print array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[1] == dynamic(['Chewbacca', 'Darth Sidious', 'Darth Vader', 'Han Solo'])` + +# August 29, 2022 + +## **mv-expand operator** +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/mvexpandoperator +Note: **expand on array columns only** +- test cases + ``` + CREATE TABLE T + ( + a UInt8, + b Array(String), + c Array(Int8), + d Array(Int8) + ) ENGINE = Memory; + + INSERT INTO T VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]) + + T | mv-expand c + T | mv-expand c, d + T | mv-expand b | mv-expand c + T | mv-expand c to typeof(bool) + T | mv-expand with_itemindex=index b, c, d + T | mv-expand array_concat(c,d) + T | mv-expand x = c, y = d + T | mv-expand xy = array_concat(c, d) + T | mv-expand with_itemindex=index c,d to typeof(bool) + ``` + +## **make-series operator** +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-seriesoperator + +- test case make-series on datetime column + ``` + CREATE TABLE T + ( + Supplier Nullable(String), + Fruit String , + Price Float64, + Purchase Date + ) ENGINE = Memory; + + INSERT INTO T VALUES ('Aldi','Apple',4,'2016-09-10'); + INSERT INTO T VALUES ('Costco','Apple',2,'2016-09-11'); + INSERT INTO T VALUES ('Aldi','Apple',6,'2016-09-10'); + INSERT INTO T VALUES ('Costco','Snargaluff',100,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Apple',7,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Snargaluff',400,'2016-09-11'); + INSERT INTO T VALUES ('Costco','Snargaluff',104,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Apple',5,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Snargaluff',600,'2016-09-11'); + INSERT INTO T VALUES ('Costco','Snargaluff',200,'2016-09-10'); + ``` + Has from and to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit + ``` + Has from, without to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) step 1d by Supplier, Fruit + ``` + Without from, has to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase to datetime(2016-09-13) step 1d by Supplier, Fruit + ``` + Without from, without to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit + ``` + Without by clause + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d + ``` + Without aggregation alias + ``` + T | make-series avg(Price) default=0 on Purchase step 1d by Supplier, Fruit + ``` + Has group expression alias + ``` + T | make-series avg(Price) default=0 on Purchase step 1d by Supplier_Name = Supplier, Fruit + ``` + Use different step value + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 3d by Supplier, Fruit + ``` +- test case make-series on numeric column + ``` + CREATE TABLE T2 + ( + Supplier Nullable(String), + Fruit String , + Price Int32, + Purchase Int32 + ) ENGINE = Memory; + + INSERT INTO 
T2 VALUES ('Aldi','Apple',4,10); + INSERT INTO T2 VALUES ('Costco','Apple',2,11); + INSERT INTO T2 VALUES ('Aldi','Apple',6,10); + INSERT INTO T2 VALUES ('Costco','Snargaluff',100,12); + INSERT INTO T2 VALUES ('Aldi','Apple',7,12); + INSERT INTO T2 VALUES ('Aldi','Snargaluff',400,11); + INSERT INTO T2 VALUES ('Costco','Snargaluff',104,12); + INSERT INTO T2 VALUES ('Aldi','Apple',5,12); + INSERT INTO T2 VALUES ('Aldi','Snargaluff',600,11); + INSERT INTO T2 VALUES ('Costco','Snargaluff',200,10); + ``` + Have from and to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit; + ``` + Has from , without to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 step 1.0 by Supplier, Fruit; + ``` + Without from , has to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase to 18 step 4.0 by Supplier, Fruit; + ``` + Without from , without to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0 by Supplier, Fruit; + ``` + Without by clause + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; + ``` + +## Aggregate Functions +- [bin](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binfunction) + `print bin(4.5, 1)` + `print bin(time(16d), 7d)` + `print bin(datetime(1970-05-11 13:45:07), 1d)` +- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) + `Customers | summarize t = stdev(Age) by FirstName` + +- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) + `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` + +- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) + `Customers | summarize t = binary_all_and(Age) by FirstName` + +- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) + `Customers | summarize t = binary_all_or(Age) by FirstName` + +- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) + `Customers | summarize t = binary_all_xor(Age) by FirstName` + +- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` + +- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` + +- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize t = percentile(Age, 50) by FirstName` + +- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` + +## Dynamic functions +- [array_sort_asc](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortascfunction) + **Only support the constant dynamic array.** + **Returns an array. 
So, each element of the input has to be of same datatype.** + `print t = array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c']))` + `print t = array_sort_asc(dynamic([4, 1, 3, 2]))` + `print t = array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))` + `print t = array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']))` + `print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false)` + `print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2)` + `print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)` + `print t = array_sort_asc( dynamic([null, null, null]) , false)` + `print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 > 2)` + `print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30, 50, 3]), 1 > 2)` + +- [array_sort_desc](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortdescfunction) **(only support the constant dynamic array)** + + `print t = array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c']))` + `print t = array_sort_desc(dynamic([4, 1, 3, 2]))` + `print t = array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))` + `print t = array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']))` + `print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false)` + `print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2)` + `print t = array_sort_desc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)` + `print t = array_sort_desc( dynamic([null, null, null]) , false)` + `print t = array_sort_desc(dynamic([2, 1, null, 3]), dynamic([20, 10, 40, 30]), 1 > 2)` + `print t = array_sort_desc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50, 3]), 1 > 2)` + +- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) + `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` + +- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` + `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + `print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1.1, 2.2, 3.3, 4.4, 5.5]), 999.99);` + `print t = array_iif(dynamic([true, false, true, false, true]), 90, dynamic([1, 3]));` + +- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) + `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` + `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` + `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` + +- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) + `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` + `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` + +## DateTimeFunctions + +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) + 
`print ago(2h)` + +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) + `print endofday(datetime(2017-01-01 10:10:17), -1)` + `print endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + `print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + `print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` + +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) + `print make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + `print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` + +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) + `print unixtime_milliseconds_todatetime(1546300800000)` + +- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) + `print unixtime_nanoseconds_todatetime(1546300800000000000)` + +- [datetime_part](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-partfunction) + `print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))` + +- [datetime_add](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-addfunction) + `print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))` + +- [format_timespan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-timespanfunction) + `print format_timespan(time(1d), 'd-[hh:mm:ss]')` + `print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')` + +- [format_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-datetimefunction) + `print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')` + `print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')` + +- [todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todatetimefunction) + `print todatetime('2014-05-25T08:20:03.123456Z')` + `print todatetime('2014-05-25 20:03.123')` + +- [totimespan] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/totimespanfunction) + ` print totimespan('0.01:34:23')` + `print totimespan(1d)` + +# August 15, 2022 + **double 
quote support** + ``print res = strcat("double ","quote")`` +## Aggregate functions + - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) + `print res = bin_at(6.5, 2.5, 7)` + `print res = bin_at(1h, 1d, 12h)` + `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))` + `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` + + - [array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) + *Supports only basic lookup. Do not support start_index, length and occurrence* + `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` + `print output = array_index_of(dynamic([1, 2, 3]), 2)` + - [array_sum](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array-sum-function) + `print output = array_sum(dynamic([2, 5, 3]))` + `print output = array_sum(dynamic([2.5, 5.5, 3]))` + - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) + `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` + `print output = array_length(dynamic([1, 2, 3]))` + +## Conversion +- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) + `print tobool(true) == true` + `print toboolean(false) == false` + `print tobool(0) == false` + `print toboolean(19819823) == true` + `print tobool(-2) == true` + `print isnull(toboolean('a'))` + `print tobool('true') == true` + `print toboolean('false') == false` + +- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) + `print todouble(4) == 4` + `print toreal(4.2) == 4.2` + `print isnull(todouble('a'))` + `print toreal('-0.3') == -0.3` + +- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) + `print isnull(toint('a'))` + `print toint(4) == 4` + `print toint('4') == 4` + `print isnull(toint(4.2))` + +- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) + `print tostring(123) == '123'` + `print tostring('asd') == 'asd'` + +## Data Types + - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) + *Supports only 1D array* + `print output = dynamic(['a', 'b', 'c'])` + `print output = dynamic([1, 2, 3])` + +- [bool,boolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/bool) + `print bool(1)` + `print boolean(0)` + +- [datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/datetime) + `print datetime(2015-12-31 23:59:59.9)` + `print datetime('2015-12-31 23:59:59.9')` + `print datetime("2015-12-31:)` + +- [guid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/guid) + `print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)` + `print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')` + `print guid('74be27de1e4e49d9b579fe0b331d3642')` + +- [int](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/int) + `print int(1)` + +- [long](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/long) + `print long(16)` + +- [real](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/real) + `print real(1)` + +- [timespan ,time](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/timespan) + **Note** the 
timespan is used for calculating datatime, so the output is in seconds. e.g. time(1h) = 3600 + `print 1d` + `print 30m` + `print time('0.12:34:56.7')` + `print time(2h)` + `print timespan(2h)` + + +## StringFunctions + +- [base64_encode_fromguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-encode-fromguid-function) +`print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')` +- [base64_decode_toarray](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64_decode_toarrayfunction) +`print base64_decode_toarray('S3VzdG8=')` +- [base64_decode_toguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-decode-toguid-function) +`print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')` +- [replace_regex](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/replace-regex-function) +`print replace_regex('Hello, World!', '.', '\\0\\0')` +- [has_any_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-index-function) +`print idx = has_any_index('this is an example', dynamic(['this', 'example']))` +- [translate](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/translatefunction) +`print translate('krasp', 'otsku', 'spark')` +- [trim](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimfunction) +`print trim('--', '--https://bing.com--')` +- [trim_end](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimendfunction) +`print trim_end('.com', 'bing.com')` +- [trim_start](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimstartfunction) +`print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))` + +## DateTimeFunctions +- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) + `print startofyear(datetime(2017-01-01 10:10:17), -1)` + `print startofyear(datetime(2017-01-01 10:10:17), 0)` + `print startofyear(datetime(2017-01-01 10:10:17), 1)` +- [weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) + `print week_of_year(datetime(2020-12-31))` + `print week_of_year(datetime(2020-06-15))` + `print week_of_year(datetime(1970-01-01))` + `print week_of_year(datetime(2000-01-01))` + +- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) + `print startofweek(datetime(2017-01-01 10:10:17), -1)` + `print startofweek(datetime(2017-01-01 10:10:17), 0)` + `print startofweek(datetime(2017-01-01 10:10:17), 1)` + +- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) + `print startofmonth(datetime(2017-01-01 10:10:17), -1)` + `print startofmonth(datetime(2017-01-01 10:10:17), 0)` + `print startofmonth(datetime(2017-01-01 10:10:17), 1)` + +- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) + `print startofday(datetime(2017-01-01 10:10:17), -1)` + `print startofday(datetime(2017-01-01 10:10:17), 0)` + `print startofday(datetime(2017-01-01 10:10:17), 1)` + +- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) + `print monthofyear(datetime("2015-12-14"))` + +- [hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) + `print hourofday(datetime(2015-12-14 18:54:00))` + +- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) + `print 
getyear(datetime(2015-10-12))` + +- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) + `print getmonth(datetime(2015-10-12))` + +- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) + `print dayofyear(datetime(2015-12-14))` + +- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) + `print (datetime(2015-12-14))` + +- [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) + `print unixtime_seconds_todatetime(1546300800)` + +- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) + `print dayofweek(datetime(2015-12-20))` + +- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) + `print now()` + `print now(2d)` + `print now(-2h)` + `print now(5microseconds)` + `print now(5seconds)` + `print now(6minutes)` + `print now(-2d) ` + `print now(time(1d))` + + +## Binary functions +- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) + `print binary_and(15, 3) == 3` + `print binary_and(1, 2) == 0` +- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) + `print binary_not(1) == -2` +- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) + `print binary_or(3, 8) == 11` + `print binary_or(1, 2) == 3` +- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) + `print binary_shift_left(1, 1) == 2` + `print binary_shift_left(1, 64) == 1` +- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) + `print binary_shift_right(1, 1) == 0` + `print binary_shift_right(1, 64) == 1` +- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) + `print binary_xor(1, 3) == 2` +- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) + `print bitset_count_ones(42) == 3` + +## IP functions +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) + `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` + `print format_ipv4(3232236031, 24) == '192.168.1.0'` +- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) + `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` + `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) + `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` + `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) + `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` + `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) + `print ipv6_compare('::ffff:7f00:1', 
'127.0.0.1') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` + `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) + `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` + `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) + `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` + `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` + `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` + `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) + `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` + `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` + +# August 1, 2022 + +**The config setting to allow modify dialect setting**. + - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. + + For example: + ` + + + random + kusto_auto + ` + + - Query can be executed with HTTP client as below once dialect is set in users.xml + ` echo "KQL query" | curl -sS "http://localhost:8123/?" --data-binary @- ` + + - To execute the query using clickhouse-client , Update clickhouse-client.xml as below and connect clickhouse-client with --config-file option (` clickhouse-client --config-file= `) + + ` + kusto_auto + ` + + OR + pass dialect setting with '--'. 
For example : + ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` + +- **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) + `print strcmp('abc','ABC')` + +- **parse_url** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlfunction) + `print Result = parse_url('scheme://username:password@www.google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')` + +- **parse_urlquery** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlqueryfunction) + `print Result = parse_urlquery('k1=v1&k2=v2&k3=v3')` + +- **print operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/printoperator) + `print x=1, s=strcat('Hello', ', ', 'World!')` + +- **Aggregate Functions:** + - [make_list()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelist-aggfunction) + `Customers | summarize t = make_list(FirstName) by FirstName` + `Customers | summarize t = make_list(FirstName, 10) by FirstName` + - [make_list_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelistif-aggfunction) + `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName` + - [make_list_with_nulls()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-list-with-nulls-aggfunction) + `Customers | summarize t = make_list_with_nulls(Age) by FirstName` + - [make_set()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makeset-aggfunction) + `Customers | summarize t = make_set(FirstName) by FirstName` + `Customers | summarize t = make_set(FirstName, 10) by FirstName` + - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) + `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` + +## IP functions + +- **The following functions now support arbitrary expressions as their argument:** + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + - [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + +# July 17, 2022 + +## Renamed dialect from sql_dialect to dialect + +`set dialect='clickhouse'` +`set dialect='kusto'` +`set dialect='kusto_auto'` + +## IP functions +- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) + `"Customers | project parse_ipv4('127.0.0.1')"` +- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) + `"Customers | project parse_ipv6('127.0.0.1')"` + +Please note that the functions listed below only take constant parameters for now. Further improvement is to be expected to support expressions. 
+ +- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + `"Customers | project ipv4_is_private('192.168.1.6/24')"` + `"Customers | project ipv4_is_private('192.168.1.6')"` +- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + `"Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')"` + `"Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')"` +- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + `"Customers | project ipv4_netmask_suffix('192.168.1.1/24')"` + `"Customers | project ipv4_netmask_suffix('192.168.1.1')"` + +## string functions +- **support subquery for `in` orerator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) + (subquery need to be wraped with bracket inside bracket) + + `Customers | where Age in ((Customers|project Age|where Age < 30))` + Note: case-insensitive not supported yet +- **has_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator) + `Customers|where Occupation has_any ('Skilled','abcd')` + note : subquery not supported yet +- **has _any** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator) + `Customers|where Occupation has_all ('Skilled','abcd')` + note : subquery not supported yet +- **countof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) + `Customers | project countof('The cat sat on the mat', 'at')` + `Customers | project countof('The cat sat on the mat', 'at', 'normal')` + `Customers | project countof('The cat sat on the mat', 'at', 'regex')` +- **extract** ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real))` + +- **extract_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction) + + `Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20')` + note: captureGroups not supported yet + +- **split** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) + `Customers | project split('aa_bb', '_')` + `Customers | project split('aaa_bbb_ccc', '_', 1)` + `Customers | project split('', '_')` + `Customers | project split('a__b', '_')` + `Customers | project split('aabbcc', 'bb')` + +- **strcat_delim** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction) + `Customers | project strcat_delim('-', '1', '2', 'A') , 1s)` + `Customers | project strcat_delim('-', '1', '2', strcat('A','b'))` + note: only support string now. 
+ +- **indexof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction) + `Customers | project indexof('abcdefg','cde')` + `Customers | project indexof('abcdefg','cde',2)` + `Customers | project indexof('abcdefg','cde',6)` + note: length and occurrence not supported yet + + + + +# July 4, 2022 + +## sql_dialect + +- default is `clickhouse` + `set sql_dialect='clickhouse'` +- only process kql + `set sql_dialect='kusto'` +- process both kql and CH sql + `set sql_dialect='kusto_auto'` +## KQL() function + + - create table + `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName,Age);` + verify the content of `kql_table` + `select * from kql_table` + + - insert into table + create a tmp table: + ``` + CREATE TABLE temp + ( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) + ) ENGINE = Memory; + ``` + `INSERT INTO temp select * from kql(Customers|project FirstName,LastName,Age);` + verify the content of `temp` + `select * from temp` + + - Select from kql() + `Select * from kql(Customers|project FirstName)` + +## KQL operators: + - Tabular expression statements + `Customers` + - Select Column + `Customers | project FirstName,LastName,Occupation` + - Limit returned results + `Customers | project FirstName,LastName,Occupation | take 1 | take 3` + - sort, order + `Customers | order by Age desc , FirstName asc` + - Filter + `Customers | where Occupation == 'Skilled Manual'` + - summarize + `Customers |summarize max(Age) by Occupation` + +## KQL string operators and functions + - contains + `Customers |where Education contains 'degree'` + - !contains + `Customers |where Education !contains 'degree'` + - contains_cs + `Customers |where Education contains 'Degree'` + - !contains_cs + `Customers |where Education !contains 'Degree'` + - endswith + `Customers | where FirstName endswith 'RE'` + - !endswith + `Customers | where !FirstName endswith 'RE'` + - endswith_cs + `Customers | where FirstName endswith_cs 're'` + - !endswith_cs + `Customers | where FirstName !endswith_cs 're'` + - == + `Customers | where Occupation == 'Skilled Manual'` + - != + `Customers | where Occupation != 'Skilled Manual'` + - has + `Customers | where Occupation has 'skilled'` + - !has + `Customers | where Occupation !has 'skilled'` + - has_cs + `Customers | where Occupation has 'Skilled'` + - !has_cs + `Customers | where Occupation !has 'Skilled'` + - hasprefix + `Customers | where Occupation hasprefix_cs 'Ab'` + - !hasprefix + `Customers | where Occupation !hasprefix_cs 'Ab'` + - hasprefix_cs + `Customers | where Occupation hasprefix_cs 'ab'` + - !hasprefix_cs + `Customers | where Occupation! 
hasprefix_cs 'ab'` + - hassuffix + `Customers | where Occupation hassuffix 'Ent'` + - !hassuffix + `Customers | where Occupation !hassuffix 'Ent'` + - hassuffix_cs + `Customers | where Occupation hassuffix 'ent'` + - !hassuffix_cs + `Customers | where Occupation hassuffix 'ent'` + - in + `Customers |where Education in ('Bachelors','High School')` + - !in + `Customers | where Education !in ('Bachelors','High School')` + - matches regex + `Customers | where FirstName matches regex 'P.*r'` + - startswith + `Customers | where FirstName startswith 'pet'` + - !startswith + `Customers | where FirstName !startswith 'pet'` + - startswith_cs + `Customers | where FirstName startswith_cs 'pet'` + - !startswith_cs + `Customers | where FirstName !startswith_cs 'pet'` + + - base64_encode_tostring() + `Customers | project base64_encode_tostring('Kusto1') | take 1` + - base64_decode_tostring() + `Customers | project base64_decode_tostring('S3VzdG8x') | take 1` + - isempty() + `Customers | where isempty(LastName)` + - isnotempty() + `Customers | where isnotempty(LastName)` + - isnotnull() + `Customers | where isnotnull(FirstName)` + - isnull() + `Customers | where isnull(FirstName)` + - url_decode() + `Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1` + - url_encode() + `Customers | project url_encode('https://www.test.com/hello word') | take 1` + - substring() + `Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))` + - strcat() + `Customers | project name = strcat(FirstName, ' ', LastName)` + - strlen() + `Customers | project FirstName, strlen(FirstName)` + - strrep() + `Customers | project strrep(FirstName,2,'_')` + - toupper() + `Customers | project toupper(FirstName)` + - tolower() + `Customers | project tolower(FirstName)` + + ## Aggregate Functions + - arg_max() + - arg_min() + - avg() + - avgif() + - count() + - countif() + - max() + - maxif() + - min() + - minif() + - sum() + - sumif() + - dcount() + - dcountif() + - bin diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp new file mode 100644 index 000000000000..4998cf080962 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -0,0 +1,351 @@ +#include "KQLFunctionFactory.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace DB::ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int SYNTAX_ERROR; +extern const int UNKNOWN_FUNCTION; +} + +namespace +{ +constexpr DB::TokenType determineClosingPair(const DB::TokenType token_type) +{ + if (token_type == DB::TokenType::OpeningCurlyBrace) + return DB::TokenType::ClosingCurlyBrace; + else if (token_type == DB::TokenType::OpeningRoundBracket) + return DB::TokenType::ClosingRoundBracket; + else if (token_type == DB::TokenType::OpeningSquareBracket) + return DB::TokenType::ClosingSquareBracket; + + throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Unhandled token: {}", magic_enum::enum_name(token_type)); +} + +constexpr bool isClosingBracket(const DB::TokenType token_type) +{ + return token_type == DB::TokenType::ClosingCurlyBrace || token_type == DB::TokenType::ClosingRoundBracket + || token_type == DB::TokenType::ClosingSquareBracket; +} + +constexpr bool isOpeningBracket(const DB::TokenType token_type) +{ + return token_type == DB::TokenType::OpeningCurlyBrace || token_type == 
DB::TokenType::OpeningRoundBracket + || token_type == DB::TokenType::OpeningSquareBracket; +} +} + +namespace DB +{ +bool IParserKQLFunction::convert(String & out, IParser::Pos & pos) +{ + return wrapConvertImpl( + pos, + IncreaseDepthTag{}, + [&] + { + bool res = convertImpl(out, pos); + if (!res) + out = ""; + return res; + }); +} + +bool IParserKQLFunction::directMapping( + String & out, IParser::Pos & pos, const std::string_view ch_fn, const Interval & argument_count_interval) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + out.append(ch_fn.data(), ch_fn.length()); + out.push_back('('); + + int argument_count = 0; + const auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos != begin) + out.append(", "); + + if (const auto argument = getOptionalArgument(fn_name, pos)) + { + ++argument_count; + out.append(*argument); + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (!argument_count_interval.IsWithinBounds(argument_count)) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "{}: between {} and {} arguments are expected, but {} were provided", + fn_name, + argument_count_interval.Min(), + argument_count_interval.Max(), + argument_count); + + out.push_back(')'); + return true; + } + } + + out.clear(); + pos = begin; + return false; +} + +String IParserKQLFunction::generateUniqueIdentifier() +{ + // This particular random generator hits each number exactly once before looping over. + // Because of this, it's sufficient for queries consisting of up to 2^16 (= 65536) distinct function calls. + // Reference: https://www.pcg-random.org/using-pcg-cpp.html#insecure-generators + static pcg16_once_insecure random_generator; + return std::to_string(random_generator()); +} + +String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state) +{ + if (auto optionalArgument = getOptionalArgument(function_name, pos, argument_state)) + return std::move(*optionalArgument); + + throw Exception(std::format("Required argument was not provided in {}", function_name), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); +} + +std::vector IParserKQLFunction::getArguments(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state) +{ + std::vector arguments; + while (auto argument = getOptionalArgument(function_name, pos, argument_state)) + arguments.push_back(std::move(*argument)); + + return arguments; +} + +String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) +{ + int32_t round_bracket_count = 0, square_bracket_count = 0; + if (pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) + return {}; + + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Need more argument(s) in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + std::vector tokens; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++round_bracket_count; + if (pos->type == TokenType::ClosingRoundBracket) + --round_bracket_count; + + if (pos->type == TokenType::OpeningSquareBracket) + ++square_bracket_count; + if (pos->type == TokenType::ClosingSquareBracket) + --square_bracket_count; + + if (!KQLOperators().convert(tokens, pos)) + { + if (pos->type == 
TokenType::BareWord) + { + tokens.push_back(IParserKQLFunction::getExpression(pos)); + } + else if ( + pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket + || pos->type == TokenType::ClosingSquareBracket) + { + if (pos->type == TokenType::Comma) + break; + if (pos->type == TokenType::ClosingRoundBracket && round_bracket_count == -1) + break; + if (pos->type == TokenType::ClosingSquareBracket && square_bracket_count == 0) + break; + tokens.push_back(String(pos->begin, pos->end)); + } + else + { + String token; + if (pos->type == TokenType::QuotedIdentifier) + token = "'" + String(pos->begin + 1, pos->end - 1) + "'"; + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + String array_index; + while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket) + { + array_index += getExpression(pos); + ++pos; + } + token = std::format("[ {0} >=0 ? {0} + 1 : {0}]", array_index); + } + else + token = String(pos->begin, pos->end); + + tokens.push_back(token); + } + } + + ++pos; + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) + { + if (pos->type == TokenType::Comma) + break; + if (pos->type == TokenType::ClosingRoundBracket && round_bracket_count == -1) + break; + if (pos->type == TokenType::ClosingSquareBracket && square_bracket_count == 0) + break; + } + } + + String converted_arg; + for (const auto & token : tokens) + converted_arg.append((converted_arg.empty() ? "" : " ") + token); + + return converted_arg; +} + +std::optional +IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state) +{ + if (const auto type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) + return {}; + + ++pos; + if (const auto type = pos->type; type == DB::TokenType::ClosingRoundBracket || type == DB::TokenType::ClosingSquareBracket) + return {}; + + if (argument_state == ArgumentState::Parsed) + return getConvertedArgument(function_name, pos); + + if (argument_state != ArgumentState::Raw) + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Argument extraction is not implemented for {}::{}", + magic_enum::enum_type_name(), + magic_enum::enum_name(argument_state)); + + const auto* begin = pos->begin; + std::stack scopes; + while (!pos->isEnd() && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket))) + { + const auto token_type = pos->type; + if (isOpeningBracket(token_type)) + scopes.push(token_type); + else if (isClosingBracket(token_type)) + { + if (scopes.empty() || determineClosingPair(scopes.top()) != token_type) + throw Exception( + DB::ErrorCodes::SYNTAX_ERROR, "Unmatched token: {} when parsing {}", magic_enum::enum_name(token_type), function_name); + + scopes.pop(); + } + + ++pos; + } + + return std::string(begin, pos->begin); +} + +String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) +{ + String fn_name(pos->begin, pos->end); + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return ""; + } + return fn_name; +} + +String IParserKQLFunction::kqlCallToExpression( + const std::string_view function_name, const std::initializer_list params, const uint32_t max_depth) +{ + return kqlCallToExpression(function_name, std::span(params), max_depth); +} + +String IParserKQLFunction::kqlCallToExpression( + const std::string_view function_name, const std::span params, const uint32_t max_depth) +{ + 
const auto params_str = std::accumulate( + std::cbegin(params), + std::cend(params), + String(), + [](String acc, const std::string_view param) + { + if (!acc.empty()) + acc.append(", "); + + acc.append(param.data(), param.length()); + return acc; + }); + + const auto kql_call = std::format("{}({})", function_name, params_str); + DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); + DB::IParser::Pos tokens_pos(call_tokens, max_depth); + return DB::IParserKQLFunction::getExpression(tokens_pos); +} + +void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) +{ + if (pos->type != TokenType::ClosingRoundBracket) + throw Exception("Too many arguments in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); +} + +String IParserKQLFunction::getExpression(IParser::Pos & pos) +{ + String arg(pos->begin, pos->end); + if (pos->type == TokenType::BareWord) + { + const auto fun = KQLFunctionFactory::get(arg); + if (String new_arg; fun && fun->convert(new_arg, pos)) + { + validateEndOfFunction(arg, pos); + arg = std::move(new_arg); + } + else + { + if (!fun) + { + ++pos; + if (pos->type == TokenType::OpeningRoundBracket) + { + if (Poco::toLower(arg) != "and" && Poco::toLower(arg) != "or") + throw Exception(arg + " is not a supported kusto function", ErrorCodes::UNKNOWN_FUNCTION); + } + --pos; + } + + if (std::optional ticks; ParserKQLTimespan::tryParse(extractTokenWithoutQuotes(pos), ticks) && ticks) + arg = kqlTicksToInterval(ticks); + } + } + else if (pos->type == TokenType::QuotedIdentifier) + arg = "'" + String(pos->begin + 1, pos->end - 1) + "'"; + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + String array_index; + while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket) + { + array_index += getExpression(pos); + ++pos; + } + arg = std::format("[ {0} >=0 ? 
{0} + 1 : {0}]", array_index); + } + + return arg; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h new file mode 100644 index 000000000000..f398ec8ea87b --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -0,0 +1,87 @@ +#pragma once + +#include + +#include + +namespace DB +{ +class Interval +{ +public: + using Representation = int; + + Interval(const Representation min_, const Representation max_) : max(max_), min(min_) { } + + Representation Max() const { return max; } + Representation Min() const { return min; } + bool IsWithinBounds(const Representation value) const { return min <= value && value <= max; } + + static constexpr auto max_bound = std::numeric_limits::max(); + static constexpr auto min_bound = std::numeric_limits::min(); + +private: + Representation max = max_bound; + Representation min = min_bound; +}; + +class IParserKQLFunction +{ +public: + enum class ArgumentState + { + Parsed, + Raw + }; + + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, const F & func) + { + IParser::Pos begin = pos; + bool res = func(); + if (!res) + pos = begin; + return res; + } + + struct IncreaseDepthTag + { + }; + + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, IncreaseDepthTag, const F & func) + { + IParser::Pos begin = pos; + pos.increaseDepth(); + bool res = func(); + pos.decreaseDepth(); + if (!res) + pos = begin; + return res; + } + + bool convert(String & out, IParser::Pos & pos); + virtual const char * getName() const = 0; + virtual ~IParserKQLFunction() = default; + + static String generateUniqueIdentifier(); + static String getArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed); + static std::vector + getArguments(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed); + static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); + static String getExpression(IParser::Pos & pos); + static String getKQLFunctionName(IParser::Pos & pos); + static std::optional + getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed); + static String + kqlCallToExpression(std::string_view function_name, std::initializer_list params, uint32_t max_depth); + static String kqlCallToExpression(std::string_view function_name, std::span params, uint32_t max_depth); + +protected: + virtual bool convertImpl(String & out, IParser::Pos & pos) = 0; + + static bool directMapping( + String & out, IParser::Pos & pos, std::string_view ch_fn, const Interval & argument_count_interval = {0, Interval::max_bound}); + static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp new file mode 100644 index 000000000000..512cd7f58cb3 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -0,0 +1,528 @@ +#include "KQLAggregationFunctions.h" + +#include + +#include + +namespace DB +{ + +bool ArgMax::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "argMax"); +} + +bool ArgMin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "argMin"); +} + +bool Avg::convertImpl(String & out, IParser::Pos & pos) +{ + return 
directMapping(out, pos, "avg"); +} + +bool AvgIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "avgIf"); +} + +bool BinaryAllAnd::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitAnd"); +} + +bool BinaryAllOr::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitOr"); +} + +bool BinaryAllXor::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitXor"); +} + +bool BuildSchema::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool Count::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "count"); +} + +bool CountIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "countIf"); +} + +bool DCount::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name, pos); + + out = "count ( DISTINCT " + value + " ) "; + return true; +} + +bool DCountIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name, pos); + ++pos; + String condition = getConvertedArgument(fn_name, pos); + out = "countIf ( DISTINCT " + value + " , " + condition + " ) "; + return true; +} + +bool MakeBag::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool MakeBagIf::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool MakeList::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + expr + " IS NOT NULL)"; + } + else + out = "groupArrayIf(" + expr + " , " + expr + " IS NOT NULL)"; + return true; +} + +bool MakeListIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + predicate + " )"; + } + else + out = "groupArrayIf(" + expr + " , " + predicate + " )"; + return true; +} + +bool MakeListWithNulls::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto column_name = getConvertedArgument(fn_name, pos); + out = "arrayConcat(groupArray(" + column_name + "), arrayMap(x -> null, range(0, toUInt32(count(*)-length( groupArray(" + column_name + + ") )),1)))"; + return true; +} + +bool MakeSet::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + 
const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupUniqArray(" + max_size + ")(" + expr + ")"; + } + else + out = "groupUniqArray(" + expr + ")"; + return true; +} + +bool MakeSetIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupUniqArrayIf(" + max_size + ")(" + expr + " , " + predicate + " )"; + } + else + out = "groupUniqArrayIf(" + expr + " , " + predicate + " )"; + return true; +} + +bool Max::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "max"); +} + +bool MaxIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "maxIf"); +} + +bool Min::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "min"); +} + +bool MinIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "minIf"); +} + +bool Percentile::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + + ++pos; + String value = getConvertedArgument(fn_name, pos); + trim(value); + + out = "quantile(" + value + "/100)(" + column_name + ")"; + return true; +} + +bool Percentilew::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + trim(frequency_column); + + ++pos; + String value = getConvertedArgument(fn_name, pos); + trim(value); + + out = "quantileExactWeighted( " + value + "/100)(" + bucket_column + "," + frequency_column + ")"; + return true; +} + +bool Percentiles::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + String expr = "quantiles("; + String value; + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + out = expr + " )(" + column_name + ")"; + return true; +} + +bool PercentilesArray::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + String expr = "quantiles("; + String value; + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket + && pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if (pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != 
TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; + } + } + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + --pos; + + expr.pop_back(); + expr.pop_back(); + expr = expr + ")(" + column_name + ")"; + out = expr; + return true; +} + +bool Percentilesw::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + trim(frequency_column); + + String expr = "quantilesExactWeighted( "; + String value; + + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + expr = expr + ")(" + bucket_column + "," + frequency_column + ")"; + out = expr; + return true; +} + +bool PercentileswArray::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + trim(frequency_column); + + String expr = "quantilesExactWeighted("; + String value; + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket + && pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if (pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; + } + } + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + --pos; + + expr.pop_back(); + expr.pop_back(); + expr = expr + ")(" + bucket_column + "," + frequency_column + ")"; + out = expr; + return true; +} + +bool Stdev::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + out = "sqrt(varSamp(" + expr + "))"; + return true; +} + +bool StdevIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + out = "sqrt(varSampIf(" + expr + ", " + predicate + "))"; + return true; +} + +bool Sum::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sum"); +} + +bool SumIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sumIf"); +} + +bool TakeAny::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + String expr; + String arg; + const auto begin = pos; + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos != begin) + expr.append(", "); 
+ ++pos; + arg = getConvertedArgument(fn_name, pos); + expr = expr + "any(" + arg + ")"; + } + out = expr; + return true; +} + +bool TakeAnyIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + out = "anyIf(" + expr + ", " + predicate + ")"; + return true; +} + +bool Variance::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool VarianceIf::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool CountDistinct::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const String expr = getArgument(fn_name, pos); + out = std::format("count(DISTINCT {})", expr); + + return true; +} + + +bool CountDistinctIf::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const String expr = getArgument(fn_name, pos); + const String predicate = getArgument(fn_name, pos); + out = std::format("countIf(DISTINCT {}, {})", expr, predicate); + + return true; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h new file mode 100644 index 000000000000..5e752b4c9c03 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -0,0 +1,281 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class ArgMax : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "arg_max()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArgMin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "arg_min()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Avg : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "avg()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class AvgIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "avgif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllAnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_and()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllOr : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_or()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllXor : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_xor()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BuildSchema : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "buildschema()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Count : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "count()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountIf : 
public IParserKQLFunction +{ +protected: + const char * getName() const override { return "countif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DCount : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dcount()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DCountIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dcountif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeBag : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_bag()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeBagIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_bag_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeList : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeListIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeListWithNulls : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list_with_nulls()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeSet : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_set()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeSetIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_set_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Max : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "max()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MaxIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "maxif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Min : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "min()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MinIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "minif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentile : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentile()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentilew : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilew()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentiles : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentiles()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PercentilesArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentiles_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentilesw : public IParserKQLFunction +{ +protected: + const char * getName() const override { return 
"percentilesw()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PercentileswArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilesw_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Stdev : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "stdev()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StdevIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "stdevif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Sum : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sum()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SumIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sumif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TakeAny : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "take_any()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TakeAnyIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "take_anyif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Variance : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "variance()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class VarianceIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "varianceif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountDistinct : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "count_distinct()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountDistinctIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "count_distinctif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp new file mode 100644 index 000000000000..5779f0ed3708 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -0,0 +1,84 @@ +#include "KQLBinaryFunctions.h" + +#include + +namespace DB +{ + +bool BinaryAnd::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitAnd(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; +} + +bool BinaryNot::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + out = std::format("bitNot(cast({0}, 'Int64'))", value); + return true; +} + +bool BinaryOr::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitOr(cast({0}, 'Int64'), cast({1}, 
'Int64'))", lhs, rhs); + return true; +} + +bool BinaryShiftLeft::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format("if({1} < 0, null, bitShiftLeft(cast({0}, 'Int64'), {1}))", value, count); + return true; +} + +bool BinaryShiftRight::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format("if({1} < 0, null, bitShiftRight(cast({0}, 'Int64'), {1}))", value, count); + return true; +} + +bool BinaryXor::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitXor(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; +} + +bool BitsetCountOnes::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "bitCount"); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h new file mode 100644 index 000000000000..591c0fd236e6 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h @@ -0,0 +1,57 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class BinaryAnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_and()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryNot : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_not()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryOr : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_or()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryShiftLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_shift_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryShiftRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_shift_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryXor : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_xor()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BitsetCountOnes : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bitset_count_ones()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp new file mode 100644 index 000000000000..48041b84952d --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -0,0 +1,103 @@ +#include "KQLCastingFunctions.h" + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +bool ToBool::convertImpl(String & out, IParser::Pos & pos) +{ + const auto 
function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format( + "multiIf(toString({0}) = 'true', true, " + "toString({0}) = 'false', false, toInt64OrNull(toString({0})) != 0)", + param); + return true; +} + +bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_todatetime"); +} + +bool ToDouble::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toFloat64OrNull(toString({0})) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", param); + return true; +} + +bool ToInt::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toInt32OrNull(toString({0})) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", param); + return true; +} + +bool ToLong::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toInt64OrNull(toString({0})) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", param); + return true; +} + +bool ToString::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format("ifNull(kql_tostring({0}), '')", argument); + return true; +} + +bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_totimespan"); +} + +bool ToDecimal::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral || pos->type == TokenType::Number) + { + --pos; + const auto arg = getArgument(fn_name, pos); + const auto scale = std::format("if(position({0}::String,'e') = 0 , ( countSubstrings({0}::String, '.') = 1 ? length(substr({0}::String, position({0}::String,'.') + 1)) : 0 ) , toUInt64(multiIf((position({0}::String,'e+') as x) >0 , substr({0}::String,x+2) , (position({0}::String,'e-') as y )>0 , substr({0}::String,y+2) , position({0}::String,'e-') = 0 AND position({0}::String,'e+') =0 AND position({0}::String,'e')>0, substr({0}::String,position({0}::String,'e')+1) , 0::String)))", arg); + out = std::format("toTypeName({0}) = 'String' OR toTypeName({0}) = 'FixedString' ? 
toDecimal128OrNull({0}::String , abs(34 - ({1}::UInt8))) : toDecimal128OrNull({0}::String , abs(17 - ({1}::UInt8)))", arg, scale); + } + else + { + --pos; + const auto arg = getArgument(fn_name, pos); + out = std::format("toDecimal128OrNull({0}::Nullable(String), 17) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", arg); + } + + return true; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h new file mode 100644 index 000000000000..72d5602dfcb3 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h @@ -0,0 +1,62 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class ToBool : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tobool()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todatetime()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToDouble : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todouble()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToInt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toint()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToLong : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tolong()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tostring()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "totimespan()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToDecimal : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todecimal()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp new file mode 100644 index 000000000000..eaf52a15b01d --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -0,0 +1,216 @@ +#include "KQLDataTypeFunctions.h" + +#include +#include + +#include +#include + +#include +#include +#include + +namespace DB::ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int SYNTAX_ERROR; +} + +namespace +{ +bool mapToAccurateCast(std::string & out, DB::IParser::Pos & pos, const std::string_view type_name) +{ + const auto function_name = DB::IParserKQLFunction::getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + ++pos; + if (const auto & type = pos->type; type == DB::TokenType::QuotedIdentifier || type == DB::TokenType::StringLiteral) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "String cannot be parsed as a literal of type {}", type_name); + + --pos; + + const auto arg = DB::IParserKQLFunction::getArgument(function_name, pos); + out = std::format( + "if(toTypeName({0}) = 'IntervalNanosecond' or isNull(accurateCastOrNull({0}, '{1}')) != isNull({0}), " + "accurateCastOrNull(throwIf(true, 'Failed to parse {1} literal'), '{1}'), accurateCastOrNull({0}, '{1}'))", + arg, + 
type_name); + + return true; +} +} + +namespace DB +{ +bool DatatypeBool::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToAccurateCast(out, pos, "Bool"); +} + +bool DatatypeDatetime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto argument = extractLiteralArgumentWithoutQuotes(fn_name, pos); + const auto mutated_argument = std::invoke( + [&argument] + { + if (Int64 value; (boost::conversion::try_lexical_convert(argument, value) && (value < 1900 || value > 2261)) + || Poco::toLower(argument) == "null") + return argument; + + return "'" + argument + "'"; + }); + + out = std::format("kql_datetime({})", mutated_argument); + return true; +} + +bool DatatypeDynamic::convertImpl(String & out, IParser::Pos & pos) +{ + static const std::unordered_set<std::string_view> ALLOWED_FUNCTIONS{"date", "datetime", "dynamic", "time", "timespan"}; + + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + ++pos; + if (pos->type == TokenType::OpeningCurlyBrace) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Property bags are not supported for now in {}", function_name); + + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + { + if (const auto token_type = pos->type; token_type == TokenType::BareWord || token_type == TokenType::Number + || token_type == TokenType::QuotedIdentifier || token_type == TokenType::StringLiteral) + { + if (const std::string_view token(pos->begin, pos->end); token_type == TokenType::BareWord && !ALLOWED_FUNCTIONS.contains(token)) + { + ++pos; + if (pos->type != TokenType::ClosingRoundBracket && pos->type != TokenType::ClosingSquareBracket + && pos->type != TokenType::Comma) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Expression {} is not supported inside {}", token, function_name); + + --pos; + } + + out.append(getConvertedArgument(function_name, pos)); + } + else + { + out.append(pos->begin, pos->end); + ++pos; + } + } + + return true; +} + +bool DatatypeGuid::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String guid_str; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + guid_str = String(pos->begin + 1, pos->end - 1); + else + { + auto start = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + if (pos->type == TokenType::ClosingRoundBracket) + break; + } + --pos; + guid_str = String(start->begin, pos->end); + } + out = std::format("toUUIDOrNull('{}')", guid_str); + ++pos; + return true; +} + +bool DatatypeInt::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToAccurateCast(out, pos, "Int32"); +} + +bool DatatypeLong::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToAccurateCast(out, pos, "Int64"); +} + +bool DatatypeReal::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToAccurateCast(out, pos, "Float64"); +} + +bool DatatypeTimespan::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto argument = extractLiteralArgumentWithoutQuotes(fn_name, pos); + const auto ticks = ParserKQLTimespan::parse(argument); + out = kqlTicksToInterval(ticks); + + return true; +} + +bool DatatypeDecimal::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); 
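+ // Conversion sketch: decimal() maps to toDecimal128, with the scale derived from the literal's textual form + // (the exponent digits in scientific notation, otherwise roughly the precision of 34 minus the integer-part length).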
+ if (fn_name.empty()) + return false; + + ++pos; + String arg; + int scale = 0; + int precision = 34; + + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + throw Exception("Failed to parse String as decimal literal: " + fn_name, ErrorCodes::BAD_ARGUMENTS); + + --pos; + arg = getArgument(fn_name, pos); + + //NULL expr returns NULL, not an exception + static const std::regex expr{"^[0-9]+e[+-]?[0-9]+"}; + bool is_string = std::any_of(arg.begin(), arg.end(), ::isalpha) && Poco::toUpper(arg) != "NULL" && !(std::regex_match(arg, expr)); + if (is_string) + throw Exception("Failed to parse String as decimal literal: " + fn_name, ErrorCodes::BAD_ARGUMENTS); + + if (std::regex_match(arg, expr)) + { + auto exponential_pos = arg.find("e"); + if (arg[exponential_pos + 1] == '+' || arg[exponential_pos + 1] == '-') + scale = std::stoi(arg.substr(exponential_pos + 2, arg.length())); + else + scale = std::stoi(arg.substr(exponential_pos + 1, arg.length())); + + out = std::format("toDecimal128({}::String,{})", arg, scale); + return true; + } + + if (const auto dot_pos = arg.find("."); dot_pos != String::npos) + { + const auto length = static_cast<int>(std::ssize(arg.substr(0, dot_pos - 1))); + scale = std::max(precision - length, 0); + } + + if (scale < 0 || Poco::toUpper(arg) == "NULL") + out = "NULL"; + else + out = std::format("toDecimal128({}::String,{})", arg, scale); + + return true; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h new file mode 100644 index 000000000000..f2a5013ed1f8 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h @@ -0,0 +1,69 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class DatatypeBool : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bool(),boolean()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDatetime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime(),date()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "guid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeInt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "int()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeLong : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "long()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeReal : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "real(),double()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeTimespan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "timespan(), time()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDecimal : public IParserKQLFunction +{ +protected: + const char * 
getName() const override { return "decimal()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp new file mode 100644 index 000000000000..2c80a383800e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -0,0 +1,526 @@ +#include "KQLDateTimeFunctions.h" + +#include + +#include + +#include +#include +#include +#include + +namespace +{ +bool mapToEndOfPeriod(std::string & out, DB::IParser::Pos & pos, const std::string_view period) +{ + const auto function_name = DB::IParserKQLFunction::getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto datetime = DB::IParserKQLFunction::getArgument(function_name, pos, DB::IParserKQLFunction::ArgumentState::Raw); + const auto offset = DB::IParserKQLFunction::getOptionalArgument(function_name, pos, DB::IParserKQLFunction::ArgumentState::Raw); + out = std::format( + "minus({}, {})", + DB::IParserKQLFunction::kqlCallToExpression( + std::format("startof{}", Poco::toLower(std::string(period))), + {datetime, std::format("{} + 1", offset.value_or("0"))}, + pos.max_depth), + DB::IParserKQLFunction::kqlCallToExpression("timespan", {"1tick"}, pos.max_depth)); + return true; +} + +bool mapToStartOfPeriod(std::string & out, DB::IParser::Pos & pos, const std::string_view period) +{ + const auto function_name = DB::IParserKQLFunction::getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto datetime = DB::IParserKQLFunction::getArgument(function_name, pos); + const auto offset = DB::IParserKQLFunction::getOptionalArgument(function_name, pos); + out = std::format("kql_todatetime(add{0}s(toStartOf{0}({1}), {2}))", period, datetime, offset.value_or("0")); + return true; +} +} + +namespace DB::ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int SYNTAX_ERROR; +} + +namespace DB +{ +bool Ago::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto offset = getOptionalArgument(function_name, pos, ArgumentState::Raw); + out = kqlCallToExpression( + "now", {std::format("-1 * {}", offset.value_or(kqlCallToExpression("timespan", {"0"}, pos.max_depth)))}, pos.max_depth); + return true; +} + +bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto period = getArgument(fn_name, pos); + //remove quotes from period. 
+ trim(period); + if (period.front() == '\"' || period.front() == '\'') + { + period.erase(0, 1); // erase the first quote + period.erase(period.size() - 1); // erase the last quote + } + + const auto offset = getArgument(fn_name, pos); + const auto datetime = getArgument(fn_name, pos); + + out = std::format("date_add({}, {}, {})", period, offset, datetime); + + return true; +} + +bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String part = Poco::toUpper(getConvertedArgument(fn_name, pos)); + trim(part); + if (part.front() == '\"' || part.front() == '\'') + { + part.erase(0, 1); // erase the first quote + part.erase(part.size() - 1); // erase the last quote + } + String date; + if (pos->type == TokenType::Comma) + { + ++pos; + date = getConvertedArgument(fn_name, pos); + } + String format; + + if (part == "YEAR") + format = "%G"; + else if (part == "QUARTER") + format = "%Q"; + else if (part == "MONTH") + format = "%m"; + else if (part == "WEEK_OF_YEAR") + format = "%V"; + else if (part == "DAY") + format = "%e"; + else if (part == "DAYOFYEAR") + format = "%j"; + else if (part == "HOUR") + format = "%I"; + else if (part == "MINUTE") + format = "%M"; + else if (part == "SECOND") + format = "%S"; + else + throw Exception("Unexpected argument " + part + " for " + fn_name, ErrorCodes::SYNTAX_ERROR); + + out = std::format("formatDateTime({}, '{}')", date, format); + return true; +} + +bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto period = getArgument(fn_name, pos); + const auto datetime_lhs = getArgument(fn_name, pos); + const auto datetime_rhs = getArgument(fn_name, pos); + out = std::format("dateDiff({}, {}, {})", period, datetime_rhs, datetime_lhs); + + return true; +} + +bool DayOfMonth::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toDayOfMonth"); +} + +bool DayOfWeek::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto datetime = getArgument(fn_name, pos); + out = std::format("(toDayOfWeek({}) % 7) * {}", datetime, kqlCallToExpression("timespan", {"1d"}, pos.max_depth)); + + return true; +} + +bool DayOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toDayOfYear"); +} + +bool EndOfMonth::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToEndOfPeriod(out, pos, "Month"); +} + +bool EndOfDay::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToEndOfPeriod(out, pos, "Day"); +} + +bool EndOfWeek::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToEndOfPeriod(out, pos, "Week"); +} + +bool EndOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToEndOfPeriod(out, pos, "Year"); +} + +bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String formatspecifier; + ++pos; + const auto datetime = getConvertedArgument(fn_name, pos); + ++pos; + auto format = getConvertedArgument(fn_name, pos); + trim(format); + + //remove quotes and end space from format argument. 
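+ // Mapping sketch: a KQL format such as 'yy-MM-dd HH:mm:ss' is rewritten token by token + // into the ClickHouse pattern '%y-%m-%d %H:%M:%S' for formatDateTime.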
+ if (format.front() == '\"' || format.front() == '\'') + { + format.erase( 0, 1); // erase the first quote + format.erase( format.size() - 1); // erase the last quote + } + + std::vector res; + getTokens(format, res); + std::string::size_type i = 0; + size_t decimal =0; + while (i < format.size()) + { + char c = format[i]; + if (!isalpha(c)) + { + //delimeter + if (c == ' ' || c == '-' || c == '_' || c == '[' || c == ']' || c == '/' || c == ',' || c == '.' || c == ':') + formatspecifier = formatspecifier + c; + else + throw Exception("Invalid format delimeter in function:" + fn_name, ErrorCodes::SYNTAX_ERROR); + ++i; + } + else + { + //format specifier + String arg = res.back(); + + if (arg == "y" || arg == "yy") + formatspecifier = formatspecifier + "%y"; + else if (arg == "yyyy") + formatspecifier = formatspecifier + "%Y"; + else if (arg == "M" || arg == "MM") + formatspecifier = formatspecifier + "%m"; + else if (arg == "s" || arg == "ss") + formatspecifier = formatspecifier + "%S"; + else if (arg == "m" || arg == "mm") + formatspecifier = formatspecifier + "%M"; + else if (arg == "h" || arg == "hh") + formatspecifier = formatspecifier + "%I"; + else if (arg == "H" || arg == "HH") + formatspecifier = formatspecifier + "%H"; + else if (arg == "d") + formatspecifier = formatspecifier + "%e"; + else if (arg == "dd") + formatspecifier = formatspecifier + "%d"; + else if (arg == "tt") + formatspecifier = formatspecifier + "%p"; + else if (arg.starts_with('f') || arg.starts_with('F')) + decimal = arg.size(); + else + throw Exception("Format specifier " + arg + " in function:" + fn_name + "is not supported", ErrorCodes::SYNTAX_ERROR); + res.pop_back(); + i = i + arg.size(); + } + } + if (decimal > 0 && formatspecifier.find('.') != String::npos) + { + + out = std::format("concat(" + "substring(toString(formatDateTime( {0} , '{1}')),1, position(toString(formatDateTime({0},'{1}')),'.')) ," + "substring(substring(toString({0}), position(toString({0}),'.')+1),1,{2})," + "substring(toString(formatDateTime( {0},'{1}')), position(toString(formatDateTime({0},'{1}')),'.')+1 ,length (toString(formatDateTime({0},'{1}'))))) ", datetime, formatspecifier,decimal); + } + else + out = std::format("formatDateTime( {0},'{1}')",datetime, formatspecifier); + + return true; +} + +bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos) +{ + static const std::unordered_set ALLOWED_DELIMITERS{' ', '/', '-', ':', ',', '.', '_', '[', ']'}; + static const std::unordered_map, bool, int, std::optional>> + ATTRIBUTES_BY_FORMAT_CHARACTER{ + {'d', {"1d", std::nullopt, false, 8, "leftPad"}}, + {'f', {"1tick", 10'000'000, true, 7, "rightPad"}}, + {'F', {"1tick", 10'000'000, true, 7, std::nullopt}}, + {'h', {"1h", 24, false, 2, "leftPad"}}, + {'H', {"1h", 24, false, 2, "leftPad"}}, + {'m', {"1m", 60, false, 2, "leftPad"}}, + {'s', {"1s", 60, false, 2, "leftPad"}}}; + + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto timespan = getArgument(fn_name, pos); + const auto format = getArgument(fn_name, pos); + if (std::ssize(format) < 3 || format.front() != format.back() || format.front() != '\'') + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected non-empty string literal as the second argument to {}", fn_name); + + std::string current_streak; + std::string delimited_parts; + const auto convert_streak = [¤t_streak, ×pan, &delimited_parts, &pos] + { + while (!current_streak.empty()) + { + if (!delimited_parts.empty()) + delimited_parts.append(", "); + + const 
auto attributes_it = ATTRIBUTES_BY_FORMAT_CHARACTER.find(current_streak.front()); + if (attributes_it == ATTRIBUTES_BY_FORMAT_CHARACTER.cend()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format character: {}", current_streak.front()); + + const auto & [timespan_unit, modulus, should_truncate, max_length, pad_function] = attributes_it->second; + const auto streak_length = std::ssize(current_streak); + const auto part_length = std::min(streak_length, static_cast<decltype(streak_length)>(max_length)); + current_streak.erase(current_streak.cbegin(), current_streak.cbegin() + part_length); + + auto expression = std::format("intDiv({}, {})", timespan, kqlCallToExpression("timespan", {timespan_unit}, pos.max_depth)); + expression = std::format("toString({})", modulus ? std::format("modulo({}, {})", expression, *modulus) : expression); + if (should_truncate) + expression = std::format("substring({}, 1, {})", expression, part_length); + + delimited_parts.append( + pad_function ? std::format("if(length({1}) < {2}, {0}({1}, {2}, '0'), {1})", *pad_function, expression, part_length) + : expression); + } + }; + + for (const auto & c : std::string_view(format.cbegin() + 1, format.cend() - 1)) + { + if (ALLOWED_DELIMITERS.contains(c)) + { + convert_streak(); + delimited_parts.append(std::format(", '{}'", c)); + } + else if (ATTRIBUTES_BY_FORMAT_CHARACTER.contains(c)) + { + if (!current_streak.empty() && current_streak.back() != c) + convert_streak(); + + current_streak.push_back(c); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected character '{}' in format string of {}", c, fn_name); + } + + convert_streak(); + out = "concat(" + delimited_parts + ", '')"; + return true; +} + +bool GetMonth::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toMonth"); +} + +bool GetYear::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toYear"); +} + +bool HoursOfDay::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toHour"); +} + +bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto arg1 = getArgument(fn_name, pos); + const auto arg2 = getArgument(fn_name, pos); + const auto arg3 = getOptionalArgument(fn_name, pos); + const auto arg4 = getOptionalArgument(fn_name, pos); + + const auto & [day, hour, minute, second] + = std::invoke([&arg1, &arg2, &arg3, &arg4] + { return arg4 ? 
std::make_tuple(arg1, arg2, *arg3, *arg4) : std::make_tuple("0", arg1, arg2, arg3.value_or("0")); }); + + out = std::format( + "{} * {} + {} * {} + {} * {} + {} * {}", + day, + kqlCallToExpression("timespan", {"1d"}, pos.max_depth), + hour, + kqlCallToExpression("timespan", {"1h"}, pos.max_depth), + minute, + kqlCallToExpression("timespan", {"1m"}, pos.max_depth), + second, + kqlCallToExpression("timespan", {"1s"}, pos.max_depth)); + + return true; +} + +bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto year = getArgument(fn_name, pos); + const auto month = getArgument(fn_name, pos); + const auto day = getArgument(fn_name, pos); + const auto hour = getOptionalArgument(fn_name, pos); + const auto minute = getOptionalArgument(fn_name, pos); + const auto second = getOptionalArgument(fn_name, pos); + out = std::format( + "if({0} between 1900 and 2261 and {1} between 1 and 12 and {3} between 0 and 59 and {4} between 0 and 59 and {5} >= 0 and {5} < 60 " + " and isNotNull(toModifiedJulianDayOrNull(concat(leftPad(toString({0}), 4, '0'), '-', leftPad(toString({1}), 2, '0'), '-', leftPad(toString({2}), 2, '0')))), " + "toDateTime64OrNull(toString(makeDateTime64({0}, {1}, {2}, {3}, {4}, truncate({5}), ({5} - truncate({5})) * 1e7, 7, 'UTC')), 9), null)", + year, + month, + day, + hour.value_or("0"), + minute.value_or("0"), + second.value_or("0")); + + return true; +} + +bool Now::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto offset = getOptionalArgument(fn_name, pos); + out = "now64(9, 'UTC')" + (offset ? " + " + *offset : ""); + + return true; +} + +bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Day"); +} + +bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Month"); +} + +bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Week"); +} + +bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Year"); +} + +bool UnixTimeMicrosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Micro({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeMillisecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Milli({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeNanosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Nano({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeSecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "{} accepts only long, int and double type of arguments", fn_name); + + const auto expression = getConvertedArgument(fn_name, pos); + out = std::format( + "if(toTypeName(assumeNotNull({0})) in ['Int32', 'Int64', 'Float64', 'UInt32', 'UInt64'], " + "kql_todatetime({0}), kql_todatetime(throwIf(true, '{1} only accepts int, long and double type of arguments')))", + expression, + fn_name); + + return true; +} + +bool WeekOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + const String time_str = getConvertedArgument(fn_name, pos); + out = std::format("toWeek({},3,'UTC')", time_str); + return true; +} + +bool MonthOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toMonth"); +} + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h new file mode 100644 index 000000000000..ada707f45b89 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -0,0 +1,235 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Ago : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ago()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeAdd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_add()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimePart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_part()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeDiff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_diff()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class 
FormatTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HoursOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "hourofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Now : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "now()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMicrosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_microseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMillisecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_milliseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeNanosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_nanoseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeSecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_seconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class WeekOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "week_of_year()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MonthOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "monthofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +inline void getTokens(String format, std::vector<String> & res) +{ + String str = format; + String token; + 
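+ // Collect maximal letter runs (the format specifiers), splitting on any non-letter delimiter; + // tokens are prepended so the caller can consume them left to right via res.back()/pop_back().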
auto pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + while (pos != String::npos) + { + if (pos != 0) + { + // Found a token + token = str.substr(0, pos); + res.insert(res.begin(), token); + } + str.erase(0, pos+1); // Always remove pos+1 to get rid of delimiter + pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + } + // Cover the last (or only) token + if (str.length() > 0) + { + token = str; + res.insert(res.begin(), token); + } +} + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp new file mode 100644 index 000000000000..d10b99ee6e88 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -0,0 +1,361 @@ +#include "KQLDynamicFunctions.h" + +#include <format> + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SYNTAX_ERROR; +} + +bool ArrayConcat::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "arrayConcat"); +} + +bool ArrayIif::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_ArrayIif"); +} + +bool ArrayIndexOf::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto array = getArgument(fn_name, pos); + const auto needle = getArgument(fn_name, pos); + out = "minus(indexOf(" + array + ", " + needle + "), 1)"; + + return true; +} + +bool ArrayLength::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + out = std::format("arrayLastIndex(x -> true, {0})", array); + + return true; +} + +bool ArrayReverse::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "arrayReverse"); +} + +bool ArrayRotateLeft::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format( + "arrayMap(x -> {0}[moduloOrZero(x + length({0}) + moduloOrZero({1}, toInt64(length({0}))), length({0})) + 1], " + "range(0, length({0})))", + array, + count); + + return true; +} + +bool ArrayRotateRight::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos, ArgumentState::Raw); + const auto count = getArgument(function_name, pos, ArgumentState::Raw); + out = kqlCallToExpression("array_rotate_left", {array, "-1 * " + count}, pos.max_depth); + + return true; +} + +bool ArrayShiftLeft::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + const auto fill = getOptionalArgument(function_name, pos); + out = std::format( + "arrayResize(if({1} > 0, arraySlice({0}, {1} + 1), arrayConcat(arrayWithConstant(abs({1}), fill_value_{3}), {0})), " + "length({0}), if(isNull({2}) and (extract(toTypeName({0}), 'Array\\((.*)\\)') as element_type_{3}) = 'String', " + "defaultValueOfTypeName(if(element_type_{3} = 
'Nothing', 'Nullable(Nothing)', element_type_{3})), {2}) as fill_value_{3})", + array, + count, + fill.value_or("null"), + generateUniqueIdentifier()); + + return true; +} + +bool ArrayShiftRight::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos, ArgumentState::Raw); + const auto count = getArgument(function_name, pos, ArgumentState::Raw); + const auto fill = getOptionalArgument(function_name, pos, ArgumentState::Raw); + + const auto negated_count = "-1 * " + count; + out = kqlCallToExpression( + "array_shift_left", + fill ? std::initializer_list<std::string_view>{array, negated_count, *fill} + : std::initializer_list<std::string_view>{array, negated_count}, + pos.max_depth); + + return true; +} + +bool ArraySlice::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto start = getArgument(function_name, pos); + const auto end = getArgument(function_name, pos); + + out = std::format( + "arraySlice({0}, plus(1, if({1} >= 0, {1}, arrayMax([-length({0}), {1}]) + length({0}))) as offset_{3}, " + " plus(1, if({2} >= 0, {2}, arrayMax([-length({0}), {2}]) + length({0}))) - offset_{3} + 1)", + array, + start, + end, + generateUniqueIdentifier()); + + return true; +} + +bool ArraySortAsc::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_array_sort_asc"); +} + +bool ArraySortDesc::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_array_sort_desc"); +} + +bool ArraySplit::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto indices = getArgument(function_name, pos); + + out = std::format( + "if(empty(arrayMap(x -> if(x >= 0, x, arrayMax([0, x + length({0})::Int64])), flatten([{1}])) as indices_{2}), [{0}], " + "arrayConcat([arraySlice({0}, 1, indices_{2}[1])], arrayMap(i -> arraySlice({0}, indices_{2}[i] + 1, " + "if(i = length(indices_{2}), length({0})::Int64, indices_{2}[i + 1]::Int64) - indices_{2}[i]), " + "range(1, length(indices_{2}) + 1))))", + array, + indices, + generateUniqueIdentifier()); + + return true; +} + +bool ArraySum::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format( + "if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> isNotNull(x), {0}))), " + "'Array\\((.*)\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), " + "arraySum(x -> toFloat64OrDefault(x), {0}), null)", + argument, + generateUniqueIdentifier()); + + return true; +} + +bool BagKeys::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool BagMerge::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool BagRemoveKeys::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool JaccardIndex::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = 
getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + out = std::format( + "divide(length({0}), length({1}))", + kqlCallToExpression("set_intersect", {lhs, rhs}, pos.max_depth), + kqlCallToExpression("set_union", {lhs, rhs}, pos.max_depth)); + + return true; +} + +bool Pack::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool PackAll::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool PackArray::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "array", {1, Interval::max_bound}); +} + +bool Repeat::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + String value = getArgument(function_name, pos); + String count = getArgument(function_name, pos); + + value.erase(remove(value.begin(), value.end(), ' '), value.end()); + count.erase(remove(count.begin(), count.end(), ' '), count.end()); + + if (count.empty()) + throw Exception("number of arguments do not match in function: " + function_name, ErrorCodes::SYNTAX_ERROR); + else + out = "if(" + count + " < 0, [NULL], " + std::format("arrayWithConstant(abs({1}), {0}))", value, count); + + return true; +} + +bool SetDifference::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = std::invoke( + [&function_name, &pos] + { + std::vector arrays{getArgument(function_name, pos, ArgumentState::Raw)}; + while (auto next_array = getOptionalArgument(function_name, pos, ArgumentState::Raw)) + arrays.push_back(*next_array); + + return kqlCallToExpression("set_union", std::vector(arrays.cbegin(), arrays.cend()), pos.max_depth); + }); + + out = std::format("arrayFilter(x -> not has({1}, x), arrayDistinct({0}))", lhs, rhs); + + return true; +} + +bool SetHasElement::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "has"); +} + +bool SetIntersect::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "arrayIntersect"); +} + +bool SetUnion::convertImpl(String & out, IParser::Pos & pos) +{ + if (!directMapping(out, pos, "arrayConcat")) + return false; + + out = std::format("arrayDistinct({0})", out); + + return true; +} + +bool TreePath::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool Zip::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto arguments = getArguments(function_name, pos); + if (const auto size = arguments.size(); size < 2 || size > 16) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Between 2 and 16 arguments are expected, but {} were provided", size); + + const auto unique_identifier = generateUniqueIdentifier(); + const auto resized_arguments = std::invoke( + [&arguments, &unique_identifier] + { + String lengths; + for (int i = 0; i < std::ssize(arguments); ++i) + { + lengths.append(i > 0 ? 
", " : ""); + lengths.append(std::format( + "length(if(match(toTypeName({0}), 'Array\\(Nullable\\(.*\\)\\)'), {0}, " + "cast({0}, concat('Array(', extract(toTypeName(if(length({0}) = 0, [NULL], {0})), 'Array\\((.*)\\)'), ')'))) as arg{1}_{2})", + arguments[i], + i, + unique_identifier)); + } + + auto result = std::format("arrayResize(arg0_{1}, arrayMax([{0}]) as max_length_{1}, null)", lengths, unique_identifier); + for (int i = 1; i < std::ssize(arguments); ++i) + result.append(std::format(", arrayResize(arg{0}_{1}, max_length_{1}, null)", i, unique_identifier)); + + return result; + }); + + out = std::format("arrayMap(t -> [untuple(t)], arrayZip({0}))", resized_arguments); + + return true; +} + +bool Range::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_range"); +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h new file mode 100644 index 000000000000..553ed57332c8 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h @@ -0,0 +1,210 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class ArrayConcat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_concat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayIif : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_iif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayIndexOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_index_of()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayLength : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_length()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayReverse : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_reverse()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayRotateLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_rotate_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayRotateRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_rotate_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayShiftLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_shift_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayShiftRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_shift_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySlice : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_slice()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySortAsc : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sort_asc()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySortDesc : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sort_desc()"; } + bool convertImpl(String &out,IParser::Pos &pos) 
override; +}; + +class ArraySplit : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_split()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySum : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sum()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagKeys : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_keys()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagMerge : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_merge()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagRemoveKeys : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_remove_keys()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class JaccardIndex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "jaccard_index()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Pack : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PackAll : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack_all()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PackArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Repeat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "repeat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetDifference : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_difference()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetHasElement : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_has_element()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetIntersect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_intersect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetUnion : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_union()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TreePath : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "treepath()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Zip : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "zip()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Range : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "range()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp new file mode 100644 index 000000000000..7153177086a9 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -0,0 
+1,1054 @@ +#include "KQLFunctionFactory.h" +#include "KQLAggregationFunctions.h" +#include "KQLBinaryFunctions.h" +#include "KQLCastingFunctions.h" +#include "KQLDataTypeFunctions.h" +#include "KQLDateTimeFunctions.h" +#include "KQLDynamicFunctions.h" +#include "KQLGeneralFunctions.h" +#include "KQLIPFunctions.h" +#include "KQLMathematicalFunctions.h" +#include "KQLStringFunctions.h" +#include "KQLTimeSeriesFunctions.h" + +#include + +namespace +{ +enum class KQLFunction : uint16_t +{ + none, + ago, + datetime_add, + datetime_part, + datetime_diff, + dayofmonth, + dayofweek, + dayofyear, + endofday, + endofweek, + endofyear, + endofmonth, + monthofyear, + format_datetime, + format_timespan, + getmonth, + getyear, + hourofday, + make_timespan, + make_datetime, + now, + startofday, + startofmonth, + startofweek, + startofyear, + todatetime, + totimespan, + unixtime_microseconds_todatetime, + unixtime_milliseconds_todatetime, + unixtime_nanoseconds_todatetime, + unixtime_seconds_todatetime, + week_of_year, + + base64_encode_tostring, + base64_encode_fromguid, + base64_decode_tostring, + base64_decode_toarray, + base64_decode_toguid, + countof, + extract, + extract_all, + extract_json, + has_any_index, + indexof, + isempty, + isnan, + isnotempty, + isnotnull, + isnull, + parse_command_line, + parse_csv, + parse_json, + parse_url, + parse_urlquery, + parse_version, + replace_regex, + reverse, + split, + strcat, + strcat_delim, + strcmp, + strlen, + strrep, + substring, + tolower, + toupper, + translate, + trim, + trim_end, + trim_start, + url_decode, + url_encode, + + array_concat, + array_iif, + array_index_of, + array_length, + array_reverse, + array_rotate_left, + array_rotate_right, + array_shift_left, + array_shift_right, + array_slice, + array_sort_asc, + array_sort_desc, + array_split, + array_sum, + bag_keys, + bag_merge, + bag_remove_keys, + jaccard_index, + pack, + pack_all, + pack_array, + repeat, + set_difference, + set_has_element, + set_intersect, + set_union, + treepath, + zip, + + tobool, + todouble, + toint, + tolong, + tostring, + todecimal, + + arg_max, + arg_min, + avg, + avgif, + binary_all_and, + binary_all_or, + binary_all_xor, + buildschema, + count, + countif, + dcount, + dcountif, + make_bag, + make_bag_if, + make_list, + make_list_if, + make_list_with_nulls, + make_set, + make_set_if, + max, + maxif, + min, + minif, + percentile, + percentilew, + percentiles, + percentiles_array, + percentilesw, + percentilesw_array, + stdev, + stdevif, + sum, + sumif, + take_any, + take_anyif, + variance, + varianceif, + count_distinct, + count_distinctif, + + series_fir, + series_iir, + series_fit_line, + series_fit_line_dynamic, + series_fit_2lines, + series_fit_2lines_dynamic, + series_outliers, + series_periods_detect, + series_periods_validate, + series_stats_dynamic, + series_stats, + series_fill_backward, + series_fill_const, + series_fill_forward, + series_fill_linear, + + ipv4_compare, + ipv4_is_in_range, + ipv4_is_match, + ipv4_is_private, + ipv4_netmask_suffix, + parse_ipv4, + parse_ipv4_mask, + has_ipv6, + has_any_ipv6, + has_ipv6_prefix, + has_any_ipv6_prefix, + ipv6_compare, + ipv6_is_match, + parse_ipv6, + parse_ipv6_mask, + format_ipv4, + format_ipv4_mask, + + binary_and, + binary_not, + binary_or, + binary_shift_left, + binary_shift_right, + binary_xor, + bitset_count_ones, + + bin, + bin_at, + kase, + iff, + iif, + + datatype_bool, + datatype_datetime, + datatype_dynamic, + datatype_guid, + datatype_int, + datatype_long, + datatype_real, + datatype_timespan, + 
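// NOTE: the has_ipv6* enumerators declared earlier in this enum have no KQL_FUNCTIONS entries yet; their parser classes throw NOT_IMPLEMENTED (see KQLIPFunctions.cpp). +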
datatype_decimal, + range +}; + +const std::unordered_map KQL_FUNCTIONS{ + {"ago", KQLFunction::ago}, + {"datetime_add", KQLFunction::datetime_add}, + {"datetime_part", KQLFunction::datetime_part}, + {"datetime_diff", KQLFunction::datetime_diff}, + {"dayofmonth", KQLFunction::dayofmonth}, + {"dayofweek", KQLFunction::dayofweek}, + {"dayofyear", KQLFunction::dayofyear}, + {"endofday", KQLFunction::endofday}, + {"endofweek", KQLFunction::endofweek}, + {"endofyear", KQLFunction::endofyear}, + {"endofmonth", KQLFunction::endofmonth}, + + {"format_datetime", KQLFunction::format_datetime}, + {"format_timespan", KQLFunction::format_timespan}, + {"getmonth", KQLFunction::getmonth}, + {"getyear", KQLFunction::getyear}, + {"hourofday", KQLFunction::hourofday}, + {"make_timespan", KQLFunction::make_timespan}, + {"make_datetime", KQLFunction::make_datetime}, + {"now", KQLFunction::now}, + {"startofday", KQLFunction::startofday}, + {"startofmonth", KQLFunction::startofmonth}, + {"startofweek", KQLFunction::startofweek}, + {"startofyear", KQLFunction::startofyear}, + {"todatetime", KQLFunction::todatetime}, + {"totimespan", KQLFunction::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunction::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunction::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", KQLFunction::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunction::unixtime_seconds_todatetime}, + {"week_of_year", KQLFunction::week_of_year}, + {"monthofyear", KQLFunction::monthofyear}, + {"base64_encode_tostring", KQLFunction::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunction::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunction::base64_decode_tostring}, + {"base64_decode_toarray", KQLFunction::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunction::base64_decode_toguid}, + {"countof", KQLFunction::countof}, + {"extract", KQLFunction::extract}, + {"extract_all", KQLFunction::extract_all}, + {"extract_json", KQLFunction::extract_json}, + {"extractjson", KQLFunction::extract_json}, + {"has_any_index", KQLFunction::has_any_index}, + {"indexof", KQLFunction::indexof}, + {"isempty", KQLFunction::isempty}, + {"isnan", KQLFunction::isnan}, + {"isnotempty", KQLFunction::isnotempty}, + {"notempty", KQLFunction::isnotempty}, + {"isnotnull", KQLFunction::isnotnull}, + {"notnull", KQLFunction::isnotnull}, + {"isnull", KQLFunction::isnull}, + {"parse_command_line", KQLFunction::parse_command_line}, + {"parse_csv", KQLFunction::parse_csv}, + {"parse_json", KQLFunction::parse_json}, + {"parse_url", KQLFunction::parse_url}, + {"parse_urlquery", KQLFunction::parse_urlquery}, + {"parse_version", KQLFunction::parse_version}, + {"replace_regex", KQLFunction::replace_regex}, + {"reverse", KQLFunction::reverse}, + {"split", KQLFunction::split}, + {"strcat", KQLFunction::strcat}, + {"strcat_delim", KQLFunction::strcat_delim}, + {"strcmp", KQLFunction::strcmp}, + {"strlen", KQLFunction::strlen}, + {"strrep", KQLFunction::strrep}, + {"substring", KQLFunction::substring}, + {"tolower", KQLFunction::tolower}, + {"toupper", KQLFunction::toupper}, + {"translate", KQLFunction::translate}, + {"trim", KQLFunction::trim}, + {"trim_end", KQLFunction::trim_end}, + {"trim_start", KQLFunction::trim_start}, + {"url_decode", KQLFunction::url_decode}, + {"url_encode", KQLFunction::url_encode}, + + {"array_concat", KQLFunction::array_concat}, + {"array_iff", KQLFunction::array_iif}, + {"array_iif", KQLFunction::array_iif}, 
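+    // Alternative KQL spellings intentionally share one enumerator: "array_iff"/"array_iif" here, "extractjson"/"extract_json" and "notempty"/"isnotempty" above, "floor"/"bin" below.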
+ {"array_index_of", KQLFunction::array_index_of}, + {"array_length", KQLFunction::array_length}, + {"array_reverse", KQLFunction::array_reverse}, + {"array_rotate_left", KQLFunction::array_rotate_left}, + {"array_rotate_right", KQLFunction::array_rotate_right}, + {"array_shift_left", KQLFunction::array_shift_left}, + {"array_shift_right", KQLFunction::array_shift_right}, + {"array_slice", KQLFunction::array_slice}, + {"array_sort_asc", KQLFunction::array_sort_asc}, + {"array_sort_desc", KQLFunction::array_sort_desc}, + {"array_split", KQLFunction::array_split}, + {"array_sum", KQLFunction::array_sum}, + {"bag_keys", KQLFunction::bag_keys}, + {"bag_merge", KQLFunction::bag_merge}, + {"bag_remove_keys", KQLFunction::bag_remove_keys}, + {"jaccard_index", KQLFunction::jaccard_index}, + {"pack", KQLFunction::pack}, + {"pack_all", KQLFunction::pack_all}, + {"pack_array", KQLFunction::pack_array}, + {"repeat", KQLFunction::repeat}, + {"set_difference", KQLFunction::set_difference}, + {"set_has_element", KQLFunction::set_has_element}, + {"set_intersect", KQLFunction::set_intersect}, + {"set_union", KQLFunction::set_union}, + {"treepath", KQLFunction::treepath}, + {"zip", KQLFunction::zip}, + + {"tobool", KQLFunction::tobool}, + {"toboolean", KQLFunction::tobool}, + {"todouble", KQLFunction::todouble}, + {"toint", KQLFunction::toint}, + {"tolong", KQLFunction::tolong}, + {"toreal", KQLFunction::todouble}, + {"tostring", KQLFunction::tostring}, + {"totimespan", KQLFunction::totimespan}, + {"todecimal", KQLFunction::todecimal}, + + {"arg_max", KQLFunction::arg_max}, + {"arg_min", KQLFunction::arg_min}, + {"avg", KQLFunction::avg}, + {"avgif", KQLFunction::avgif}, + {"binary_all_and", KQLFunction::binary_all_and}, + {"binary_all_or", KQLFunction::binary_all_or}, + {"binary_all_xor", KQLFunction::binary_all_xor}, + {"buildschema", KQLFunction::buildschema}, + {"count", KQLFunction::count}, + {"countif", KQLFunction::countif}, + {"dcount", KQLFunction::dcount}, + {"dcountif", KQLFunction::dcountif}, + {"make_bag", KQLFunction::make_bag}, + {"make_bag_if", KQLFunction::make_bag_if}, + {"make_list", KQLFunction::make_list}, + {"make_list_if", KQLFunction::make_list_if}, + {"make_list_with_nulls", KQLFunction::make_list_with_nulls}, + {"make_set", KQLFunction::make_set}, + {"make_set_if", KQLFunction::make_set_if}, + {"max", KQLFunction::max}, + {"maxif", KQLFunction::maxif}, + {"min", KQLFunction::min}, + {"minif", KQLFunction::minif}, + {"percentile", KQLFunction::percentile}, + {"percentilew", KQLFunction::percentilew}, + {"percentiles", KQLFunction::percentiles}, + {"percentiles_array", KQLFunction::percentiles_array}, + {"percentilesw", KQLFunction::percentilesw}, + {"percentilesw_array", KQLFunction::percentilesw_array}, + {"stdev", KQLFunction::stdev}, + {"stdevif", KQLFunction::stdevif}, + {"sum", KQLFunction::sum}, + {"sumif", KQLFunction::sumif}, + {"take_any", KQLFunction::take_any}, + {"take_anyif", KQLFunction::take_anyif}, + {"variance", KQLFunction::variance}, + {"varianceif", KQLFunction::varianceif}, + {"count_distinct", KQLFunction::count_distinct}, + {"count_distinctif", KQLFunction::count_distinctif}, + + {"series_fir", KQLFunction::series_fir}, + {"series_iir", KQLFunction::series_iir}, + {"series_fit_line", KQLFunction::series_fit_line}, + {"series_fit_line_dynamic", KQLFunction::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunction::series_fit_2lines}, + {"series_fit_2lines_dynamic", KQLFunction::series_fit_2lines_dynamic}, + {"series_outliers", 
KQLFunction::series_outliers}, + {"series_periods_detect", KQLFunction::series_periods_detect}, + {"series_periods_validate", KQLFunction::series_periods_validate}, + {"series_stats_dynamic", KQLFunction::series_stats_dynamic}, + {"series_stats", KQLFunction::series_stats}, + {"series_fill_backward", KQLFunction::series_fill_backward}, + {"series_fill_const", KQLFunction::series_fill_const}, + {"series_fill_forward", KQLFunction::series_fill_forward}, + {"series_fill_linear", KQLFunction::series_fill_linear}, + + {"ipv4_compare", KQLFunction::ipv4_compare}, + {"ipv4_is_in_range", KQLFunction::ipv4_is_in_range}, + {"ipv4_is_match", KQLFunction::ipv4_is_match}, + {"ipv4_is_private", KQLFunction::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunction::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunction::parse_ipv4}, + {"parse_ipv4_mask", KQLFunction::parse_ipv4_mask}, + {"ipv6_compare", KQLFunction::ipv6_compare}, + {"ipv6_is_match", KQLFunction::ipv6_is_match}, + {"parse_ipv6", KQLFunction::parse_ipv6}, + {"parse_ipv6_mask", KQLFunction::parse_ipv6_mask}, + {"format_ipv4", KQLFunction::format_ipv4}, + {"format_ipv4_mask", KQLFunction::format_ipv4_mask}, + + {"binary_and", KQLFunction::binary_and}, + {"binary_not", KQLFunction::binary_not}, + {"binary_or", KQLFunction::binary_or}, + {"binary_shift_left", KQLFunction::binary_shift_left}, + {"binary_shift_right", KQLFunction::binary_shift_right}, + {"binary_xor", KQLFunction::binary_xor}, + {"bitset_count_ones", KQLFunction::bitset_count_ones}, + + {"bin", KQLFunction::bin}, + {"floor", KQLFunction::bin}, + {"bin_at", KQLFunction::bin_at}, + {"case", KQLFunction::kase}, + {"iff", KQLFunction::iff}, + {"iif", KQLFunction::iif}, + + {"bool", KQLFunction::datatype_bool}, + {"boolean", KQLFunction::datatype_bool}, + {"datetime", KQLFunction::datatype_datetime}, + {"date", KQLFunction::datatype_datetime}, + {"dynamic", KQLFunction::datatype_dynamic}, + {"guid", KQLFunction::datatype_guid}, + {"int", KQLFunction::datatype_int}, + {"long", KQLFunction::datatype_long}, + {"real", KQLFunction::datatype_real}, + {"double", KQLFunction::datatype_real}, + {"timespan", KQLFunction::datatype_timespan}, + {"time", KQLFunction::datatype_timespan}, + {"decimal", KQLFunction::datatype_decimal}, + {"range", KQLFunction::range} + }; +} + +namespace DB +{ +std::unique_ptr KQLFunctionFactory::get(const String & kql_function) +{ + const auto kql_function_it = KQL_FUNCTIONS.find(kql_function); + if (kql_function_it == KQL_FUNCTIONS.end()) + return nullptr; + + const auto& kql_function_id = kql_function_it->second; + switch (kql_function_id) + { + case KQLFunction::none: + return nullptr; + + case KQLFunction::ago: + return std::make_unique(); + + case KQLFunction::datetime_add: + return std::make_unique(); + + case KQLFunction::datetime_part: + return std::make_unique(); + + case KQLFunction::datetime_diff: + return std::make_unique(); + + case KQLFunction::dayofmonth: + return std::make_unique(); + + case KQLFunction::dayofweek: + return std::make_unique(); + + case KQLFunction::dayofyear: + return std::make_unique(); + + case KQLFunction::endofday: + return std::make_unique(); + + case KQLFunction::endofweek: + return std::make_unique(); + + case KQLFunction::endofyear: + return std::make_unique(); + + case KQLFunction::endofmonth: + return std::make_unique(); + + case KQLFunction::monthofyear: + return std::make_unique(); + + case KQLFunction::format_datetime: + return std::make_unique(); + + case KQLFunction::format_timespan: + return std::make_unique(); + + case 
KQLFunction::getmonth: + return std::make_unique(); + + case KQLFunction::getyear: + return std::make_unique(); + + case KQLFunction::hourofday: + return std::make_unique(); + + case KQLFunction::make_timespan: + return std::make_unique(); + + case KQLFunction::make_datetime: + return std::make_unique(); + + case KQLFunction::now: + return std::make_unique(); + + case KQLFunction::startofday: + return std::make_unique(); + + case KQLFunction::startofmonth: + return std::make_unique(); + + case KQLFunction::startofweek: + return std::make_unique(); + + case KQLFunction::startofyear: + return std::make_unique(); + + case KQLFunction::unixtime_microseconds_todatetime: + return std::make_unique(); + + case KQLFunction::unixtime_milliseconds_todatetime: + return std::make_unique(); + + case KQLFunction::unixtime_nanoseconds_todatetime: + return std::make_unique(); + + case KQLFunction::unixtime_seconds_todatetime: + return std::make_unique(); + + case KQLFunction::week_of_year: + return std::make_unique(); + + case KQLFunction::base64_encode_tostring: + return std::make_unique(); + + case KQLFunction::base64_encode_fromguid: + return std::make_unique(); + + case KQLFunction::base64_decode_tostring: + return std::make_unique(); + + case KQLFunction::base64_decode_toarray: + return std::make_unique(); + + case KQLFunction::base64_decode_toguid: + return std::make_unique(); + + case KQLFunction::countof: + return std::make_unique(); + + case KQLFunction::extract: + return std::make_unique(); + + case KQLFunction::extract_all: + return std::make_unique(); + + case KQLFunction::extract_json: + return std::make_unique(); + + case KQLFunction::has_any_index: + return std::make_unique(); + + case KQLFunction::indexof: + return std::make_unique(); + + case KQLFunction::isempty: + return std::make_unique(); + + case KQLFunction::isnan: + return std::make_unique(); + + case KQLFunction::isnotempty: + return std::make_unique(); + + case KQLFunction::isnotnull: + return std::make_unique(); + + case KQLFunction::isnull: + return std::make_unique(); + + case KQLFunction::parse_command_line: + return std::make_unique(); + + case KQLFunction::parse_csv: + return std::make_unique(); + + case KQLFunction::parse_json: + return std::make_unique(); + + case KQLFunction::parse_url: + return std::make_unique(); + + case KQLFunction::parse_urlquery: + return std::make_unique(); + + case KQLFunction::parse_version: + return std::make_unique(); + + case KQLFunction::replace_regex: + return std::make_unique(); + + case KQLFunction::reverse: + return std::make_unique(); + + case KQLFunction::split: + return std::make_unique(); + + case KQLFunction::strcat: + return std::make_unique(); + + case KQLFunction::strcat_delim: + return std::make_unique(); + + case KQLFunction::strcmp: + return std::make_unique(); + + case KQLFunction::strlen: + return std::make_unique(); + + case KQLFunction::strrep: + return std::make_unique(); + + case KQLFunction::substring: + return std::make_unique(); + + case KQLFunction::tolower: + return std::make_unique(); + + case KQLFunction::toupper: + return std::make_unique(); + + case KQLFunction::translate: + return std::make_unique(); + + case KQLFunction::trim: + return std::make_unique(); + + case KQLFunction::trim_end: + return std::make_unique(); + + case KQLFunction::trim_start: + return std::make_unique(); + + case KQLFunction::url_decode: + return std::make_unique(); + + case KQLFunction::url_encode: + return std::make_unique(); + + case KQLFunction::array_concat: + return 
std::make_unique(); + + case KQLFunction::array_iif: + return std::make_unique(); + + case KQLFunction::array_index_of: + return std::make_unique(); + + case KQLFunction::array_length: + return std::make_unique(); + + case KQLFunction::array_reverse: + return std::make_unique(); + + case KQLFunction::array_rotate_left: + return std::make_unique(); + + case KQLFunction::array_rotate_right: + return std::make_unique(); + + case KQLFunction::array_shift_left: + return std::make_unique(); + + case KQLFunction::array_shift_right: + return std::make_unique(); + + case KQLFunction::array_slice: + return std::make_unique(); + + case KQLFunction::array_sort_asc: + return std::make_unique(); + + case KQLFunction::array_sort_desc: + return std::make_unique(); + + case KQLFunction::array_split: + return std::make_unique(); + + case KQLFunction::array_sum: + return std::make_unique(); + + case KQLFunction::bag_keys: + return std::make_unique(); + + case KQLFunction::bag_merge: + return std::make_unique(); + + case KQLFunction::bag_remove_keys: + return std::make_unique(); + + case KQLFunction::jaccard_index: + return std::make_unique(); + + case KQLFunction::pack: + return std::make_unique(); + + case KQLFunction::pack_all: + return std::make_unique(); + + case KQLFunction::pack_array: + return std::make_unique(); + + case KQLFunction::repeat: + return std::make_unique(); + + case KQLFunction::set_difference: + return std::make_unique(); + + case KQLFunction::set_has_element: + return std::make_unique(); + + case KQLFunction::set_intersect: + return std::make_unique(); + + case KQLFunction::set_union: + return std::make_unique(); + + case KQLFunction::treepath: + return std::make_unique(); + + case KQLFunction::zip: + return std::make_unique(); + + case KQLFunction::tobool: + return std::make_unique(); + + case KQLFunction::todatetime: + return std::make_unique(); + + case KQLFunction::todouble: + return std::make_unique(); + + case KQLFunction::toint: + return std::make_unique(); + + case KQLFunction::tolong: + return std::make_unique(); + + case KQLFunction::tostring: + return std::make_unique(); + + case KQLFunction::totimespan: + return std::make_unique(); + + case KQLFunction::todecimal: + return std::make_unique(); + + case KQLFunction::arg_max: + return std::make_unique(); + + case KQLFunction::arg_min: + return std::make_unique(); + + case KQLFunction::avg: + return std::make_unique(); + + case KQLFunction::avgif: + return std::make_unique(); + + case KQLFunction::binary_all_and: + return std::make_unique(); + + case KQLFunction::binary_all_or: + return std::make_unique(); + + case KQLFunction::binary_all_xor: + return std::make_unique(); + + case KQLFunction::buildschema: + return std::make_unique(); + + case KQLFunction::count: + return std::make_unique(); + + case KQLFunction::countif: + return std::make_unique(); + + case KQLFunction::dcount: + return std::make_unique(); + + case KQLFunction::dcountif: + return std::make_unique(); + + case KQLFunction::make_bag: + return std::make_unique(); + + case KQLFunction::make_bag_if: + return std::make_unique(); + + case KQLFunction::make_list: + return std::make_unique(); + + case KQLFunction::make_list_if: + return std::make_unique(); + + case KQLFunction::make_list_with_nulls: + return std::make_unique(); + + case KQLFunction::make_set: + return std::make_unique(); + + case KQLFunction::make_set_if: + return std::make_unique(); + + case KQLFunction::max: + return std::make_unique(); + + case KQLFunction::maxif: + return std::make_unique(); + + 
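// Each case returns a fresh parser instance; e.g. KQLFunctionFactory::get("zip") yields a Zip object whose convertImpl() rewrites the KQL call into a ClickHouse expression string. +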
case KQLFunction::min: + return std::make_unique(); + + case KQLFunction::minif: + return std::make_unique(); + + case KQLFunction::percentile: + return std::make_unique(); + + case KQLFunction::percentilew: + return std::make_unique(); + + case KQLFunction::percentiles: + return std::make_unique(); + + case KQLFunction::percentiles_array: + return std::make_unique(); + + case KQLFunction::percentilesw: + return std::make_unique(); + + case KQLFunction::percentilesw_array: + return std::make_unique(); + + case KQLFunction::stdev: + return std::make_unique(); + + case KQLFunction::stdevif: + return std::make_unique(); + + case KQLFunction::sum: + return std::make_unique(); + + case KQLFunction::sumif: + return std::make_unique(); + + case KQLFunction::take_any: + return std::make_unique(); + + case KQLFunction::take_anyif: + return std::make_unique(); + + case KQLFunction::variance: + return std::make_unique(); + + case KQLFunction::varianceif: + return std::make_unique(); + + case KQLFunction::count_distinct: + return std::make_unique(); + + case KQLFunction::count_distinctif: + return std::make_unique(); + + + case KQLFunction::series_fir: + return std::make_unique(); + + case KQLFunction::series_iir: + return std::make_unique(); + + case KQLFunction::series_fit_line: + return std::make_unique(); + + case KQLFunction::series_fit_line_dynamic: + return std::make_unique(); + + case KQLFunction::series_fit_2lines: + return std::make_unique(); + + case KQLFunction::series_fit_2lines_dynamic: + return std::make_unique(); + + case KQLFunction::series_outliers: + return std::make_unique(); + + case KQLFunction::series_periods_detect: + return std::make_unique(); + + case KQLFunction::series_periods_validate: + return std::make_unique(); + + case KQLFunction::series_stats_dynamic: + return std::make_unique(); + + case KQLFunction::series_stats: + return std::make_unique(); + + case KQLFunction::series_fill_backward: + return std::make_unique(); + + case KQLFunction::series_fill_const: + return std::make_unique(); + + case KQLFunction::series_fill_forward: + return std::make_unique(); + + case KQLFunction::series_fill_linear: + return std::make_unique(); + + case KQLFunction::ipv4_compare: + return std::make_unique(); + + case KQLFunction::ipv4_is_in_range: + return std::make_unique(); + + case KQLFunction::ipv4_is_match: + return std::make_unique(); + + case KQLFunction::ipv4_is_private: + return std::make_unique(); + + case KQLFunction::ipv4_netmask_suffix: + return std::make_unique(); + + case KQLFunction::parse_ipv4: + return std::make_unique(); + + case KQLFunction::parse_ipv4_mask: + return std::make_unique(); + + case KQLFunction::has_ipv6: + return std::make_unique(); + + case KQLFunction::has_any_ipv6: + return std::make_unique(); + + case KQLFunction::has_ipv6_prefix: + return std::make_unique(); + + case KQLFunction::has_any_ipv6_prefix: + return std::make_unique(); + + case KQLFunction::ipv6_compare: + return std::make_unique(); + + case KQLFunction::ipv6_is_match: + return std::make_unique(); + + case KQLFunction::parse_ipv6: + return std::make_unique(); + + case KQLFunction::parse_ipv6_mask: + return std::make_unique(); + + case KQLFunction::format_ipv4: + return std::make_unique(); + + case KQLFunction::format_ipv4_mask: + return std::make_unique(); + + case KQLFunction::binary_and: + return std::make_unique(); + + case KQLFunction::binary_not: + return std::make_unique(); + + case KQLFunction::binary_or: + return std::make_unique(); + + case KQLFunction::binary_shift_left: + return 
std::make_unique(); + + case KQLFunction::binary_shift_right: + return std::make_unique(); + + case KQLFunction::binary_xor: + return std::make_unique(); + + case KQLFunction::bitset_count_ones: + return std::make_unique(); + + case KQLFunction::bin: + return std::make_unique(); + + case KQLFunction::bin_at: + return std::make_unique(); + + case KQLFunction::kase: + return std::make_unique(); + + case KQLFunction::iff: + return std::make_unique(); + + case KQLFunction::iif: + return std::make_unique(); + + case KQLFunction::datatype_bool: + return std::make_unique(); + + case KQLFunction::datatype_datetime: + return std::make_unique(); + + case KQLFunction::datatype_dynamic: + return std::make_unique(); + + case KQLFunction::datatype_guid: + return std::make_unique(); + + case KQLFunction::datatype_int: + return std::make_unique(); + + case KQLFunction::datatype_long: + return std::make_unique(); + + case KQLFunction::datatype_real: + return std::make_unique(); + + case KQLFunction::datatype_timespan: + return std::make_unique(); + + case KQLFunction::datatype_decimal: + return std::make_unique(); + + case KQLFunction::range: + return std::make_unique(); + } +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h new file mode 100644 index 000000000000..f9aea3b57987 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -0,0 +1,12 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class KQLFunctionFactory +{ +public: + static std::unique_ptr get(const String & kql_function); +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp new file mode 100644 index 000000000000..e033d938af6f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -0,0 +1,29 @@ +#include "KQLGeneralFunctions.h" + +namespace DB +{ +bool Bin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_bin"); +} + +bool BinAt::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_bin_at"); +} + +bool Case::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "multiIf"); +} + +bool Iff::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "If"); +} + +bool Iif::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "If"); +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h new file mode 100644 index 000000000000..188e9ffe12e2 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -0,0 +1,42 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Bin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinAt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin_at()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Case : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "case()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Iff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "iff()"; } + bool convertImpl(String &out,IParser::Pos &pos) 
override; +}; +class Iif : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "iif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp new file mode 100644 index 000000000000..ff273b0d434c --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -0,0 +1,287 @@ +#include "KQLIPFunctions.h" + +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Parsed); + out = std::format( + "if(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}) " + "or isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " + "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), " + "toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), 1))" + " - toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1))))", + kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth), + mask.value_or("32"), + generateUniqueIdentifier()); + return true; +} + +bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Parsed); + const auto ip_range = getArgument(function_name, pos, ArgumentState::Raw); + out = std::format( + "if(isNull(IPv4StringToNumOrNull({0}) as ip_{3}) " + "or isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " + "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(32 - range_mask_{3}) - 1)))) = 0)", + ip_address, + kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth), + generateUniqueIdentifier()); + return true; +} + +bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask.value_or("32")}, pos.max_depth)); + return true; +} + +bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) +{ + static const std::array s_private_subnets{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; + + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto unique_identifier = generateUniqueIdentifier(); + + out += std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or 
isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "ignore(assumeNotNull(nullable_ip_{1}) as ip_{1}, " + "IPv4CIDRToRange(ip_{1}, assumeNotNull(mask_{1})) as range_{1}, IPv4NumToString(tupleElement(range_{1}, 1)) as begin_{1}, " + "IPv4NumToString(tupleElement(range_{1}, 2)) as end_{1}), null, ", + ip_address, + unique_identifier); + for (int i = 0; i < std::ssize(s_private_subnets); ++i) + { + if (i > 0) + out += " or "; + + const auto & subnet = s_private_subnets[i]; + out += std::format( + "length(tokens_{1}) = 1 and isIPAddressInRange(IPv4NumToString(ip_{1}), '{0}') or " + "length(tokens_{1}) = 2 and isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}')", + subnet, + unique_identifier); + } + + out.push_back(')'); + return true; +} + +bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_range = getArgument(function_name, pos); + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or not isIPv4String(tokens_{1}[1]), null, " + "length(tokens_{1}) = 1, 32, isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, toUInt8(min2(mask_{1}, 32)))", + ip_range, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) = 1, IPv4StringToNumOrNull(tokens_{1}[1]) as ip_{1}, " + "length(tokens_{1}) = 2 and isNotNull(ip_{1}) and isNotNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), " + "tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{1}), assumeNotNull(mask_{1})), 1), null)", + ip_address, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getArgument(function_name, pos); + out = std::format( + "if(isNull(toIPv4OrNull({0}) as ip_{2}) or isNull(toUInt8OrNull(toString({1})) as mask_{2}), null, " + "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{2}), arrayMax([0, arrayMin([32, assumeNotNull(mask_{2})])])), 1)))", + ip_address, + mask, + generateUniqueIdentifier()); + return true; +} + +bool HasIpv6::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, ""); +} + +bool HasAnyIpv6::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, ""); +} + +bool HasIpv6Prefix::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, ""); +} + +bool HasAnyIpv6Prefix::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, ""); +} + +bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + 
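// The optional third argument is the prefix length used for the comparison; it defaults to 128 (a full IPv6 address) below. +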
const auto mask = getOptionalArgument(function_name, pos); + const auto calculated_mask = mask.value_or("128"); + out = std::format( + "if(length(splitByChar('/', {1}) as lhs_tokens_{0}) > 2 or length(splitByChar('/', {2}) as rhs_tokens_{0}) > 2 " + "or isNull(IPv6StringToNumOrNull(lhs_tokens_{0}[1]) as lhs_ipv6_{0}) or length(lhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(lhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(lhs_tokens_{0}[-1])) as lhs_suffix_{0}) " + "or isNull(IPv6StringToNumOrNull(rhs_tokens_{0}[1]) as rhs_ipv6_{0}) or length(rhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(rhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(rhs_tokens_{0}[-1])) as rhs_suffix_{0}) " + "or isNull(toUInt8(min2({3}, min2(ifNull(lhs_suffix_{0}, 128), ifNull(rhs_suffix_{0}, 128)))) as suffix_{0}) " + "or isNull(bitShiftLeft(bitShiftRight(bitNot(reinterpretAsFixedString(0::UInt128)), (128 - suffix_{0}) as zeroes_{0}), " + "zeroes_{0}) as mask_{0}) or isNull(bitAnd(lhs_ipv6_{0}, mask_{0}) as lhs_base_{0}) " + "or isNull(bitAnd(rhs_ipv6_{0}, mask_{0}) as rhs_base_{0}), null, " + "multiIf(lhs_base_{0} < rhs_base_{0}, -1, lhs_base_{0} > rhs_base_{0}, 1, 0))", + generateUniqueIdentifier(), + lhs, + rhs, + calculated_mask); + return true; +} + +bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask.value_or("128")}, pos.max_depth)); + return true; +} + +bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "if(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(IPv6StringToNumOrNull(tokens_{1}[1]) as ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "arrayStringConcat(flatten(extractAllGroups(lower(hex(tupleElement(IPv6CIDRToRange(assumeNotNull(ip_{1}), toUInt8(ifNull(mask_{1} " + "+ if(isIPv4String(tokens_{1}[1]), 96, 0), 128))), 1))), '([\\da-f]{{4}})')), ':'))", + ip_address, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getArgument(function_name, pos, ArgumentState::Raw); + const auto unique_identifier = generateUniqueIdentifier(); + out = std::format( + "if(empty({0} as ipv4_{3}), {1}, {2})", + kqlCallToExpression("format_ipv4", {"trim_start('::', " + ip_address + ")", mask + " - 96"}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"ipv4_" + unique_identifier}, pos.max_depth), + unique_identifier); + return true; +} + +bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = 
getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Parsed); + out = std::format( + "ifNull(if(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or ({1}) < 0 " + "or isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " + "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(32 - ({1})) - 1))))), '')", + ParserKQLBase::getExprFromToken(ip_address, pos.max_depth), + mask.value_or("32"), + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth), + generateUniqueIdentifier()); + return true; +} + +bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + const auto calculated_mask = mask.value_or("32"); + out = std::format( + "if(empty({1} as formatted_ip_{2}) or position(toTypeName({0}), 'Int') = 0 or not {0} between 0 and 32, '', " + "concat(formatted_ip_{2}, '/', toString(toInt64(min2({0}, ifNull({3} as suffix_{2}, 32))))))", + ParserKQLBase::getExprFromToken(calculated_mask, pos.max_depth), + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth), + generateUniqueIdentifier(), + kqlCallToExpression("ipv4_netmask_suffix", {"tostring(" + ip_address + ")"}, pos.max_depth)); + return true; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h new file mode 100644 index 000000000000..529245e365fa --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -0,0 +1,126 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Ipv4Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_compare()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsInRange : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_in_range()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_match()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsPrivate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_private()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4NetmaskSuffix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_netmask_suffix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4_mask()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv6Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_compare()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv6 : 
public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv6Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv6_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv6Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv6_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv6IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_is_match()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv6Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6_mask()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class FormatIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class FormatIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4_mask()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp new file mode 100644 index 000000000000..373cc014f214 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp @@ -0,0 +1,20 @@ +#include "KQLMathematicalFunctions.h" + +#include + +namespace DB +{ +bool IsNan::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format( + "if(toTypeName({0}) in ['Float64', 'Nullable(Float64)'], isNaN({0}), throwIf(true, 'Expected argument of data type real'))", + argument); + + return true; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h new file mode 100644 index 000000000000..76cae66cae4d --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h @@ -0,0 +1,11 @@ +#include "IParserKQLFunction.h" + +namespace DB +{ +class IsNan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnan()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp new file mode 100644 index 000000000000..c5b593b15579 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -0,0 +1,710 @@ +#include "KQLStringFunctions.h" + +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +extern const int UNKNOWN_TYPE; +extern const int BAD_ARGUMENTS; 
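+// UNKNOWN_TYPE is thrown for typeof() literals that have no ClickHouse mapping (see Extract and ExtractJson below).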
+ +} + +namespace DB +{ + +bool Base64EncodeToString::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "base64Encode"); +} + +bool Base64EncodeFromGuid::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format( + "if(toTypeName({0}) not in ['UUID', 'Nullable(UUID)'], toString(throwIf(true, 'Expected guid as argument')), " + "base64Encode(UUIDStringToNum(toString({0}), 2)))", + argument); + return true; +} + +bool Base64DecodeToString::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "base64Decode"); +} + +bool Base64DecodeToArray::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String str = getConvertedArgument(fn_name, pos); + + out = std::format("arrayMap(x -> (reinterpretAsUInt8(x)), splitByRegexp('', base64Decode({})))", str); + + return true; +} + +bool Base64DecodeToGuid::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format("toUUIDOrNull(UUIDNumToString(toFixedString(base64Decode({}), 16), 2))", argument); + + return true; +} + +bool CountOf::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + ++pos; + const String search = getConvertedArgument(fn_name, pos); + + String kind = "'normal'"; + if (pos->type == TokenType::Comma) + { + ++pos; + kind = getConvertedArgument(fn_name, pos); + } + assert(kind == "'normal'" || kind == "'regex'"); + + if (kind == "'normal'") + out = "countSubstrings(" + source + ", " + search + ")"; + else + out = "countMatches(" + source + ", " + search + ")"; + return true; +} + +bool Extract::convertImpl(String & out, IParser::Pos & pos) +{ + ParserKeyword s_kql("typeof"); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserToken close_bracket(TokenType::ClosingRoundBracket); + Expected expected; + + std::unordered_map<String, String> type_cast = + { {"bool", "Boolean"}, + {"boolean", "Boolean"}, + {"datetime", "DateTime"}, + {"date", "DateTime"}, + {"guid", "UUID"}, + {"int", "Int32"}, + {"long", "Int64"}, + {"real", "Float64"}, + {"double", "Float64"}, + {"string", "String"}, + {"decimal", "Decimal"} + }; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + + ++pos; + String capture_group = getConvertedArgument(fn_name, pos); + + ++pos; + String source = getConvertedArgument(fn_name, pos); + + String type_literal; + + if (pos->type == TokenType::Comma) + { + ++pos; + + if (s_kql.ignore(pos, expected)) + { + if (!open_bracket.ignore(pos, expected)) + throw Exception("Syntax error near typeof", ErrorCodes::SYNTAX_ERROR); + + type_literal = String(pos->begin, pos->end); + + if (type_cast.find(type_literal) == type_cast.end()) + throw Exception(type_literal + " is not a supported Kusto data type for extract", ErrorCodes::UNKNOWN_TYPE); + + type_literal = type_cast[type_literal]; + ++pos; + + if (!close_bracket.ignore(pos, expected)) 
+            if (!close_bracket.ignore(pos, expected))
+                throw Exception("Syntax error near typeof", ErrorCodes::SYNTAX_ERROR);
+        }
+    }
+
+    out = std::format("kql_extract({}, {}, {})", source, regex, capture_group);
+    if (type_literal == "Decimal")
+    {
+        out = std::format("countSubstrings({0}, '.') > 1 ? NULL : {0}, length(substr({0}, position({0}, '.') + 1))", out);
+        out = std::format("toDecimal128OrNull({0})", out);
+    }
+    else
+    {
+        if (type_literal == "Boolean")
+            out = std::format("toInt64OrNull({})", out);
+
+        if (!type_literal.empty())
+            out = "accurateCastOrNull(" + out + ", '" + type_literal + "')";
+    }
+    return true;
+}
+
+bool ExtractAll::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String regex = getConvertedArgument(fn_name, pos);
+
+    ++pos;
+    const String second_arg = getConvertedArgument(fn_name, pos);
+
+    String third_arg;
+    if (pos->type == TokenType::Comma)
+    {
+        ++pos;
+        third_arg = getConvertedArgument(fn_name, pos);
+    }
+
+    if (!third_arg.empty()) // capture groups are not supported yet
+        return false;
+
+    out = "extractAllGroups(" + second_arg + ", " + regex + ")";
+    return true;
+}
+
+bool ExtractJson::convertImpl(String & out, IParser::Pos & pos)
+{
+    String datatype = "String";
+    ParserKeyword s_kql("typeof");
+    ParserToken open_bracket(TokenType::OpeningRoundBracket);
+    ParserToken close_bracket(TokenType::ClosingRoundBracket);
+    Expected expected;
+
+    std::unordered_map<String, String> type_cast =
+    {
+        {"bool", "Boolean"},
+        {"boolean", "Boolean"},
+        {"datetime", "DateTime"},
+        {"date", "DateTime"},
+        {"dynamic", "Array"},
+        {"guid", "UUID"},
+        {"int", "Int32"},
+        {"long", "Int64"},
+        {"real", "Float64"},
+        {"double", "Float64"},
+        {"string", "String"},
+        {"decimal", "Decimal"}
+    };
+
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String json_datapath = getConvertedArgument(fn_name, pos);
+    ++pos;
+    const String json_datasource = getConvertedArgument(fn_name, pos);
+    if (pos->type == TokenType::Comma)
+    {
+        ++pos;
+        if (s_kql.ignore(pos, expected))
+        {
+            if (!open_bracket.ignore(pos, expected))
+                throw Exception("Syntax error near typeof", ErrorCodes::SYNTAX_ERROR);
+
+            datatype = String(pos->begin, pos->end);
+
+            if (type_cast.find(datatype) == type_cast.end())
+                throw Exception(datatype + " is not a supported kusto data type for " + fn_name, ErrorCodes::UNKNOWN_TYPE);
+            datatype = type_cast[datatype];
+            ++pos;
+
+            if (!close_bracket.ignore(pos, expected))
+                throw Exception("Syntax error near typeof", ErrorCodes::SYNTAX_ERROR);
+        }
+    }
+    const auto json_val = std::format("JSON_VALUE({0},{1})", json_datasource, json_datapath);
+
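+    // Illustrative example (not part of the original patch):
+    //   extractjson('$.a.b', Doc, typeof(int))
+    // is expected to become, roughly,
+    //   accurateCastOrNull(JSON_VALUE(Doc, '$.a.b'), 'Int32')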
+    if (datatype == "Decimal")
+    {
+        out = std::format("countSubstrings({0}, '.') > 1 ? NULL : length(substr({0}, position({0}, '.') + 1))", json_val);
+        out = std::format("toDecimal128OrNull({0}::String, {1})", json_val, out);
+    }
+    else
+    {
+        if (datatype == "Boolean")
+            out = std::format("toInt64OrNull({})", json_val);
+        else
+            out = json_val;
+
+        if (!datatype.empty())
+            out = std::format("accurateCastOrNull({},'{}')", out, datatype);
+    }
+    return true;
+}
+
+bool HasAnyIndex::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String source = getConvertedArgument(fn_name, pos);
+
+    ++pos;
+    const String lookup = getConvertedArgument(fn_name, pos);
+    String src_array = std::format("splitByChar(' ', {})", source);
+    out = std::format(
+        "if(empty({1}), -1, indexOf(arrayMap(x -> (x in {0}), if(empty({1}), [''], arrayMap(x -> (toString(x)), {1}))), 1) - 1)",
+        src_array,
+        lookup);
+    return true;
+}
+
+bool IndexOf::convertImpl(String & out, IParser::Pos & pos)
+{
+    String start_index = "0", length = "-1", occurrence = "1";
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+    ++pos;
+    const String source = getConvertedArgument(fn_name, pos);
+
+    ++pos;
+    const String lookup = getConvertedArgument(fn_name, pos);
+
+    if (pos->type == TokenType::Comma)
+    {
+        ++pos;
+        start_index = getConvertedArgument(fn_name, pos);
+
+        if (pos->type == TokenType::Comma)
+        {
+            ++pos;
+            length = getConvertedArgument(fn_name, pos);
+
+            if (pos->type == TokenType::Comma)
+            {
+                ++pos;
+                occurrence = getConvertedArgument(fn_name, pos);
+            }
+        }
+    }
+
+    out = std::format("kql_indexof({},{},{},{},{})", source, lookup, start_index, length, occurrence);
+    return true;
+}
+
+bool IsEmpty::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "empty");
+}
+
+bool IsNotEmpty::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "notEmpty");
+}
+
+bool IsNotNull::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "isNotNull");
+}
+
+bool ParseCommandLine::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String command_line = getConvertedArgument(fn_name, pos);
+
+    ++pos;
+    const String type = getConvertedArgument(fn_name, pos);
+
+    if (type != "'windows'")
+        throw Exception("Only 'windows' is supported as the type argument of " + fn_name, ErrorCodes::BAD_ARGUMENTS);
+
+    out = std::format(
+        "if(empty({0}) OR hasAll(splitByChar(' ', {0}), ['']), arrayMap(x -> null, splitByChar(' ', '')), splitByChar(' ', {0}))",
+        command_line);
+    return true;
+}
+
+bool IsNull::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "isNull");
+}
+
+bool ParseCSV::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String csv_string = getConvertedArgument(fn_name, pos);
+
+    out = std::format(
+        "if(position({0}, '\n')::UInt8, (splitByChar(',', substring({0}, 1, position({0}, '\n') - 1))), (splitByChar(',', substring({0}, 1, length({0})))))",
+        csv_string);
+    return true;
+}
+
+bool ParseJson::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    if (String(pos->begin, pos->end) == "dynamic")
+    {
+        --pos;
+        auto arg = getArgument(fn_name, pos);
+        auto result = kqlCallToExpression("dynamic", {arg}, pos.max_depth);
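+        // Illustrative note (not part of the original patch): a call such as
+        //   parse_json(dynamic([1, 2, 3]))
+        // is delegated to the dynamic() conversion above instead of being
+        // treated as a JSON string.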
std::format("{}", result); + } + else + { + auto arg = getConvertedArgument(fn_name, pos); + out = std::format("if (isValidJSON({0}) , JSON_QUERY({0}, '$') , toJSONString({0}))" , arg); + } + return true; +} + +bool ParseURL::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String url = getConvertedArgument(fn_name, pos); + + const String scheme = std::format("concat('\"Scheme\":\"', protocol({0}),'\"')",url); + const String host = std::format("concat('\"Host\":\"', domain({0}),'\"')",url); + String port = std::format("concat('\"Port\":\"', toString(port({0})),'\"')",url); + const String path = std::format("concat('\"Path\":\"', path({0}),'\"')",url); + const String username_pwd = std::format("netloc({0})",url); + const String query_string = std::format("queryString({0})",url); + const String fragment = std::format("concat('\"Fragment\":\"',fragment({0}),'\"')",url); + const String username = std::format("concat('\"Username\":\"', arrayElement(splitByChar(':',arrayElement(splitByChar('@',{0}) ,1)),1),'\"')", username_pwd); + const String password = std::format("concat('\"Password\":\"', arrayElement(splitByChar(':',arrayElement(splitByChar('@',{0}) ,1)),2),'\"')", username_pwd); + String query_parameters = std::format("concat('\"Query Parameters\":', concat('{{\"', replace(replace({}, '=', '\":\"'),'&','\",\"') ,'\"}}'))", query_string); + + bool all_space = true; + for(size_t i = 0; i < url.size(); i++) + { + if(url[i] == '\'' || url[i] == '\"') + continue; + if(url[i] != ' ') + { + all_space = false; + break; + } + } + + if(all_space) + { + port = "'\"Port\":\"\"'"; + query_parameters = "'\"Query Parameters\":{}'"; + } + out = std::format("concat('{{',{},',',{},',',{},',',{},',',{},',',{},',',{},',',{},'}}')",scheme, host, port, path, username, password, query_parameters,fragment); + return true; +} + +bool ParseURLQuery::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + const String query = getConvertedArgument(fn_name, pos); + + const String query_string = std::format("if (position({},'?') > 0, queryString({}), {})", query, query, query); + const String query_parameters = std::format("concat('\"Query Parameters\":', concat('{{\"', replace(replace({}, '=', '\":\"'),'&','\",\"') ,'\"}}'))", query_string); + out = std::format("concat('{{',{},'}}')",query_parameters); + return true; +} + +bool ParseVersion::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String arg ; + ++pos; + arg = getConvertedArgument(fn_name, pos); + out = std::format("length(splitByChar('.', {0})) > 4 OR length(splitByChar('.', {0})) < 1 OR match({0}, '.*[a-zA-Z]+.*') = 1 OR empty({0}) OR hasAll(splitByChar('.', {0}) , ['']) ? 
+    out = std::format(
+        "length(splitByChar('.', {0})) > 4 OR length(splitByChar('.', {0})) < 1 OR match({0}, '.*[a-zA-Z]+.*') = 1 OR empty({0}) "
+        "OR hasAll(splitByChar('.', {0}), ['']) ? toDecimal128OrNull('NULL', 0) : "
+        "toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), "
+        "arrayResize(splitByChar('.', {0}), 4)))), 8), 0)",
+        arg);
+    return true;
+}
+
+bool ReplaceRegex::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "replaceRegexpAll");
+}
+
+bool Reverse::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto argument = getArgument(function_name, pos, ArgumentState::Raw);
+    out = std::format("reverse({})", kqlCallToExpression("tostring", {argument}, pos.max_depth));
+
+    return true;
+}
+
+bool Split::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String source = getConvertedArgument(fn_name, pos);
+
+    ++pos;
+    const String delimiter = getConvertedArgument(fn_name, pos);
+    auto split_res = std::format("empty({0}) ? splitByString(' ', {1}) : splitByString({0}, {1})", delimiter, source);
+    int requested_index = -1;
+
+    if (pos->type == TokenType::Comma)
+    {
+        ++pos;
+        auto arg = getConvertedArgument(fn_name, pos);
+        // remove whitespace, e.g. between a minus sign and its value
+        arg.erase(std::remove_if(arg.begin(), arg.end(), ::isspace), arg.end());
+        requested_index = std::stoi(arg);
+        requested_index += 1;
+        out = std::format(
+            "multiIf(length({0}) >= {1} AND {1} > 0, arrayPushBack([], arrayElement({0}, {1})), {1} = 0, {0}, arrayPushBack([], arrayElement(NULL, 1)))",
+            split_res, requested_index);
+    }
+    else
+        out = split_res;
+    return true;
+}
+
+bool StrCat::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto arguments = getArguments(function_name, pos, ArgumentState::Raw);
+
+    out.append("concat(");
+    for (const auto & argument : arguments)
+    {
+        out.append(kqlCallToExpression("tostring", {argument}, pos.max_depth));
+        out.append(", ");
+    }
+
+    out.append("'')");
+    return true;
+}
+
+bool StrCatDelim::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String delimiter = getConvertedArgument(fn_name, pos);
+
+    int arg_count = 0;
+    String args;
+
+    while (!pos->isEnd() && pos->type != TokenType::Semicolon && pos->type != TokenType::ClosingRoundBracket)
+    {
+        ++pos;
+        String arg = getConvertedArgument(fn_name, pos);
+        if (args.empty())
+            args = "concat(" + arg;
+        else
+            args = args + ", " + delimiter + ", " + arg;
+        ++arg_count;
+    }
+    args += ")";
+
+    if (arg_count < 2 || arg_count > 64)
+        throw Exception("argument count out of bounds in function: " + fn_name, ErrorCodes::SYNTAX_ERROR);
+
+    out = std::move(args);
+    return true;
+}
+
+bool StrCmp::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String string1 = getConvertedArgument(fn_name, pos);
+    ++pos;
+    const String string2 = getConvertedArgument(fn_name, pos);
+
+    out = std::format("multiIf({0} == {1}, 0, {0} < {1}, -1, 1)", string1, string2);
+    return true;
+}
+
+bool StrLen::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "lengthUTF8");
+}
+
+bool StrRep::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+
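+    // Illustrative example (not part of the original patch): strrep('ab', 3, '-')
+    // should produce 'ab-ab-ab' via the repeat()/substr() rewrite below.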
+    if (fn_name.empty())
+        return false;
+
+    const auto arguments = getArguments(fn_name, pos, ArgumentState::Raw);
+
+    if (arguments.size() < 2 || arguments.size() > 3)
+        throw Exception("number of arguments does not match in function: " + fn_name, ErrorCodes::SYNTAX_ERROR);
+
+    const String value = arguments[0];
+    const String multiplier = arguments[1];
+
+    if (arguments.size() == 2)
+        out = "repeat(" + value + ", " + multiplier + ")";
+    else if (arguments.size() == 3)
+    {
+        const String delimiter = arguments[2];
+        const String repeated_str
+            = "repeat(concat(" + kqlCallToExpression("tostring", {value}, pos.max_depth) + ", " + delimiter + "), " + multiplier + ")";
+        out = "substr(" + repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))";
+    }
+    return true;
+}
+
+bool SubString::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    const String source = getConvertedArgument(fn_name, pos);
+
+    ++pos;
+    const String starting_index = getConvertedArgument(fn_name, pos);
+
+    if (pos->type == TokenType::Comma)
+    {
+        ++pos;
+        auto length = getConvertedArgument(fn_name, pos);
+
+        if (starting_index.empty())
+            throw Exception("number of arguments does not match in function: " + fn_name, ErrorCodes::SYNTAX_ERROR);
+        else
+            out = "if(toInt64(length(" + source + ")) <= 0, '', substr(" + source + ", ((" + starting_index + " % toInt64(length("
+                + source + ")) + toInt64(length(" + source + "))) % toInt64(length(" + source + "))) + 1, " + length + "))";
+    }
+    else
+        out = "if(toInt64(length(" + source + ")) <= 0, '', substr(" + source + ", ((" + starting_index + " % toInt64(length(" + source
+            + ")) + toInt64(length(" + source + "))) % toInt64(length(" + source + "))) + 1))";
+
+    return true;
+}
+
+bool ToLower::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "lower");
+}
+
+bool ToUpper::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "upper");
+}
+
+bool Translate::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    String from = getConvertedArgument(fn_name, pos);
+    ++pos;
+    String to = getConvertedArgument(fn_name, pos);
+    ++pos;
+    String source = getConvertedArgument(fn_name, pos);
+
+    String len_diff = std::format("length({}) - length({})", from, to);
+    String to_str = std::format(
+        "multiIf(length({1}) = 0, {0}, {2} > 0, concat({1},repeat(substr({1},length({1}),1),toUInt16({2}))), {2} < 0, substr({1},1,length({0})), {1})",
+        from, to, len_diff);
+    out = std::format("if(length({3}) = 0, '', translate({0},{1},{2}))", source, from, to_str, to);
+    return true;
+}
+
+bool Trim::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const auto regex = getArgument(fn_name, pos, ArgumentState::Raw);
+    const auto source = getArgument(fn_name, pos, ArgumentState::Raw);
+    out = kqlCallToExpression("trim_start", {regex, std::format("trim_end({0}, {1})", regex, source)}, pos.max_depth);
+
+    return true;
+}
+
+bool TrimEnd::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const auto regex = getArgument(fn_name, pos);
+    const auto source = getArgument(fn_name, pos);
+    out = std::format("replaceRegexpOne({0}, concat({1}, '$'), '')", source, regex);
+
+    return true;
+}
+
+bool TrimStart::convertImpl(String & out,
IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto regex = getArgument(fn_name, pos); + const auto source = getArgument(fn_name, pos); + out = std::format("replaceRegexpOne({0}, concat('^', {1}), '')", source, regex); + + return true; +} + +bool URLDecode::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "decodeURLComponent"); +} + +bool URLEncode::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "encodeURLComponent"); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h new file mode 100644 index 000000000000..5597fec54afe --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h @@ -0,0 +1,274 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Base64EncodeToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_encode_tostring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64EncodeFromGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_encode_fromguid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_tostring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_toarray()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_toguid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "countof()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Extract : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ExtractAll : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract_all()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ExtractJson : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract_json(), extractjson()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HasAnyIndex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_index()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IndexOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "indexof()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsEmpty : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isempty()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNotEmpty : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnotempty()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNotNull : 
public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnotnull()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNull : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnull()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseCommandLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_command_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseCSV : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_csv()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseJson : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_json()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseURL : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_url()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseURLQuery : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_urlquery()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseVersion : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_version()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ReplaceRegex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "replace_regex()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Reverse : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "reverse()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Split : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "split()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCatDelim : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcat_delim()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCmp : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcmp()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrLen : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strlen()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrRep : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strrep()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SubString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "substring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToLower : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tolower()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToUpper : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toupper()"; 
} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Translate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "translate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Trim : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TrimEnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim_end()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TrimStart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim_start()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class URLDecode : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "url_decode()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class URLEncode : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "url_encode()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp new file mode 100644 index 000000000000..d5be8e262a84 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp @@ -0,0 +1,111 @@ +#include "KQLTimeSeriesFunctions.h" + +namespace DB +{ + +bool SeriesFir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesIir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLine::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLineDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2lines::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2linesDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesOutliers::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsDetect::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsValidate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStatsDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStats::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillBackward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillConst::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillForward::convertImpl(String &out,IParser::Pos &pos) +{ + 
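+    // Illustrative note (not part of the original patch): like the other
+    // series_* stubs in this file, this copies the current token into `out`
+    // and returns false, signalling that the function is not translated yet.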
String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillLinear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h new file mode 100644 index 000000000000..999a27f6b391 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h @@ -0,0 +1,113 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class SeriesFir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesIir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_iir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLineDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2lines : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_2lines()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2linesDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_2lines_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesOutliers : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_outliers()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsDetect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_detect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsValidate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_validate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStatsDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStats : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillBackward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_backward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillConst : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_const()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillForward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_forward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillLinear : public IParserKQLFunction +{ +protected: + const 
char * getName() const override { return "series_fill_linear()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+}
+
diff --git a/src/Parsers/Kusto/ParserKQLCount.cpp b/src/Parsers/Kusto/ParserKQLCount.cpp
new file mode 100644
index 000000000000..28fbb83fec07
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLCount.cpp
@@ -0,0 +1,30 @@
+#include
+#include
+#include
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+
+bool ParserKQLCount::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+        throw Exception("Syntax error near count operator", ErrorCodes::SYNTAX_ERROR);
+
+    ASTPtr select_expression_list;
+    String converted_columns = getExprFromToken("Count = count()", pos.max_depth);
+
+    Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size());
+    IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth);
+
+    if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected))
+        return false;
+
+    node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list));
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLCount.h b/src/Parsers/Kusto/ParserKQLCount.h
new file mode 100644
index 000000000000..49132e090d7b
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLCount.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLCount : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL count"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLDistinct.cpp b/src/Parsers/Kusto/ParserKQLDistinct.cpp
new file mode 100644
index 000000000000..de226c2817ba
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLDistinct.cpp
@@ -0,0 +1,26 @@
+#include
+#include
+#include
+namespace DB
+{
+
+bool ParserKQLDistinct::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ASTPtr select_expression_list;
+    String expr = getExprFromToken(pos);
+
+    Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
+    IParser::Pos new_pos(tokens, pos.max_depth);
+
+    if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected))
+        return false;
+
+    node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list));
+    node->as<ASTSelectQuery>()->distinct = true;
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLDistinct.h b/src/Parsers/Kusto/ParserKQLDistinct.h
new file mode 100644
index 000000000000..eb997893d3e6
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLDistinct.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLDistinct : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL distinct"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLExtend.cpp b/src/Parsers/Kusto/ParserKQLExtend.cpp
new file mode 100644
index 000000000000..517e82aa1e7e
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLExtend.cpp
@@ -0,0 +1,91 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+bool ParserKQLExtend::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ASTPtr select_query;
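+    // Illustrative example (not part of the original patch):
+    //   T | extend d = a + b
+    // is rewritten below into roughly
+    //   SELECT * except d, a + b AS d FROM prev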
+    int32_t new_column_index = 1;
+
+    String extend_expr = getExprFromToken(pos);
+
+    String except_str;
+    String new_extend_str;
+    Tokens ntokens(extend_expr.c_str(), extend_expr.c_str() + extend_expr.size());
+    IParser::Pos npos(ntokens, pos.max_depth);
+
+    String alias;
+
+    auto apply_alias = [&]
+    {
+        if (alias.empty())
+        {
+            alias = std::format("Column{}", new_column_index);
+            ++new_column_index;
+            new_extend_str += " AS";
+        }
+        else
+            except_str = except_str.empty() ? " except " + alias : except_str + " except " + alias;
+
+        new_extend_str = new_extend_str + " " + alias;
+
+        alias.clear();
+    };
+
+    int32_t round_bracket_count = 0;
+    int32_t square_bracket_count = 0;
+    while (!npos->isEnd())
+    {
+        if (npos->type == TokenType::OpeningRoundBracket)
+            ++round_bracket_count;
+        if (npos->type == TokenType::OpeningSquareBracket)
+            ++square_bracket_count;
+        if (npos->type == TokenType::ClosingRoundBracket)
+            --round_bracket_count;
+        if (npos->type == TokenType::ClosingSquareBracket)
+            --square_bracket_count;
+
+        auto expr = String(npos->begin, npos->end);
+        if (expr == "AS")
+        {
+            ++npos;
+            alias = String(npos->begin, npos->end);
+        }
+
+        if (npos->type == TokenType::Comma && square_bracket_count == 0 && round_bracket_count == 0)
+        {
+            apply_alias();
+            new_extend_str += ", ";
+        }
+        else
+            new_extend_str = new_extend_str.empty() ? expr : new_extend_str + " " + expr;
+
+        ++npos;
+    }
+    apply_alias();
+
+    String expr = std::format("SELECT * {}, {} from prev", except_str, new_extend_str);
+    Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
+    IParser::Pos new_pos(tokens, pos.max_depth);
+
+    if (!ParserSelectQuery().parse(new_pos, select_query, expected))
+        return false;
+    if (!setSubQuerySource(select_query, node, false, false))
+        return false;
+
+    node = select_query;
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLExtend.h b/src/Parsers/Kusto/ParserKQLExtend.h
new file mode 100644
index 000000000000..95c46cd67cd5
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLExtend.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLExtend : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL extend"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp
index 3a399bdccdb1..942f4cd90d2a 100644
--- a/src/Parsers/Kusto/ParserKQLFilter.cpp
+++ b/src/Parsers/Kusto/ParserKQLFilter.cpp
@@ -8,7 +8,7 @@ namespace DB
 {
 
-bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+bool ParserKQLFilter::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     String expr = getExprFromToken(pos);
     ASTPtr where_expression;
diff --git a/src/Parsers/Kusto/ParserKQLJoin.cpp b/src/Parsers/Kusto/ParserKQLJoin.cpp
new file mode 100644
index 000000000000..7086c9578f30
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLJoin.cpp
@@ -0,0 +1,293 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+
+bool ParserKQLJoin::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ASTPtr sub_query_node;
+    String str_right_table;
+    String str_attributes;
+    std::vector<String> attribute_list;
+    std::vector<String> left_columns;
+    const String default_join = "UNIQUE INNER JOIN";
+    String join_kind = default_join;
= "innerunique"; + + ParserKeyword s_kind("kind"); + ParserToken equals(TokenType::Equals); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserIdentifier id_right_table; + + size_t paren_count = 0; + ASTPtr ast_right_table; + + std::unordered_map join_type + = {{"innerunique", default_join}, + {"inner", "INNER JOIN"}, + {"leftouter", "LEFT OUTER JOIN"}, + {"rightouter", "RIGHT OUTER JOIN"}, + {"fullouter", "FULL OUTER JOIN"}, + + {"leftanti", "LEFT ANTI JOIN"}, + {"anti", "LEFT ANTI JOIN"}, + {"leftantisemi", "LEFT ANTI JOIN"}, + + {"rightanti", "RIGHT ANTI JOIN"}, + {"rightantisemi", "RIGHT ANTI JOIN"}, + + {"leftsemi", "LEFT SEMI JOIN"}, + {"rightsemi", "RIGHT SEMI JOIN"}}; + + if (s_kind.ignore(pos)) + { + if (!equals.ignore(pos)) + throw Exception("Invalid kind for join operator", ErrorCodes::SYNTAX_ERROR); + + String join_word(pos->begin, pos->end); + if (join_type.find(join_word) == join_type.end()) + throw Exception("Invalid value of kind for join operator", ErrorCodes::SYNTAX_ERROR); + + join_kind = join_type[join_word]; + kql_join_kind = join_word; + ++pos; + } + + Pos right_table_start_pos = pos; + Pos keyword_on_pos = pos; + + bool has_bracket = open_bracket.ignore(pos); + + if (!has_bracket) + { + if (!id_right_table.parse(pos, ast_right_table, expected)) + return false; + } + else + paren_count = 1; + + Pos attributes_start_pos = pos; + bool attributes_on_column = false; + + auto parse_attribute = [&](Pos & start_pos, Pos & end_pos) + { + while (start_pos < end_pos && start_pos->type == TokenType::OpeningRoundBracket) + ++start_pos; + while (start_pos < end_pos && end_pos->type == TokenType::ClosingRoundBracket) + --end_pos; + + if (start_pos == end_pos) + { + if (start_pos->type != TokenType::BareWord) + return false; + attribute_list.push_back(String(start_pos->begin, end_pos->end)); + left_columns.push_back(String(start_pos->begin, end_pos->end)); + } + else + { + String left_column, right_column; + auto get_coulmn = [&]() + { + String left_alias = "left_.", right_alias = "right_."; + String left_alias2 = "$left.", right_alias2 = "$right."; + + auto attribute_str = String(start_pos->begin, end_pos->end); + + if (attribute_str.substr(0, left_alias.length()) != left_alias + && attribute_str.substr(0, left_alias2.length()) != left_alias2) + return false; + + auto r_begin = attribute_str.find("=="); + if (r_begin == std::string::npos) + return false; + if (attribute_str.substr(0, left_alias.length()) == left_alias) + left_column = attribute_str.substr(left_alias.length(), r_begin - left_alias.length()); + else + left_column = attribute_str.substr(left_alias2.length(), r_begin - left_alias2.length()); + + r_begin += 2; + while (r_begin < attribute_str.length() && attribute_str[r_begin] <= 0x20) + ++r_begin; + + if (attribute_str.substr(r_begin, right_alias.length()) != right_alias + && attribute_str.substr(r_begin, right_alias2.length()) != right_alias2) + return false; + + right_column = attribute_str.substr(r_begin + right_alias.length()); + return true; + }; + + if (!get_coulmn()) + return false; + trim(left_column); + trim(right_column); + left_columns.push_back(left_column); + + if (left_column != right_column) + { + attributes_on_column = true; + attribute_list.push_back(String(start_pos->begin, end_pos->end)); + } + else + attribute_list.push_back(left_column); + } + + return true; + }; + + auto update_attributes = [&] + { + auto temp_pos = pos; + --temp_pos; + + if (temp_pos < attributes_start_pos || !parse_attribute(attributes_start_pos, temp_pos)) + 
throw Exception("Attributes error for join or lookup operator", ErrorCodes::SYNTAX_ERROR); + attributes_start_pos = pos; + ++attributes_start_pos; + }; + + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::PipeMark && paren_count == 0) + break; + + if (String(pos->begin, pos->end) == "on" && paren_count == 0) + { + if (keyword_on_pos == right_table_start_pos) + { + keyword_on_pos = pos; + attributes_start_pos = pos; + ++attributes_start_pos; + } + } + + if (pos->type == TokenType::Comma && right_table_start_pos < keyword_on_pos && paren_count == 0) + { + update_attributes(); + } + ++pos; + } + + update_attributes(); + + if (keyword_on_pos <= right_table_start_pos) + throw Exception("Missing right table or 'on' for join or lookup operator", ErrorCodes::SYNTAX_ERROR); + + --keyword_on_pos; + if (right_table_start_pos == keyword_on_pos) + str_right_table = String(right_table_start_pos->begin, keyword_on_pos->end); + else + str_right_table = std::format("kql{}", String(right_table_start_pos->begin, keyword_on_pos->end)); + + ++keyword_on_pos; + ++keyword_on_pos; + --pos; + if (pos < keyword_on_pos) + throw Exception("Missing attributes for join or lookup operator", ErrorCodes::SYNTAX_ERROR); + + String query_join; + if (join_kind == default_join) + { + join_kind = "INNER JOIN"; + String distinct_column; + for (auto col : left_columns) + distinct_column = distinct_column.empty() ? col : distinct_column + "," + col; + + String distinct_query = std::format("(SELECT DISTINCT ON ({}) * FROM dum_tbl)", distinct_column); + if (!parseSQLQueryByString(std::make_unique(), distinct_query, sub_query_node, pos.max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, node, true, false)) + return false; + node = std::move(sub_query_node); + } + + if (attributes_on_column) + { + auto replace = [&](std::string & str, const std::string & from, const std::string & to) + { + size_t start_pos = str.find(from); + if (start_pos != std::string::npos) + str.replace(start_pos, from.length(), to); + }; + + for (auto str : attribute_list) + { + if (str.substr(0, 6) != "left_." && str.substr(0, 6) != "$left.") + str = std::format("left_.{0} == right_.{0}", str); + else if (str.substr(0, 6) == "$left.") + { + replace(str, "$left.", "left_."); + replace(str, "$right.", "right_."); + } + + str_attributes = str_attributes.empty() ? str : str_attributes + " and " + str; + } + query_join = std::format("SELECT * FROM tbl {} {} ON {}", join_kind, str_right_table, str_attributes); + } + else + { + for (auto str : attribute_list) + str_attributes = str_attributes.empty() ? 
+        for (const auto & str : attribute_list)
+            str_attributes = str_attributes.empty() ? str : str_attributes + "," + str;
+
+        query_join = std::format("SELECT * FROM tbl {} {} USING {}", join_kind, str_right_table, str_attributes);
+    }
+
+    if (!parseSQLQueryByString(std::make_unique<ParserSelectQuery>(), query_join, sub_query_node, pos.max_depth))
+        return false;
+
+    ASTPtr table_expr;
+    if (sub_query_node->as<ASTSelectQuery>()->tables()
+        && sub_query_node->as<ASTSelectQuery>()->tables()->as<ASTTablesInSelectQuery>()->children.size() > 1)
+    {
+        table_expr = sub_query_node->as<ASTSelectQuery>()->tables()->as<ASTTablesInSelectQuery>()->children[1];
+        if (table_expr->as<ASTTablesInSelectQueryElement>()->table_expression->as<ASTTableExpression>()->subquery)
+            table_expr->as<ASTTablesInSelectQueryElement>()->table_expression->as<ASTTableExpression>()->subquery->as<ASTSubquery>()->alias
+                = "right_";
+        else if (table_expr->as<ASTTablesInSelectQueryElement>()->table_expression->as<ASTTableExpression>()->database_and_table_name)
+        {
+            table_expr
+                = table_expr->as<ASTTablesInSelectQueryElement>()->table_expression->as<ASTTableExpression>()->database_and_table_name;
+            if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(table_expr.get()))
+                ast_with_alias->alias = "right_";
+        }
+    }
+    if (kql_join_kind == "innerunique")
+    {
+        if (!setSubQuerySource(sub_query_node, node, false, true, "left_"))
+            return false;
+    }
+    else
+    {
+        if (!setSubQuerySource(sub_query_node, node, false, false, "left_"))
+            return false;
+    }
+
+    node = std::move(sub_query_node);
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLJoin.h b/src/Parsers/Kusto/ParserKQLJoin.h
new file mode 100644
index 000000000000..185f91bdeb2f
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLJoin.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLJoin : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL Join"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp
index bb8e08fd3786..306884ac8b77 100644
--- a/src/Parsers/Kusto/ParserKQLLimit.cpp
+++ b/src/Parsers/Kusto/ParserKQLLimit.cpp
@@ -2,14 +2,11 @@
 #include
 #include
 #include
-#include
-#include
-#include
 
 namespace DB
 {
 
-bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+bool ParserKQLLimit::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     ASTPtr limit_length;
diff --git a/src/Parsers/Kusto/ParserKQLLookup.cpp b/src/Parsers/Kusto/ParserKQLLookup.cpp
new file mode 100644
index 000000000000..0cbf7a0251ef
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLLookup.cpp
@@ -0,0 +1,87 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+
+bool ParserKQLLookup::updatePipeLine(OperationsPos & operations, String & query)
+{
+    Pos pos = operations.back().second;
+
+    if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
+        throw Exception("Syntax error near lookup operator", ErrorCodes::SYNTAX_ERROR);
+
+    Pos start_pos = operations.front().second;
+    Pos end_pos = pos;
+    --end_pos;
+    --end_pos;
+
+    String prev_query(start_pos->begin, end_pos->end);
+
+    String join_kind = "kind=leftouter";
+    ParserKeyword s_kind("kind");
+    ParserToken equals(TokenType::Equals);
+    start_pos = pos;
+    end_pos = pos;
+
+    if (s_kind.ignore(pos))
+    {
+        if (!equals.ignore(pos))
+            throw Exception("Invalid kind for lookup operator", ErrorCodes::SYNTAX_ERROR);
+
+        if (ParserKeyword("leftouter").ignore(pos))
+            join_kind = "kind=leftouter";
+        else if (ParserKeyword("inner").ignore(pos))
+            join_kind = "kind=inner";
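+        // Illustrative note (not part of the original patch): lookup accepts
+        // only the leftouter (default) and inner kinds.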
+        else
+            throw Exception("Invalid value of kind for lookup operator", ErrorCodes::SYNTAX_ERROR);
+    }
+    Pos right_table_start_pos = pos;
+
+    size_t paren_count = 0;
+    while (!pos->isEnd() && pos->type != TokenType::Semicolon)
+    {
+        if (pos->type == TokenType::OpeningRoundBracket)
+            ++paren_count;
+        if (pos->type == TokenType::ClosingRoundBracket)
+            --paren_count;
+        if (pos->type == TokenType::PipeMark && paren_count == 0)
+            break;
+        end_pos = pos;
+        ++pos;
+    }
+
+    String right_expr = (right_table_start_pos <= end_pos) ? String(right_table_start_pos->begin, end_pos->end) : "";
+    if (right_expr.empty())
+        throw Exception("lookup operator needs a right table", ErrorCodes::SYNTAX_ERROR);
+
+    query = std::format("{} join {} {} ", prev_query, join_kind, right_expr);
+
+    return true;
+}
+
+bool ParserKQLLookup::parseImpl(Pos & /*pos*/, ASTPtr & /*node*/, Expected & /*expected*/)
+{
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLLookup.h b/src/Parsers/Kusto/ParserKQLLookup.h
new file mode 100644
index 000000000000..d8880c7f5e5b
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLLookup.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLLookup : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL lookup"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    bool updatePipeLine(OperationsPos & operations, String & query) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.cpp b/src/Parsers/Kusto/ParserKQLMVExpand.cpp
new file mode 100644
index 000000000000..8ed82a05695a
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLMVExpand.cpp
@@ -0,0 +1,303 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace DB::ErrorCodes
+{
+extern const int UNKNOWN_TYPE;
+}
+
+namespace DB
+{
+
+std::unordered_map<String, String> ParserKQLMVExpand::type_cast =
+{
+    {"bool", "Boolean"},
+    {"boolean", "Boolean"},
+    {"datetime", "DateTime"},
+    {"date", "DateTime"},
+    {"guid", "UUID"},
+    {"int", "Int32"},
+    {"long", "Int64"},
+    {"real", "Float64"},
+    {"double", "Float64"},
+    {"string", "String"}
+};
+
+bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_exprs, Pos & pos, Expected & expected)
+{
+    ParserToken equals(TokenType::Equals);
+    ParserToken open_bracket(TokenType::OpeningRoundBracket);
+    ParserToken close_bracket(TokenType::ClosingRoundBracket);
+    ParserToken comma(TokenType::Comma);
+
+    ParserKeyword s_to("to");
+    ParserKeyword s_type("typeof");
+    uint16_t bracket_count = 0;
+    Pos expr_begin_pos = pos;
+    Pos expr_end_pos = pos;
+
+    String alias;
+    String column_array_expr;
+    String to_type;
+    --expr_end_pos;
+
+    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        if (pos->type == TokenType::OpeningRoundBracket)
+            ++bracket_count;
+
+        if (pos->type == TokenType::ClosingRoundBracket)
+            --bracket_count;
+
+        if (String(pos->begin, pos->end) == "=")
+        {
+            --pos;
+            alias = String(pos->begin, pos->end);
+            ++pos;
+            ++pos;
+            expr_begin_pos = pos;
+        }
+
+        auto add_columns = [&]
+        {
+            column_array_expr = getExprFromToken(String(expr_begin_pos->begin, expr_end_pos->end), pos.max_depth);
+
+            if (alias.empty())
+            {
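+                // Illustrative note (not part of the original patch): a bare
+                // column keeps its own name as the alias; a computed expression
+                // is aliased by its first token plus a trailing underscore.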
+                alias = expr_begin_pos == expr_end_pos ? column_array_expr : String(expr_begin_pos->begin, expr_begin_pos->end) + "_";
+            }
+            column_array_exprs.push_back(ColumnArrayExpr(alias, column_array_expr, to_type));
+        };
+
+        if (s_to.ignore(pos, expected))
+        {
+            --pos;
+            --pos;
+            expr_end_pos = pos;
+            ++pos;
+            ++pos;
+
+            column_array_expr = String(expr_begin_pos->begin, expr_end_pos->end);
+
+            if (!s_type.ignore(pos, expected))
+                return false;
+            if (!open_bracket.ignore(pos, expected))
+                return false;
+            to_type = String(pos->begin, pos->end);
+
+            if (type_cast.find(to_type) == type_cast.end())
+                throw Exception(to_type + " is not a supported kusto data type for mv-expand", ErrorCodes::UNKNOWN_TYPE);
+
+            ++pos;
+            if (!close_bracket.ignore(pos, expected))
+                return false;
+        }
+
+        if ((pos->type == TokenType::Comma && bracket_count == 0) || String(pos->begin, pos->end) == "limit" || pos->type == TokenType::Semicolon)
+        {
+            if (column_array_expr.empty())
+            {
+                expr_end_pos = pos;
+                --expr_end_pos;
+            }
+            add_columns();
+            expr_begin_pos = pos;
+            expr_end_pos = pos;
+            ++expr_begin_pos;
+
+            alias.clear();
+            column_array_expr.clear();
+            to_type.clear();
+
+            if (pos->type == TokenType::Semicolon)
+                break;
+        }
+
+        if (String(pos->begin, pos->end) == "limit")
+            break;
+        ++pos;
+        if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
+        {
+            if (expr_end_pos < expr_begin_pos)
+            {
+                expr_end_pos = pos;
+                --expr_end_pos;
+            }
+            add_columns();
+            break;
+        }
+    }
+    return true;
+}
+
+bool ParserKQLMVExpand::parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, Expected & expected)
+{
+    ParserKeyword s_bagexpansion("bagexpansion");
+    ParserKeyword s_kind("kind");
+    ParserKeyword s_with_itemindex("with_itemindex");
+    ParserKeyword s_limit("limit");
+
+    ParserToken equals(TokenType::Equals);
+    ParserToken comma(TokenType::Comma);
+
+    auto & column_array_exprs = kql_mv_expand.column_array_exprs;
+    auto & bagexpansion = kql_mv_expand.bagexpansion;
+    auto & with_itemindex = kql_mv_expand.with_itemindex;
+    auto & limit = kql_mv_expand.limit;
+
+    if (s_bagexpansion.ignore(pos, expected))
+    {
+        if (!equals.ignore(pos, expected))
+            return false;
+        bagexpansion = String(pos->begin, pos->end);
+        ++pos;
+    }
+    else if (s_kind.ignore(pos, expected))
+    {
+        if (!equals.ignore(pos, expected))
+            return false;
+        bagexpansion = String(pos->begin, pos->end);
+        ++pos;
+    }
+
+    if (s_with_itemindex.ignore(pos, expected))
+    {
+        if (!equals.ignore(pos, expected))
+            return false;
+        with_itemindex = String(pos->begin, pos->end);
+        ++pos;
+    }
+
+    if (!parseColumnArrayExprs(column_array_exprs, pos, expected))
+        return false;
+
+    if (s_limit.ignore(pos, expected))
+        limit = String(pos->begin, pos->end);
+
+    return true;
+}
+
+bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth)
+{
+    String expand_str;
+    String cast_type_column_remove, cast_type_column_rename;
+    String cast_type_column_restore, cast_type_column_restore_name;
+    String row_count_str;
+    String extra_columns;
+    String input = "dummy_input";
+    for (const auto & column : kql_mv_expand.column_array_exprs)
+    {
+        if (column.alias == column.column_array_expr)
+            expand_str = expand_str.empty() ? String("ARRAY JOIN ") + column.alias : expand_str + "," + column.alias;
+        else
+        {
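+            // Illustrative note (not part of the original patch): computed
+            // array expressions are ARRAY JOINed under an explicit alias and
+            // then projected as extra columns.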
std::format("ARRAY JOIN {} AS {} ", column.column_array_expr, column.alias): expand_str + std::format(", {} AS {}", column.column_array_expr, column.alias); + extra_columns = extra_columns + ", " + column.alias; + } + + if (!column.to_type.empty()) + { + cast_type_column_remove = cast_type_column_remove.empty() ? " Except " + column.alias : cast_type_column_remove + " Except " + column.alias ; + String rename_str; + + if (type_cast[column.to_type] == "Boolean") + rename_str = std::format("accurateCastOrNull(toInt64OrNull(toString({0})),'{1}') as {0}_ali",column.alias, type_cast[column.to_type]); + else + rename_str = std::format("accurateCastOrNull({0},'{1}') as {0}_ali",column.alias, type_cast[column.to_type]); + + cast_type_column_rename = cast_type_column_rename.empty() ? rename_str : cast_type_column_rename + "," + rename_str; + cast_type_column_restore = cast_type_column_restore.empty() ? std::format(" Except {}_ali ", column.alias) : cast_type_column_restore + std::format(" Except {}_ali ", column.alias); + cast_type_column_restore_name = cast_type_column_restore_name.empty() ? std::format("{0}_ali as {0}", column.alias ) :cast_type_column_restore_name + std::format(", {0}_ali as {0}", column.alias); + } + + if (!kql_mv_expand.with_itemindex.empty()) + { + row_count_str = row_count_str.empty() ? "length("+column.alias+")" : row_count_str + ", length("+column.alias+")"; + } + } + + String columns = "*"; + if (!row_count_str.empty()) + { + expand_str += std::format(", range(0, arrayMax([{}])) AS {} ", row_count_str, kql_mv_expand.with_itemindex); + columns = kql_mv_expand.with_itemindex + " , " + columns; + } + + if (!kql_mv_expand.limit.empty()) + expand_str += " LIMIT " + kql_mv_expand.limit; + + auto query = std::format("(Select {} {} From {} {})", columns, extra_columns, input, expand_str); + + ASTPtr sub_query_node; + Expected expected; + + if (cast_type_column_remove.empty()) + { + query = std::format("Select {} {} From {} {}", columns, extra_columns, input, expand_str); + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, false, false)) + return false; + select_node = std::move(sub_query_node); + } + else + { + query = std::format("(Select {} {} From {} {})", columns, extra_columns, input, expand_str); + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, true, false)) + return false; + select_node = std::move(sub_query_node); + + auto rename_query = std::format("(Select * {}, {} From {})", cast_type_column_remove, cast_type_column_rename, "query"); + if (!parseSQLQueryByString(std::make_unique(), rename_query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, true, true)) + return false; + + select_node = std::move(sub_query_node); + query = std::format("Select * {}, {} from {}", cast_type_column_restore, cast_type_column_restore_name, "rename_query"); + + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + sub_query_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(select_node)); + select_node = std::move(sub_query_node); + } + return true; +} + +bool ParserKQLMVExpand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr setting; + ASTPtr select_expression_list; + auto begin = pos; + + KQLMVExpand kql_mv_expand; + if 
(!parserMVExpand(kql_mv_expand, pos, expected)) + return false; + if (!genQuery(kql_mv_expand, node, pos.max_depth)) + return false; + + const String setting_str = "enable_unaligned_array_join = 1"; + Tokens token_settings(setting_str.c_str(), setting_str.c_str() + setting_str.size()); + IParser::Pos pos_settings(token_settings, pos.max_depth); + + if (!ParserSetQuery(true).parse(pos_settings, setting, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(setting)); + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.h b/src/Parsers/Kusto/ParserKQLMVExpand.h new file mode 100644 index 000000000000..9fdabded5b1f --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMVExpand.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLMVExpand : public ParserKQLBase +{ + +protected: + static std::unordered_map type_cast; + + struct ColumnArrayExpr { + String alias; + String column_array_expr; + String to_type; + ColumnArrayExpr(String alias_, String column_array_expr_, String to_type_) + :alias(alias_), column_array_expr(column_array_expr_), to_type(to_type_){} + }; + using ColumnArrayExprs = std::vector; + + struct KQLMVExpand { + ColumnArrayExprs column_array_exprs; + String bagexpansion; + String with_itemindex; + String limit; + }; + + static bool parseColumnArrayExprs(ColumnArrayExprs & column_array_exprs, Pos & pos, Expected & expected); + static bool parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, Expected & expected); + static bool genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth); + + const char * getName() const override { return "KQL mv-expand"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp new file mode 100644 index 000000000000..8a5fe614c13f --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -0,0 +1,419 @@ +#include "ParserKQLTimespan.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos) +{ + std::unordered_set allowed_aggregation + ({ + "avg", + "avgif", + "count", + "countif", + "dcount", + "dcountif", + "max", + "maxif", + "min", + "minif", + "percentile", + "take_any", + "stdev", + "sum", + "sumif", + "variance" + }); + + Expected expected; + ParserKeyword s_default("default"); + ParserToken equals(TokenType::Equals); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserToken close_bracket(TokenType::ClosingRoundBracket); + ParserToken comma(TokenType::Comma); + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String alias; + String aggregation_fun; + String column; + double default_value = 0; + + String first_token(pos->begin,pos->end); + + ++pos; + if (equals.ignore(pos, expected)) + { + alias = std::move(first_token); + aggregation_fun = String(pos->begin,pos->end); + ++pos; + } + else + aggregation_fun = std::move(first_token); + + if (allowed_aggregation.find(aggregation_fun) == allowed_aggregation.end()) + return false; + + if (open_bracket.ignore(pos, expected)) + column = String(pos->begin,pos->end); + else + return false; + + ++pos; + if (!close_bracket.ignore(pos, expected)) + return false; + + if (s_default.ignore(pos, expected)) + { + if 
(!equals.ignore(pos, expected)) + return false; + + default_value = std::stod(String(pos->begin,pos->end)); + ++pos; + } + if (alias.empty()) + alias = std::format("{}_{}", aggregation_fun, column); + aggregation_columns.push_back(AggregationColumn(alias, aggregation_fun, column, default_value)); + + if (!comma.ignore(pos, expected)) + break; + } + return true; +} + +bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos) +{ + auto begin = pos; + auto from_pos = begin; + auto to_pos = begin; + auto step_pos = begin; + auto end_pos = begin; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if ( String(pos->begin, pos->end) == "from") + from_pos = pos; + if ( String(pos->begin, pos->end) == "to") + to_pos = pos; + if ( String(pos->begin, pos->end) == "step") + step_pos = pos; + if ( String(pos->begin, pos->end) == "by") + { + end_pos = pos; + break; + } + ++pos; + } + + if (end_pos == begin) + end_pos = pos; + + if (String(step_pos->begin, step_pos->end) != "step") + return false; + + if (String(from_pos->begin, from_pos->end) == "from") + { + ++from_pos; + auto end_from_pos = (to_pos != begin) ? to_pos : step_pos; + --end_from_pos; + from_to_step.from_str = String(from_pos->begin, end_from_pos->end); + } + + if (String(to_pos->begin, to_pos->end) == "to") + { ++to_pos; + --step_pos; + from_to_step.to_str = String(to_pos->begin, step_pos->end); + ++step_pos; + } + --end_pos; + ++step_pos; + from_to_step.step_str = String(step_pos->begin, end_pos->end); + + if (std::optional ticks; String(step_pos->begin, step_pos->end) == "time" || String(step_pos->begin, step_pos->end) == "timespan" + || ParserKQLTimespan::tryParse(from_to_step.step_str, ticks)) + { + // TODO: this is a hack of the ugliest kind that can only be fixed by supporting arbitrary expressions in make-series + static constexpr std::string_view wrapper = "toIntervalNanosecond("; + const auto timespan = getExprFromToken(from_to_step.step_str, pos.max_depth); + const auto value = timespan.substr(wrapper.length(), timespan.length() - wrapper.length() - 1); + + from_to_step.is_timespan = true; + from_to_step.step = std::stod(value) * 1e-9; + } + else + from_to_step.step = std::stod(from_to_step.step_str); + + return true; +} + +bool ParserKQLMakeSeries :: makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t & max_depth) +{ + const uint64_t era_diff = 62135596800; // the difference in seconds between 0001-01-01 (the Azure epoch) and 1970-01-01 (the Unix epoch used by ClickHouse) + + String start_str, end_str; + String sub_query, main_query; + + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + auto step = from_to_step.step; + + if (!kql_make_series.from_to_step.from_str.empty()) + start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); + + if (!kql_make_series.from_to_step.to_str.empty()) + end_str = getExprFromToken(from_to_step.to_str, max_depth); + + auto date_type_cast = [&] (String & src) + { + Tokens tokens(src.c_str(), src.c_str() + src.size()); + IParser::Pos pos(tokens, max_depth); + String res; + while (!pos->isEnd()) + { + String tmp = String(pos->begin, pos->end); + if (tmp == "kql_datetime" || tmp == "kql_todatetime") + { + ++pos; + auto
datetime_start_pos = pos; + auto datetime_end_pos = pos; + auto paren_count = 0; + while(!pos->isEnd()) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + + if (pos->type == TokenType::ClosingRoundBracket && paren_count == 0) + { + ++datetime_start_pos; + datetime_end_pos = pos; + --datetime_end_pos; + tmp = std::format("toDateTime64({}, 9, 'UTC')",String(datetime_start_pos->begin, datetime_end_pos->end)); + break; + } + + ++pos; + } + } + res = res.empty() ? tmp : res + " " + tmp; + ++pos; + } + return res; + }; + + start_str = date_type_cast(start_str); + end_str = date_type_cast(end_str); + + String bin_str, start, end; + + uint64_t diff = 0; + String axis_column_format; + String axis_str; + + auto get_group_expression_alias = [&] + { + std::vector group_expression_tokens; + Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); + IParser::Pos pos(tokens, max_depth); + while (!pos->isEnd()) + { + if (String(pos->begin, pos->end) == "AS") + { + if (!group_expression_tokens.empty()) + group_expression_tokens.pop_back(); + ++pos; + group_expression_tokens.push_back(String(pos->begin, pos->end)); + } + else + group_expression_tokens.push_back(String(pos->begin, pos->end)); + ++pos; + } + String res; + for (auto token : group_expression_tokens) + res = res + token + " "; + return res; + }; + + auto group_expression_alias = get_group_expression_alias(); + + if (from_to_step.is_timespan) + { + axis_column_format = std::format("toFloat64(toDateTime64({}, 9, 'UTC'))", axis_column); + } + else + axis_column_format = std::format("toFloat64({})", axis_column); + + if (!start_str.empty()) // has from + { + bin_str = std::format(" toFloat64({0}) + (toInt64((({1} - toFloat64({0})) / {2}) ) * {2}) AS {3}_ali ", + start_str, axis_column_format, step, axis_column); + start = std::format("toUInt64({})", start_str); + } + else + { + if (from_to_step.is_timespan) + diff = era_diff; + bin_str = std::format(" toFloat64(toInt64(({0} + {1}) / {2}) * {2}) AS {3}_ali ", axis_column_format, diff, step, axis_column); + } + + if (!end_str.empty()) + end = std::format("toUInt64({})", end_str); + + String range, condition; + + if (!start_str.empty() && !end_str.empty()) + { + range = std::format("range({}, {}, toUInt64({}))", start, end, step); + condition = std::format("where toInt64({0}) >= {1} and toInt64({0}) < {2}", axis_column_format, start, end); + } + else if (start_str.empty() && !end_str.empty()) + { + range = std::format("range(low, {} + {}, toUInt64({}))", end, diff, step); + condition = std::format("where toInt64({0}) - {1} < {2}", axis_column_format, diff, end); + } + else if (!start_str.empty() && end_str.empty()) + { + range = std::format("range({}, high, toUInt64({}))", start, step); + condition = std::format("where toInt64({}) >= {}", axis_column_format, start); + } + else + { + range = std::format("range(low, high, toUInt64({}))", step); + condition = " "; + } + + auto range_len = std::format("length({})", range); + + String sub_sub_query; + if (group_expression.empty()) + sub_sub_query = std::format(" (Select {0}, {1} FROM {2} {4} GROUP BY {3}_ali ORDER BY {3}_ali) ", subquery_columns, bin_str, "table_name", axis_column, condition); + else + sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} {5} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, "table_name", axis_column, condition); + + ASTPtr sub_query_node; + + if 
(!ParserSimpleCHSubquery(select_node).parseByString(sub_sub_query, sub_query_node, max_depth)) + return false; + select_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node)); + + if (!group_expression.empty()) + main_query = std::format("{} ", group_expression_alias); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list = std::format("tupleElement(zipped,1) AS {}", axis_column); + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = std::format("arrayConcat(groupArray ({}_ali) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", + agg_column.alias, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query = main_query.empty() ? agg_group_column : main_query + ", " + agg_group_column; + + axis_and_agg_alias_list += ", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + ++idx; // advance the tuple index per aggregation column; without this every column would read tupleElement(zipped,2) + } + + if (from_to_step.is_timespan) + axis_str = std::format("arrayDistinct(arrayConcat(groupArray(toDateTime64({0}_ali - {1},9,'UTC')), arrayMap( x->(toDateTime64(x - {1} ,9,'UTC')), {2}) )) as {0}", + axis_column, diff, range); + else + axis_str = std::format("arrayDistinct(arrayConcat(groupArray({0}_ali), arrayMap( x->(toFloat64(x)), {1}) )) as {0}", + axis_column, range); + + main_query += ", " + axis_str; + auto sub_group_by = group_expression.empty() ? "" : std::format("GROUP BY {}", group_expression_alias); + + sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} {} )", + axis_column, axis_column, step, axis_and_agg_alias_list, main_query, sub_sub_query, sub_group_by); + + if (group_expression.empty()) + main_query = std::format("{}", final_axis_agg_alias_list); + else + main_query = std::format("{},{}", group_expression_alias, final_axis_agg_alias_list); + + if (!ParserSimpleCHSubquery(select_node).parseByString(sub_query, sub_query_node, max_depth)) + return false; + select_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node)); + + kql_make_series.sub_query = std::move(sub_query); + kql_make_series.main_query = std::move(main_query); + + return true; +} + +bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + ParserKeyword s_on("on"); + ParserKeyword s_by("by"); + + ParserToken equals(TokenType::Equals); + ParserToken comma(TokenType::Comma); + + ASTPtr select_expression_list; + + KQLMakeSeries kql_make_series; + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + + //const auto make_series_parameters = getMakeSeriesParameters(pos); + + if (!parseAggregationColumns(aggregation_columns, pos)) + return false; + + if (!s_on.ignore(pos, expected)) + return false; + + axis_column = String(pos->begin, pos->end); + ++pos; + + if (!parseFromToStepClause(from_to_step, pos)) + return false; + + if (s_by.ignore(pos, expected)) + { + group_expression = getExprFromToken(pos); + if (group_expression.empty()) + return false; + } + + for (auto agg_column : aggregation_columns) + { + String column_str = std::format("{}({}) AS {}_ali", agg_column.aggregation_fun,
agg_column.column, agg_column.alias); + if (subquery_columns.empty()) + subquery_columns = column_str; + else + subquery_columns += ", "+ column_str; + } + + makeSeries(kql_make_series, node, pos.max_depth); + + Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); + IParser::Pos pos_main_query(token_main_query, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h new file mode 100644 index 000000000000..7ece7e414d58 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLMakeSeries : public ParserKQLBase +{ + +protected: + struct AggregationColumn { + String alias; + String aggregation_fun; + String column; + double default_value; + AggregationColumn(String alias_, String aggregation_fun_, String column_, double default_value_ ) + :alias(alias_), aggregation_fun(aggregation_fun_), column(column_), default_value(default_value_){} + }; + using AggregationColumns = std::vector; + + struct FromToStepClause { + String from_str; + String to_str; + String step_str; + bool is_timespan = false; + double step; + }; + + struct KQLMakeSeries { + AggregationColumns aggregation_columns; + FromToStepClause from_to_step; + String axis_column; + String group_expression; + String subquery_columns; + String sub_query; + String main_query; + }; + + bool makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t & max_depth); + bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); + bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); + + const char * getName() const override { return "KQL make-series"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index f8e4f9eaab05..5f3af9fe0e65 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,71 +1,268 @@ -#include -#include -#include -#include +#include "ParserKQLOperators.h" +#include "KustoFunctions/IParserKQLFunction.h" +#include "ParserKQLStatement.h" + #include -namespace DB +#include +#include + +namespace DB::ErrorCodes { +extern const int SYNTAX_ERROR; +} -namespace ErrorCodes +namespace +{ +enum class WildcardsPos : uint8_t { - extern const int SYNTAX_ERROR; + none, + left, + right, + both +}; + +enum class KQLOperatorValue : uint16_t +{ + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal, //!~ + equal_cs, //= + not_equal_cs, //!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in, //!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, +}; + +const std::unordered_map KQLOperator = { + {"contains", KQLOperatorValue::contains}, + {"!contains", KQLOperatorValue::not_contains}, + 
{"contains_cs", KQLOperatorValue::contains_cs}, + {"!contains_cs", KQLOperatorValue::not_contains_cs}, + {"endswith", KQLOperatorValue::endswith}, + {"!endswith", KQLOperatorValue::not_endswith}, + {"endswith_cs", KQLOperatorValue::endswith_cs}, + {"!endswith_cs", KQLOperatorValue::not_endswith_cs}, + {"=~", KQLOperatorValue::equal}, + {"!~", KQLOperatorValue::not_equal}, + {"==", KQLOperatorValue::equal_cs}, + {"!=", KQLOperatorValue::not_equal_cs}, + {"has", KQLOperatorValue::has}, + {"!has", KQLOperatorValue::not_has}, + {"has_all", KQLOperatorValue::has_all}, + {"has_any", KQLOperatorValue::has_any}, + {"has_cs", KQLOperatorValue::has_cs}, + {"!has_cs", KQLOperatorValue::not_has_cs}, + {"hasprefix", KQLOperatorValue::hasprefix}, + {"!hasprefix", KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs", KQLOperatorValue::hasprefix_cs}, + {"!hasprefix_cs", KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix", KQLOperatorValue::hassuffix}, + {"!hassuffix", KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs", KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs", KQLOperatorValue::not_hassuffix_cs}, + {"in", KQLOperatorValue::in_cs}, + {"!in", KQLOperatorValue::not_in_cs}, + {"in~", KQLOperatorValue::in}, + {"!in~", KQLOperatorValue::not_in}, + {"matches regex", KQLOperatorValue::matches_regex}, + {"startswith", KQLOperatorValue::startswith}, + {"!startswith", KQLOperatorValue::not_startswith}, + {"startswith_cs", KQLOperatorValue::startswith_cs}, + {"!startswith_cs", KQLOperatorValue::not_startswith_cs}, +}; } -String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos &token_pos,String kql_op, String ch_op) +String genHasAnyAllOpExpr( + std::vector & tokens, DB::IParser::Pos & token_pos, const std::string & kql_op, const std::string_view ch_op) { - String new_expr; - Expected expected; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + std::string new_expr; + DB::Expected expected; + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw DB::Exception("Syntax error near " + kql_op, DB::ErrorCodes::SYNTAX_ERROR); auto haystack = tokens.back(); - - String logic_op = (kql_op == "has_all") ? " and " : " or "; - - while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + const auto logic_op = (kql_op == "has_all") ?
" and " : " or "; + while (!token_pos->isEnd() && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) { - auto tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::Comma) - new_expr = new_expr + logic_op; + auto tmp_arg = DB::IParserKQLFunction::getExpression(token_pos); + if (token_pos->type == DB::TokenType::Comma) + new_expr += logic_op; else - new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + new_expr += std::vformat(ch_op, std::make_format_args(haystack, tmp_arg)); ++token_pos; - if (token_pos->type == TokenType::ClosingRoundBracket) + if (token_pos->type == DB::TokenType::ClosingRoundBracket) break; - } tokens.pop_back(); return new_expr; } -String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op) +String genEqOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const DB::String & ch_op) { - String new_expr; + const DB::String tmp_arg(token_pos->begin, token_pos->end); + + if (tokens.empty() || tmp_arg != "~") + return tmp_arg; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + DB::String new_expr; + new_expr += "lower(" + tokens.back() + ")"; + new_expr += ch_op; + ++token_pos; + new_expr += " lower(" + DB::String(token_pos->begin, token_pos->end) + ")" + " "; + tokens.pop_back(); - ASTPtr select; - Expected expected; + return new_expr; +} + +String genInOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const DB::String & kql_op, const DB::String & ch_op) +{ + DB::ParserKQLTaleFunction kqlfun_p; + + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); + + DB::ASTPtr select; + DB::Expected expected; + DB::String new_expr; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw DB::Exception("Syntax error near " + kql_op, DB::ErrorCodes::SYNTAX_ERROR); + + for (const auto & s : tokens) + new_expr += "lower(" + s + ")" + " "; + auto pos = token_pos; + if (kqlfun_p.parse(pos, select, expected)) + { + new_expr += ch_op + " kql"; + auto tmp_pos = token_pos; + auto keep_pos = token_pos; + int pipe = 0; + bool desired_column_lowered = false; + while (tmp_pos != pos) + { + ++tmp_pos; + if (tmp_pos->type == DB::TokenType::PipeMark) + pipe += 1; + if (pipe == 2 && !desired_column_lowered) + { + new_expr = new_expr + " tolower(" + DB::String(keep_pos->begin, keep_pos->end) + ")"; + desired_column_lowered = true; + } + else + new_expr = new_expr + " " + DB::String(keep_pos->begin, keep_pos->end); + ++keep_pos; + } + + if (pos->type != DB::TokenType::ClosingRoundBracket) + throw DB::Exception("Syntax error near " + kql_op, DB::ErrorCodes::SYNTAX_ERROR); + + token_pos = pos; + tokens.pop_back(); + return new_expr; + } --token_pos; --token_pos; - return ch_op; + new_expr += ch_op + "( "; + while (!token_pos->isEnd() && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) + { + auto tmp_arg = DB::String(token_pos->begin, token_pos->end); + if (token_pos->type != DB::TokenType::Comma && token_pos->type != DB::TokenType::ClosingRoundBracket + && token_pos->type != DB::TokenType::OpeningRoundBracket && token_pos->type != DB::TokenType::OpeningSquareBracket + && token_pos->type != DB::TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic") + new_expr = new_expr + "lower(" + tmp_arg + ")"; + ++token_pos; + if (token_pos->type == DB::TokenType::ClosingRoundBracket) + break; + else if (token_pos->type ==
DB::TokenType::Comma) + new_expr += ", "; + } + ++token_pos; + new_expr += ")"; + + tokens.pop_back(); + return new_expr; +} + +std::string genInOpExpr(DB::IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op) +{ + DB::ParserKQLTaleFunction kqlfun_p; + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); + + DB::ASTPtr select; + DB::Expected expected; + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw DB::Exception("Syntax error near " + kql_op, DB::ErrorCodes::SYNTAX_ERROR); + + auto pos = token_pos; + if (kqlfun_p.parse(pos, select, expected)) + { + auto new_expr = ch_op + " kql"; + auto tmp_pos = token_pos; + while (tmp_pos != pos) + { + new_expr = new_expr + " " + std::string(tmp_pos->begin, tmp_pos->end); + ++tmp_pos; + } + + if (pos->type != DB::TokenType::ClosingRoundBracket) + throw DB::Exception("Syntax error near " + kql_op, DB::ErrorCodes::SYNTAX_ERROR); + + token_pos = pos; + return new_expr; + } + + --token_pos; + --token_pos; + return ch_op; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) +std::string genHaystackOpExpr( + std::vector & tokens, + DB::IParser::Pos & token_pos, + const std::string & kql_op, + const std::string_view ch_op, + WildcardsPos wildcards_pos, + WildcardsPos space_pos = WildcardsPos::none) { - String new_expr, left_wildcards, right_wildcards, left_space, right_space; + std::string new_expr, left_wildcards, right_wildcards, left_space, right_space; switch (wildcards_pos) { @@ -73,7 +270,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::left: - left_wildcards ="%"; + left_wildcards = "%"; break; case WildcardsPos::right: @@ -81,7 +278,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::both: - left_wildcards ="%"; + left_wildcards = "%"; right_wildcards = "%"; break; } @@ -92,7 +289,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::left: - left_space =" "; + left_space = " "; break; case WildcardsPos::right: @@ -100,260 +297,273 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::both: - left_space =" "; + left_space = " "; right_space = " "; break; } ++token_pos; - if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; - else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + if (!tokens.empty() && (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier)) + new_expr = std::vformat( + ch_op, + std::make_format_args( + tokens.back(), + "'" + left_wildcards + left_space + std::string(token_pos->begin + 1, token_pos->end - 1) + right_space + right_wildcards + + "'")); + else if (!tokens.empty() && token_pos->type == DB::TokenType::BareWord) { - auto tmp_arg = String(token_pos->begin, token_pos->end); - new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; + auto tmp_arg = DB::IParserKQLFunction::getExpression(token_pos); + new_expr = std::vformat( + ch_op, + std::make_format_args( + tokens.back(), 
"concat('" + left_wildcards + left_space + "', " + tmp_arg + ", '" + right_space + right_wildcards + "')")); } else - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + tokens.pop_back(); return new_expr; } -bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) +namespace DB +{ +bool KQLOperators::convert(std::vector & tokens, IParser::Pos & pos) { + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + return false; + auto begin = pos; + auto token = String(pos->begin, pos->end); - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + String op = token; + if (token == "!") + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op = "!" + String(pos->begin, pos->end); + } + else if (token == "matches") { - KQLOperatorValue op_value = KQLOperatorValue::none; - - auto token = String(pos->begin,pos->end); - - String op = token; - if (token == "!") - { - ++pos; - if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) - throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); - op ="!"+String(pos->begin,pos->end); - } - else if (token == "matches") - { - ++pos; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - if (String(pos->begin,pos->end) == "regex") - op +=" regex"; - else - --pos; - } - } - else - { - op = token; - } - ++pos; if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - if (String(pos->begin,pos->end) == "~") - op +="~"; + if (String(pos->begin, pos->end) == "regex") + op += " regex"; else --pos; } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "~") + op += "~"; else --pos; + } + else + --pos; - if (KQLOperator.find(op) == KQLOperator.end()) - { - pos = begin; - return false; - } + const auto op_it = KQLOperator.find(op); + if (op_it == KQLOperator.end()) + { + pos = begin; + return false; + } - op_value = KQLOperator[op]; + String new_expr; - String new_expr; + const auto & op_value = op_it->second; + if (op_value == KQLOperatorValue::none) + { + tokens.push_back(op); + return true; + } - if (op_value == KQLOperatorValue::none) - tokens.push_back(op); - else - { - auto last_op = tokens.back(); - auto last_pos = pos; + if (tokens.empty()) + throw Exception("Syntax error near " + op, ErrorCodes::SYNTAX_ERROR); - switch (op_value) - { - case KQLOperatorValue::contains: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); - break; - - case KQLOperatorValue::not_contains: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); - break; - - case KQLOperatorValue::contains_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); - break; - - case KQLOperatorValue::not_contains_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); - break; - - case KQLOperatorValue::endswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); - break; - - case KQLOperatorValue::not_endswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); - break; 
- - case KQLOperatorValue::endswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); - break; - - case KQLOperatorValue::not_endswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); - break; - - case KQLOperatorValue::equal: - break; - - case KQLOperatorValue::not_equal: - break; - - case KQLOperatorValue::equal_cs: - new_expr = "=="; - break; - - case KQLOperatorValue::not_equal_cs: - new_expr = "!="; - break; - case KQLOperatorValue::has: - new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); - break; - - case KQLOperatorValue::not_has: - new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); - break; - - case KQLOperatorValue::has_all: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); - break; - - case KQLOperatorValue::has_any: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive"); - break; - - case KQLOperatorValue::has_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); - break; - - case KQLOperatorValue::not_has_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); - break; - - case KQLOperatorValue::hasprefix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); - break; - - case KQLOperatorValue::not_hasprefix: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); - break; - - case KQLOperatorValue::hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); - break; - - case KQLOperatorValue::not_hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); - break; - - case KQLOperatorValue::hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); - break; - - case KQLOperatorValue::not_hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); - break; - - case KQLOperatorValue::hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); - break; - - case KQLOperatorValue::not_hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not 
like", WildcardsPos::both, WildcardsPos::right); - break; - - case KQLOperatorValue::in_cs: - new_expr = genInOpExpr(pos,op,"in"); - break; - - case KQLOperatorValue::not_in_cs: - new_expr = genInOpExpr(pos,op,"not in"); - break; - - case KQLOperatorValue::in: - break; - - case KQLOperatorValue::not_in: - break; - - case KQLOperatorValue::matches_regex: - new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); - break; - - case KQLOperatorValue::startswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); - break; - - case KQLOperatorValue::not_startswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); - break; - - case KQLOperatorValue::startswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); - break; - - case KQLOperatorValue::not_startswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); - break; - - default: - break; - } + auto last_op = tokens.back(); + auto last_pos = pos; - tokens.push_back(new_expr); - } - return true; + switch (op_value) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike({0}, {1})", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike({0}, {1})", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like({0}, {1})", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like({0}, {1})", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike({0}, {1})", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike({0}, {1})", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith({0}, {1})", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith({0}, {1})", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + new_expr = genEqOpExprCis(tokens, pos, "=="); + break; + + case KQLOperatorValue::not_equal: + new_expr = genEqOpExprCis(tokens, pos, "!="); + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, "ifNull(hasTokenCaseInsensitiveOrNull({0}, {1}), {0} = {1})", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr + = genHaystackOpExpr(tokens, pos, op, "not ifNull(hasTokenCaseInsensitiveOrNull({0}, {1}), {0} = {1})", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + new_expr = genHasAnyAllOpExpr(tokens, pos, op, "ifNull(hasTokenCaseInsensitiveOrNull({0}, {1}), {0} = {1})"); + break; + + case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens, pos, op, "ifNull(hasTokenCaseInsensitiveOrNull({0}, {1}), {0} = {1})"); + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "ifNull(hasTokenOrNull({0}, {1}), {0} = {1})", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ifNull(hasTokenOrNull({0}, {1}), {0} = {1})", 
WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike({0}, {1})", WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike({0}, {1})", WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike({0}, {1})", WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike({0}, {1})", WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith({0}, {1})", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like({0}, {1})", WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::not_hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith({0}, {1})", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like({0}, {1})", WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike({0}, {1})", WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike({0}, {1})", WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::not_hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike({0}, {1})", WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike({0}, {1})", WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith({0}, {1})", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like({0}, {1})", WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::not_hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith({0}, {1})", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like({0}, {1})", WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::in_cs: + new_expr = genInOpExpr(pos, op, "in"); + break; + + case KQLOperatorValue::not_in_cs: + new_expr = genInOpExpr(pos, op, "not in"); + break; + + case KQLOperatorValue::in: + new_expr = genInOpExprCis(tokens, pos, op, "in"); + break; + + case KQLOperatorValue::not_in: + new_expr = genInOpExprCis(tokens, pos, op, "not in"); + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match({0}, {1})", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike({0}, {1})", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike({0}, {1})", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith({0}, {1})", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not 
startsWith({0}, {1})", WildcardsPos::none); + break; + + default: + break; } - pos = begin; - return false; -} + tokens.push_back(new_expr); + return true; +} } - diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9796ae10c07c..294e45c44c72 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -1,106 +1,12 @@ #pragma once -#include -#include -#include +#include + namespace DB { - class KQLOperators { public: - bool convert(std::vector &tokens,IParser::Pos &pos); -protected: - - enum class WildcardsPos:uint8_t - { - none, - left, - right, - both - }; - - enum class KQLOperatorValue : uint16_t - { - none, - contains, - not_contains, - contains_cs, - not_contains_cs, - endswith, - not_endswith, - endswith_cs, - not_endswith_cs, - equal, //=~ - not_equal,//!~ - equal_cs, //= - not_equal_cs,//!= - has, - not_has, - has_all, - has_any, - has_cs, - not_has_cs, - hasprefix, - not_hasprefix, - hasprefix_cs, - not_hasprefix_cs, - hassuffix, - not_hassuffix, - hassuffix_cs, - not_hassuffix_cs, - in_cs, //in - not_in_cs, //!in - in, //in~ - not_in ,//!in~ - matches_regex, - startswith, - not_startswith, - startswith_cs, - not_startswith_cs, - }; - - std::unordered_map KQLOperator = - { - {"contains" , KQLOperatorValue::contains}, - {"!contains" , KQLOperatorValue::not_contains}, - {"contains_cs" , KQLOperatorValue::contains_cs}, - {"!contains_cs" , KQLOperatorValue::not_contains_cs}, - {"endswith" , KQLOperatorValue::endswith}, - {"!endswith" , KQLOperatorValue::not_endswith}, - {"endswith_cs" , KQLOperatorValue::endswith_cs}, - {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, - {"=~" , KQLOperatorValue::equal}, - {"!~" , KQLOperatorValue::not_equal}, - {"==" , KQLOperatorValue::equal_cs}, - {"!=" , KQLOperatorValue::not_equal_cs}, - {"has" , KQLOperatorValue::has}, - {"!has" , KQLOperatorValue::not_has}, - {"has_all" , KQLOperatorValue::has_all}, - {"has_any" , KQLOperatorValue::has_any}, - {"has_cs" , KQLOperatorValue::has_cs}, - {"!has_cs" , KQLOperatorValue::not_has_cs}, - {"hasprefix" , KQLOperatorValue::hasprefix}, - {"!hasprefix" , KQLOperatorValue::not_hasprefix}, - {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, - {"hassuffix" , KQLOperatorValue::hassuffix}, - {"!hassuffix" , KQLOperatorValue::not_hassuffix}, - {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, - {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, - {"in" , KQLOperatorValue::in_cs}, - {"!in" , KQLOperatorValue::not_in_cs}, - {"in~" , KQLOperatorValue::in}, - {"!in~" , KQLOperatorValue::not_in}, - {"matches regex" , KQLOperatorValue::matches_regex}, - {"startswith" , KQLOperatorValue::startswith}, - {"!startswith" , KQLOperatorValue::not_startswith}, - {"startswith_cs" , KQLOperatorValue::startswith_cs}, - {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, - }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); - static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); - static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); + bool convert(std::vector & tokens, IParser::Pos & pos); }; - } diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp new file mode 100644 index 000000000000..1f56ddee4c1f --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -0,0 +1,22 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_expression_list; + const String expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLPrint.h b/src/Parsers/Kusto/ParserKQLPrint.h new file mode 100644 index 000000000000..c962d7fa4282 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLPrint : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL print"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index e978323d8215..d1adc4f89443 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -14,7 +14,7 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) + if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) return false; node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h index b64675beed09..8e12b8dc6bb3 100644 --- a/src/Parsers/Kusto/ParserKQLProject.h +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -8,6 +8,7 @@ namespace DB class ParserKQLProject : public ParserKQLBase { + protected: const char * getName() const override { return "KQL project"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 04ee36705a9a..bcf25d0f9186 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -1,24 +1,189 @@ +#include #include #include +#include +#include +#include +#include #include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include + #include -#include -#include -#include namespace DB { -String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +namespace ErrorCodes +{ + extern const int UNKNOWN_FUNCTION; + extern const int SYNTAX_ERROR; +} + +std::unordered_map kql_parser = { + {"filter", {"filter", false, false, false, 3}}, + {"where", {"filter", false, false, false, 3}}, + {"limit", {"limit", false, true, false, 3}}, + {"take", {"limit", false, true, false, 3}}, + {"project", {"project", false, false, false, 3}}, + {"distinct", {"distinct", true, true, false, 3}}, + {"extend", {"extend", true, true, false, 3}}, + {"sort by", {"order by", false, false, false, 4}}, + {"order by", {"order
by", false, false, false, 4}}, + {"table", {"table", false, false, false, 3}}, + {"print", {"print", false, true, false, 3}}, + {"summarize", {"summarize", true, true, false, 3}}, + {"make-series", {"make-series", true, true, false, 5}}, + {"mv-expand", {"mv-expand", true, true, false, 5}}, + {"count", {"count", false, true, false, 3}}, + {"top", {"top", false, true, true, 3}}, + {"top-hitters", {"top-hitters", true, true, true, 5}}, + {"lookup", {"lookup", true, true, false, 3}}, + {"join", {"join", true, true, false, 3}}, + {"top-nested", {"top-nested", true, true, true, 5}}, + {"range", {"range", false, true, false, 3}}, +}; + +bool ParserKQLBase::parseByString(const String expr, ASTPtr & node, const uint32_t max_depth) +{ + Expected expected; + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos pos(tokens, max_depth); + return parse(pos, node, expected); +} + +bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth) +{ + Expected expected; + Tokens token_subquery(query.c_str(), query.c_str() + query.size()); + IParser::Pos pos_subquery(token_subquery, max_depth); + if (!parser->parse(pos_subquery, select_node, expected)) + return false; + return true; +}; + +bool ParserKQLBase::setSubQuerySource( + ASTPtr & select_query, ASTPtr & source, const bool dest_is_subquery, const bool src_is_subquery, const String alias, const int32_t table_index) +{ + ASTPtr table_expr; + auto apply_alias = [&]() + { + if (!alias.empty()) + { + if (table_expr->as()->table_expression->as()->subquery) + table_expr->as() + ->table_expression->as() + ->subquery->as() + ->alias + = std::move(alias); + else if (table_expr->as()->table_expression->as()->database_and_table_name) + { + table_expr + = table_expr->as()->table_expression->as()->database_and_table_name; + if (auto * ast_with_alias = dynamic_cast(table_expr.get())) + ast_with_alias->alias = std::move(alias); + } + } + }; + if (!dest_is_subquery) + { + if (!select_query || !select_query->as()->tables() + || select_query->as()->tables()->as()->children.empty()) + return false; + table_expr = select_query->as()->tables()->as()->children[table_index]; + + if (!src_is_subquery) + { + table_expr->as()->table_expression + = source->as()->tables()->children[0]->as()->table_expression; + } + else + { + table_expr->as()->table_expression + = source->children[0]->as()->table_expression; + } + apply_alias(); + return true; + } + + if (!select_query || select_query->as()->children.empty() + || !select_query->as()->children[table_index]->as()->table_expression + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children.empty() + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children.empty() + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children[0] + ->as() + ->tables() + ->as() + ->children.empty()) + return false; + + table_expr = select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children[0] + ->as() + ->tables() + ->as() + ->children[0]; + + if (!src_is_subquery) + { + table_expr->as()->table_expression + = source->as()->tables()->children[0]->as()->table_expression; + } + else + { + table_expr->as()->table_expression + = 
source->children[0]->as()->table_expression; + } + apply_alias(); + + return true; +} + +String ParserKQLBase::getExprFromToken(const String & text, const uint32_t max_depth) { Tokens tokens(text.c_str(), text.c_str() + text.size()); IParser::Pos pos(tokens, max_depth); @@ -26,7 +191,7 @@ String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & m return getExprFromToken(pos); } -String ParserKQLBase :: getExprFromPipe(Pos & pos) +String ParserKQLBase::getExprFromPipe(Pos & pos) { uint16_t bracket_count = 0; auto begin = pos; @@ -36,7 +201,7 @@ String ParserKQLBase :: getExprFromPipe(Pos & pos) if (end->type == TokenType::OpeningRoundBracket) ++bracket_count; - if (end->type == TokenType::OpeningRoundBracket) + if (end->type == TokenType::ClosingRoundBracket) --bracket_count; if (end->type == TokenType::PipeMark && bracket_count == 0) @@ -45,57 +210,164 @@ String ParserKQLBase :: getExprFromPipe(Pos & pos) ++end; } --end; - return String(begin->begin, end->end); + return (begin <= end) ? String(begin->begin, end->end) : ""; } -String ParserKQLBase :: getExprFromToken(Pos & pos) +String ParserKQLBase::getExprFromToken(Pos & pos) { String res; - std::vector tokens; - String alias; + std::vector comma_pos; + std::vector columns; + size_t paren_count = 0; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + comma_pos.push_back(pos); + while (!pos->isEnd() && pos->type != TokenType::Semicolon) { - String token = String(pos->begin,pos->end); + if (pos->type == TokenType::PipeMark && paren_count == 0) + break; + + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; - if (token == "=") + if (pos->type == TokenType::Comma && paren_count == 0) { ++pos; - if (String(pos->begin,pos->end) != "~") + comma_pos.push_back(pos); + --pos; + } + ++pos; + } + + auto set_columns = [&](Pos & start_pos, Pos & end_pos) + { + bool has_alias = false; + auto equal_pos = start_pos; + auto columns_start_pos = start_pos; + auto it_pos = start_pos; + if (String(it_pos->begin, it_pos->end) == "=") + throw Exception("Invalid equal symbol (=)", ErrorCodes::SYNTAX_ERROR); + + while (it_pos < end_pos) + { + if (String(it_pos->begin, it_pos->end) == "=") { - alias = tokens.back(); - tokens.pop_back(); + ++it_pos; + if (String(it_pos->begin, it_pos->end) != "~") + { + if (has_alias) + throw Exception("Invalid equal symbol (=)", ErrorCodes::SYNTAX_ERROR); + has_alias = true; + } + --it_pos; + equal_pos = it_pos; } - --pos; + ++it_pos; } - else if (!KQLOperators().convert(tokens,pos)) + + if (has_alias) { - tokens.push_back(token); + columns_start_pos = equal_pos; + ++columns_start_pos; } + String column_str; + String function_name; + std::vector tokens; - if (pos->type == TokenType::Comma && !alias.empty()) + while (columns_start_pos < end_pos) { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - tokens.push_back(","); - alias.clear(); + if (!KQLOperators().convert(tokens, columns_start_pos)) + { + if (columns_start_pos->type == TokenType::BareWord && function_name.empty()) + function_name = String(columns_start_pos->begin, columns_start_pos->end); + + auto expr = IParserKQLFunction::getExpression(columns_start_pos); + tokens.push_back(expr); + } + ++columns_start_pos; } - ++pos; - } - if (!alias.empty()) + for (auto token : tokens) + column_str = column_str.empty() ?
token : column_str + " " + token; + + if (has_alias) + { + --equal_pos; + if (start_pos == equal_pos) + { + String new_column_str; + if (start_pos->type != TokenType::BareWord) + throw Exception(String(start_pos->begin, start_pos->end) + " is not a valid alias", ErrorCodes::SYNTAX_ERROR); + + if (function_name == "array_sort_asc" || function_name == "array_sort_desc") + new_column_str = std::format("{0}[1] AS {1}", column_str, String(start_pos->begin, start_pos->end)); + else + new_column_str = std::format("{0} AS {1}", column_str, String(start_pos->begin, start_pos->end)); + + columns.push_back(new_column_str); + } + else + { + String whole_alias(start_pos->begin, equal_pos->end); + + if (function_name != "array_sort_asc" && function_name != "array_sort_desc") + throw Exception(whole_alias + " is not a valid alias", ErrorCodes::SYNTAX_ERROR); + + if (start_pos->type != TokenType::OpeningRoundBracket && equal_pos->type != TokenType::ClosingRoundBracket) + throw Exception(whole_alias + " is not a valid alias for " + function_name, ErrorCodes::SYNTAX_ERROR); + + String alias_inside; + bool comma_meet = false; + size_t index = 1; + ++start_pos; + while (start_pos < equal_pos) + { + if (start_pos->type == TokenType::Comma) + { + alias_inside.clear(); + if (comma_meet) + throw Exception(whole_alias + " has invalid alias for " + function_name, ErrorCodes::SYNTAX_ERROR); + comma_meet = true; + } + else + { + if (!alias_inside.empty() || start_pos->type != TokenType::BareWord) + throw Exception(whole_alias + " has invalid alias for " + function_name, ErrorCodes::SYNTAX_ERROR); + + alias_inside = String(start_pos->begin, start_pos->end); + auto new_column_str = std::format("{0}[{1}] AS {2}", column_str, index, alias_inside); + columns.push_back(new_column_str); + comma_meet = false; + ++index; + } + ++start_pos; + } + } + } + else + columns.push_back(column_str); + }; + + size_t column_size = comma_pos.size(); + for (size_t i = 0; i < column_size; ++i) { + if (i == column_size - 1) + set_columns(comma_pos[i], pos); + else + { + auto end_pos = comma_pos[i + 1]; + --end_pos; + set_columns(comma_pos[i], end_pos); + } } - for (auto const &token : tokens) - res = res.empty()? token : res +" " + token; + for (auto token : columns) + res = res.empty() ?
token : res + "," + token; return res; } -std::unique_ptr ParserKQLQuery::getOperator(String & op_name) +std::unique_ptr ParserKQLQuery::getOperator(String & op_name) { if (op_name == "filter" || op_name == "where") return std::make_unique(); @@ -103,73 +375,96 @@ std::unique_ptr ParserKQLQuery::getOperator(String & op_name) return std::make_unique(); else if (op_name == "project") return std::make_unique(); + else if (op_name == "distinct") + return std::make_unique(); + else if (op_name == "extend") + return std::make_unique(); else if (op_name == "sort by" || op_name == "order by") return std::make_unique(); else if (op_name == "summarize") return std::make_unique(); else if (op_name == "table") return std::make_unique(); + else if (op_name == "make-series") + return std::make_unique(); + else if (op_name == "mv-expand") + return std::make_unique(); + else if (op_name == "print") + return std::make_unique(); + else if (op_name == "count") + return std::make_unique(); + else if (op_name == "top") + return std::make_unique(); + else if (op_name == "top-hitters") + return std::make_unique(); + else if (op_name == "lookup") + return std::make_unique(); + else if (op_name == "join") + return std::make_unique(); + else if (op_name == "top-nested") + return std::make_unique(); + else if (op_name == "range") + return std::make_unique(); else return nullptr; } -bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserKQLQuery::getOperations(Pos & pos, Expected & expected, OperationsPos & operation_pos) { - struct KQLOperatorDataFlowState - { - String operator_name; - bool need_input; - bool gen_output; - int8_t backspace_steps; // how many steps to last token of previous pipe - }; - - auto select_query = std::make_shared(); - node = select_query; - ASTPtr tables; - - std::unordered_map kql_parser = - { - { "filter", {"filter", false, false, 3}}, - { "where", {"filter", false, false, 3}}, - { "limit", {"limit", false, true, 3}}, - { "take", {"limit", false, true, 3}}, - { "project", {"project", false, false, 3}}, - { "sort by", {"order by", false, false, 4}}, - { "order by", {"order by", false, false, 4}}, - { "table", {"table", false, false, 3}}, - { "summarize", {"summarize", true, true, 3}} - }; - - std::vector> operation_pos; - String table_name(pos->begin, pos->end); - operation_pos.push_back(std::make_pair("table", pos)); + if (table_name == "print" || table_name == "range") + operation_pos.push_back(std::make_pair(table_name, pos)); + else + operation_pos.push_back(std::make_pair("table", pos)); + ++pos; + uint16_t bracket_count = 0; while (!pos->isEnd() && pos->type != TokenType::Semicolon) { if (pos->type == TokenType::OpeningRoundBracket) ++bracket_count; - if (pos->type == TokenType::OpeningRoundBracket) + if (pos->type == TokenType::ClosingRoundBracket) --bracket_count; if (pos->type == TokenType::PipeMark && bracket_count == 0) { ++pos; String kql_operator(pos->begin, pos->end); - if (kql_operator == "order" || kql_operator == "sort") + + auto validate_kql_operator = [&] { - ++pos; - ParserKeyword s_by("by"); - if (s_by.ignore(pos,expected)) + if (kql_operator == "order" || kql_operator == "sort") { - kql_operator = "order by"; - --pos; + ++pos; + ParserKeyword s_by("by"); + if (s_by.ignore(pos, expected)) + { + kql_operator = "order by"; + --pos; + } } - } - if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) + else + { + auto op_pos_begin = pos; + ++pos; + ParserToken s_dash(TokenType::Minus); + if 
(s_dash.ignore(pos, expected)) + { + String tmp_op(op_pos_begin->begin, pos->end); + kql_operator = tmp_op; + } + else + --pos; + } + if (kql_parser.find(kql_operator) == kql_parser.end()) + return false; + return true; + }; + + if (!validate_kql_operator()) return false; ++pos; operation_pos.push_back(std::make_pair(kql_operator, pos)); @@ -177,29 +472,129 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else ++pos; } + return true; +} - auto kql_operator_str = operation_pos.back().first; - auto npos = operation_pos.back().second; - if (!npos.isValid()) +bool ParserKQLQuery::pre_process(String & source, Pos & pos) +{ + bool need_preprocess = false; + auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::HereDoc) + need_preprocess = true; + + ++pos; + } + + auto end = pos; + --end; + source = String(begin->begin, end->end); + + auto replace = [&](std::string & str, const std::string & from, const std::string & to) + { + size_t start_pos = str.find(from); + if (start_pos != std::string::npos) + { + str.replace(start_pos, from.length(), to); + return true; + } return false; + }; + + if (need_preprocess) + { + bool done = true; + while (done) + done = replace(source, "$left", "left_"); + done = true; + while (done) + done = replace(source, "$right", "right_"); + } + + return need_preprocess; +} + +bool ParserKQLQuery::parseImpl(Pos & original_pos, ASTPtr & node, Expected & expected) +{ + auto pos = original_pos; + bool pre_processed = false; + String pre_processed_query; + + pre_processed = pre_process(pre_processed_query, original_pos); + if (pre_processed) + { + Tokens tokens(pre_processed_query.data(), pre_processed_query.data() + pre_processed_query.size(), original_pos.max_depth); + IParser::Pos n_pos(tokens, original_pos.max_depth); + return executeImpl(n_pos, node, expected); + } + return executeImpl(pos, node, expected); +} + +bool ParserKQLQuery::executeImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto select_query = std::make_shared(); + node = select_query; + ASTPtr tables; + OperationsPos operation_pos; + + if (!getOperations(pos, expected, operation_pos)) + return false; + + auto kql_operator_str = operation_pos.back().first; auto kql_operator_p = getOperator(kql_operator_str); + if (!kql_operator_p) + return false; + + String updated_query; + kql_operator_p->updatePipeLine(operation_pos, updated_query); + Tokens token_query(updated_query.c_str(), updated_query.c_str() + updated_query.size()); + IParser::Pos pos_query(token_query, pos.max_depth); + if (!updated_query.empty()) + { + operation_pos.clear(); + if (!ParserKQLQuery::getOperations(pos_query, expected, operation_pos)) + return false; + } + + kql_operator_str = operation_pos.back().first; + kql_operator_p = getOperator(kql_operator_str); if (!kql_operator_p) return false; + auto npos = operation_pos.back().second; + if (operation_pos.size() == 1) { - if (!kql_operator_p->parse(npos, node, expected)) - return false; + if (kql_operator_str == "print") + { + ++npos; + if (!ParserKQLPrint().parse(npos, node, expected)) + return false; + } + else if (kql_operator_str == "range") + { + ++npos; + if (!ParserKQLRange().parse(npos, node, expected)) + return false; + } + else if (kql_operator_str == "table") + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } } else if (operation_pos.size() == 2 && operation_pos.front().first == "table") { - if (!kql_operator_p->parse(npos, node, expected)) - return false; 
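+ /// Parse the table (the pipeline source) first, then apply the trailing operator to it.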
npos = operation_pos.front().second; if (!ParserKQLTable().parse(npos, node, expected)) return false; + + npos = operation_pos.back().second; + if (!kql_operator_p->parse(npos, node, expected)) + return false; } else { @@ -207,7 +602,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto last_pos = operation_pos.back().second; auto last_op = operation_pos.back().first; - auto set_main_query_clause =[&](String & op, Pos & op_pos) + auto set_main_query_clause = [&](String & op, Pos & op_pos) { auto op_str = ParserKQLBase::getExprFromPipe(op_pos); if (op == "project") @@ -224,12 +619,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) operation_pos.pop_back(); - if (kql_parser[last_op].need_input) - { - if (!kql_operator_p->parse(npos, node, expected)) - return false; - } - else + if (!kql_parser[last_op].need_input) { while (!operation_pos.empty()) { @@ -240,6 +630,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) break; if (!project_clause.empty() && prev_op == "project") break; + set_main_query_clause(prev_op, prev_pos); operation_pos.pop_back(); last_op = prev_op; @@ -249,7 +640,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!operation_pos.empty()) { - for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) + for (auto i = 0; i < kql_parser[last_op].backspace_steps; ++i) --last_pos; String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); @@ -266,7 +657,10 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto set_query_clasue =[&](String op_str, String op_calsue) + if (!kql_operator_p->parse(npos, node, expected)) + return false; + + auto set_query_clasue = [&](String op_str, String op_calsue) { auto oprator = getOperator(op_str); if (oprator) @@ -279,7 +673,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!select_query->select()) + if (!node->as()->select()) { if (project_clause.empty()) project_clause = "*"; @@ -301,16 +695,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } - if (!select_query->select()) + if (!node->as()->select()) { auto expr = String("*"); - Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); if (!std::make_unique()->parse(new_pos, node, expected)) return false; } - return true; + return true; } bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -339,4 +733,46 @@ bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +bool ParserSimpleCHSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr sub_select_node; + ParserSelectWithUnionQuery select; + + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + if (!select.parse(pos, sub_select_node, expected)) + return false; + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + + if (parent_select_node && parent_select_node->as()->tables()) + { + auto select_query = sub_select_node->as()->list_of_selects->children[0]; + select_query->as()->setExpression( + ASTSelectQuery::Expression::TABLES, parent_select_node->as()->tables()); + } + + ASTPtr node_subquery = std::make_shared(); + node_subquery->children.push_back(sub_select_node); + + ASTPtr 
node_table_expr = std::make_shared(); + node_table_expr->as()->subquery = node_subquery; + + node_table_expr->children.emplace_back(node_subquery); + + ASTPtr node_table_in_select_query_emlement = std::make_shared(); + node_table_in_select_query_emlement->as()->table_expression = node_table_expr; + + ASTPtr res = std::make_shared(); + + res->children.emplace_back(node_table_in_select_query_emlement); + + node = res; + return true; +} + } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 42f5f84f0317..7fbc1b4a85d1 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -5,28 +5,54 @@ namespace DB { +using OperationsPos = std::vector>; + class ParserKQLBase : public IParserBase { public: static String getExprFromToken(Pos & pos); + static String getExprFromToken(const String & text, const uint32_t max_depth); static String getExprFromPipe(Pos & pos); - static String getExprFromToken(const String & text, const uint32_t & max_depth); + static bool setSubQuerySource(ASTPtr & select_query, ASTPtr & source, const bool dest_is_subquery, const bool src_is_subquery, const String alias = "", const int32_t table_index = 0); + static bool parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth); + bool parseByString(const String expr, ASTPtr & node, const uint32_t max_depth); + virtual bool updatePipeLine (OperationsPos & /*operations*/, String & /*query*/) {return false;} }; class ParserKQLQuery : public IParserBase { - +public: + struct KQLOperatorDataFlowState + { + String operator_name; + bool need_input; + bool gen_output; + bool need_reinterpret; + int8_t backspace_steps; // how many steps to last token of previous pipe + }; + static bool getOperations(Pos & pos, Expected & expected, OperationsPos & operation_pos); protected: - static std::unique_ptr getOperator(String &op_name); + static std::unique_ptr getOperator(String &op_name); + static bool pre_process(String & source, Pos & pos); const char * getName() const override { return "KQL query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool executeImpl(Pos & pos, ASTPtr & node, Expected & expected); }; -class ParserKQLSubquery : public IParserBase +class ParserKQLSubquery : public ParserKQLBase { protected: const char * getName() const override { return "KQL subquery"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserSimpleCHSubquery : public ParserKQLBase +{ +public: + ParserSimpleCHSubquery(ASTPtr parent_select_node_ = nullptr) {parent_select_node = parent_select_node_;} +protected: + const char * getName() const override { return "Simple ClickHouse subquery"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + ASTPtr parent_select_node; +}; } diff --git a/src/Parsers/Kusto/ParserKQLRange.cpp b/src/Parsers/Kusto/ParserKQLRange.cpp new file mode 100644 index 000000000000..139c7ff75cd0 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLRange.cpp @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLRange::parseImpl(Pos & pos, ASTPtr & node, Expected & /*expected*/) +{ + ASTPtr select_node; + String columnName, start, stop, step; + auto start_pos = pos; + auto end_pos = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "from") + { + 
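/// Tokens before "from" form the column name of the range operator.
+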
end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception("Missing columnName for range operator", ErrorCodes::SYNTAX_ERROR); + + columnName = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + if (String(pos->begin, pos->end) == "to") + { + if (columnName.empty()) + throw Exception("Missing `from` for range operator", ErrorCodes::SYNTAX_ERROR); + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception("Missing start expression for range operator", ErrorCodes::SYNTAX_ERROR); + start = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + if (String(pos->begin, pos->end) == "step") + { + if (columnName.empty()) + throw Exception("Missing `from` for range operator", ErrorCodes::SYNTAX_ERROR); + if (start.empty()) + throw Exception("Missing 'to' for range operator", ErrorCodes::SYNTAX_ERROR); + + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception("Missing stop expression for range operator", ErrorCodes::SYNTAX_ERROR); + + stop = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + ++pos; + } + + if (columnName.empty() || start.empty() || stop.empty()) + throw Exception("Missing required expression for range operator", ErrorCodes::SYNTAX_ERROR); + + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception("Missing step expression for range operator", ErrorCodes::SYNTAX_ERROR); + + step = String(start_pos->begin, end_pos->end); + + columnName = getExprFromToken(columnName, pos.max_depth); + start = getExprFromToken(start, pos.max_depth); + stop = getExprFromToken(stop, pos.max_depth); + step = getExprFromToken(step, pos.max_depth); + String query = std::format("SELECT * FROM (SELECT kql_range({0}, {1},{2}) AS {3}) ARRAY JOIN {3}", start, stop, step, columnName); + + if (!parseSQLQueryByString(std::make_unique(), query, select_node, pos.max_depth)) + return false; + node = std::move(select_node); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLRange.h b/src/Parsers/Kusto/ParserKQLRange.h new file mode 100644 index 000000000000..124e06a4d329 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLRange.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLRange : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL range"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index f7540d729fdd..18fd68ff1871 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -8,7 +8,7 @@ namespace DB { -bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { bool has_dir = false; std::vector has_directions; @@ -35,6 +35,7 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) has_directions.push_back(has_dir); has_dir = false; } + ++new_pos; } has_directions.push_back(has_dir); @@ -53,7 +54,6 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); - return true; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 21e480234d39..b940763f71ba 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ 
b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB @@ -36,6 +37,7 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + // Union support will be added in a later phase. ASTPtr kql_query; if (!ParserKQLQuery().parse(pos, kql_query, expected)) @@ -71,6 +73,7 @@ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_lparen.ignore(pos, expected)) { ++paren_count; + auto pos_start = pos; while (!pos->isEnd()) { if (pos->type == TokenType::ClosingRoundBracket) @@ -80,10 +83,10 @@ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (paren_count == 0) break; - - kql_statement = kql_statement + " " + String(pos->begin,pos->end); ++pos; } + kql_statement = String(pos_start->begin, (--pos)->end); + ++pos; Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); IParser::Pos pos_kql(token_kql, pos.max_depth); diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index ef44b2d6c8ac..864cda5531ad 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -19,7 +19,6 @@ class ParserKQLStatement : public IParserBase {} }; - class ParserKQLWithOutput : public IParserBase { protected: diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 75eacb1adbd2..6e2c61be698c 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -10,15 +12,26 @@ #include #include #include +#include #include #include #include #include +#include +#include +#include +#include + namespace DB { -bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr select_expression_list; ASTPtr group_expression_list; @@ -27,33 +40,166 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String expr_groupby; String expr_columns; bool groupby = false; + auto column_begin_pos = pos; + + uint16_t bracket_count = 0; + int32_t new_column_index = 1; + + std::vector expr_aggregations; + std::vector expr_groupbys; + + std::unordered_set aggregate_functions( + {"arg_max", + "arg_min", + "avg", + "avgif", + "binary_all_and", + "binary_all_or", + "binary_all_xor", + "buildschema", + "count", + "countif", + "dcount", + "dcountif", + "make_bag", + "make_bag_if", + "make_list", + "make_list_if", + "make_list_with_nulls", + "make_set", + "make_set_if", + "max", + "maxif", + "min", + "minif", + "percentile", + "percentilew", + "percentiles", + "percentiles_array", + "percentilesw", + "percentilesw_array", + "stdev", + "stdevif", + "sum", + "sumif", + "take_any", + "take_anyif", + "variance", + "varianceif"}); + + auto apply_alias = [&](Pos & begin_pos, Pos & end_pos, bool is_groupby) + { + if (end_pos->end <= begin_pos->begin) + throw Exception("Syntax error near keyword \"" + String(begin_pos->begin, begin_pos->end) + "\"", ErrorCodes::SYNTAX_ERROR); + auto expr = String(begin_pos->begin, end_pos->end); + auto equal_pos = begin_pos; + ++equal_pos; + if (!is_groupby) + { + if (String(equal_pos->begin, equal_pos->end) != "=") + { + String alias; + String aggregate_fun = String(begin_pos->begin, begin_pos->end); + if (aggregate_functions.find(aggregate_fun) == aggregate_functions.end()) + { + alias = std::format("Columns{}", new_column_index); + ++new_column_index; + } + else + { + alias = std::format("{}_", aggregate_fun); + auto agg_column_pos = begin_pos; + ++agg_column_pos; + ++agg_column_pos; + ++agg_column_pos; + if (agg_column_pos->type == TokenType::Comma || agg_column_pos->type == TokenType::ClosingRoundBracket) + { + --agg_column_pos; + if (agg_column_pos->type != TokenType::ClosingRoundBracket) + alias = alias + String(agg_column_pos->begin, agg_column_pos->end); + } + } + expr = std::format("{} = {}", alias, expr); + } + expr_aggregations.push_back(expr); + } + else + { + if (String(equal_pos->begin, equal_pos->end) != "=") + { + String groupby_fun = String(begin_pos->begin, begin_pos->end); + if (equal_pos->isEnd() || equal_pos->type == TokenType::Comma || equal_pos->type == TokenType::Semicolon + || equal_pos->type == TokenType::PipeMark) + { + expr = groupby_fun; + } + else + { + String alias; + if (groupby_fun == "bin" || groupby_fun == "bin_at") + { + auto bin_column_pos = begin_pos; + ++bin_column_pos; + ++bin_column_pos; + alias = String(bin_column_pos->begin, bin_column_pos->end); + ++bin_column_pos; + if (bin_column_pos->type != TokenType::Comma) + alias.clear(); + } + if (alias.empty()) + { + alias = std::format("Columns{}", new_column_index); + ++new_column_index; + } - auto begin = pos; - auto pos_groupby = pos; + expr = std::format("{} = {}", alias, expr); + } + } + expr_groupbys.push_back(expr); + } + }; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - if (String(pos->begin, pos->end) == "by") + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (pos->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if ((bracket_count == 0 && pos->type == TokenType::Comma) || String(pos->begin, pos->end) == "by") { - groupby = true; - auto end = pos; - --end; - expr_aggregation = begin <= end ? String(begin->begin, end->end) : ""; - pos_groupby = pos; - ++pos_groupby; + auto end_pos = pos; + --end_pos; + apply_alias(column_begin_pos, end_pos, groupby); + if (String(pos->begin, pos->end) == "by") + groupby = true; + column_begin_pos = pos; + ++column_begin_pos; } ++pos; } --pos; - if (groupby) - expr_groupby = String(pos_groupby->begin, pos->end); - else - expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; + apply_alias(column_begin_pos, pos, groupby); + + for (const auto & expr : expr_aggregations) + expr_aggregation = expr_aggregation.empty() ? expr : expr_aggregation + "," + expr; - auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; - expr_columns = groupby ? expr_aggregation_str + expr_groupby : expr_aggregation_str; + for (const auto & expr : expr_groupbys) + expr_groupby = expr_groupby.empty() ?
expr : expr_groupby + "," + expr; - String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + if (!expr_groupby.empty()) + expr_columns = expr_groupby; + + if (!expr_aggregation.empty()) + { + if (expr_columns.empty()) + expr_columns = expr_aggregation; + else + expr_columns = expr_columns + "," + expr_aggregation; + } + + String converted_columns = getExprFromToken(expr_columns, pos.max_depth); Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); @@ -65,7 +211,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (groupby) { - String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); @@ -77,5 +223,4 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return true; } - } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1aad02705dfd..e95043c15232 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -8,7 +8,6 @@ namespace DB class ParserKQLSummarize : public ParserKQLBase { - protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 6356ad688b67..3e071f592f8a 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,7 +3,9 @@ #include #include #include + #include + namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index c67dcb151562..efb717ffb557 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -8,10 +8,10 @@ namespace DB class ParserKQLTable : public ParserKQLBase { - protected: const char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + }; } diff --git a/src/Parsers/Kusto/ParserKQLTimespan.cpp b/src/Parsers/Kusto/ParserKQLTimespan.cpp new file mode 100644 index 000000000000..967446471650 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTimespan.cpp @@ -0,0 +1,257 @@ +#include "ParserKQLTimespan.h" +#include "Utilities.h" + +#include + +#include +#include + +namespace x3 = boost::spirit::x3; + +namespace +{ +enum class KQLTimespanUnit +{ + Day, + Hour, + Minute, + Second, + Millisecond, + Microsecond, + Nanosecond, + Tick +}; + +template +concept arithmetic = std::is_arithmetic_v; + +Int64 kqlTimespanToTicks(const arithmetic auto value, const KQLTimespanUnit unit) +{ + static constexpr Int64 TICKS_PER_MICROSECOND = 10; + static constexpr auto TICKS_PER_MILLISECOND = TICKS_PER_MICROSECOND * 1000; + static constexpr auto TICKS_PER_SECOND = TICKS_PER_MILLISECOND * 1000; + static constexpr auto TICKS_PER_MINUTE = TICKS_PER_SECOND * 60; + static constexpr auto TICKS_PER_HOUR = TICKS_PER_MINUTE * 60; + static constexpr auto TICKS_PER_DAY = TICKS_PER_HOUR * 24; + + switch (unit) + { + case KQLTimespanUnit::Day: + return static_cast(value * TICKS_PER_DAY); + case KQLTimespanUnit::Hour: + return static_cast(value * 
TICKS_PER_HOUR); + case KQLTimespanUnit::Minute: + return static_cast(value * TICKS_PER_MINUTE); + case KQLTimespanUnit::Second: + return static_cast(value * TICKS_PER_SECOND); + case KQLTimespanUnit::Millisecond: + return static_cast(value * TICKS_PER_MILLISECOND); + case KQLTimespanUnit::Microsecond: + return static_cast(value * TICKS_PER_MICROSECOND); + case KQLTimespanUnit::Tick: + return static_cast(value); + case KQLTimespanUnit::Nanosecond: + return static_cast(value / 100); + } +} + +struct timespan_units_ : public x3::symbols +{ + timespan_units_() + { + // clang-format off + add + ("d", KQLTimespanUnit::Day) + ("day", KQLTimespanUnit::Day) + ("days", KQLTimespanUnit::Day) + ("h", KQLTimespanUnit::Hour) + ("hr", KQLTimespanUnit::Hour) + ("hrs", KQLTimespanUnit::Hour) + ("hour", KQLTimespanUnit::Hour) + ("hours", KQLTimespanUnit::Hour) + ("m", KQLTimespanUnit::Minute) + ("min", KQLTimespanUnit::Minute) + ("minute", KQLTimespanUnit::Minute) + ("minutes", KQLTimespanUnit::Minute) + ("s", KQLTimespanUnit::Second) + ("sec", KQLTimespanUnit::Second) + ("second", KQLTimespanUnit::Second) + ("seconds", KQLTimespanUnit::Second) + ("ms", KQLTimespanUnit::Millisecond) + ("milli", KQLTimespanUnit::Millisecond) + ("millis", KQLTimespanUnit::Millisecond) + ("millisec", KQLTimespanUnit::Millisecond) + ("millisecond", KQLTimespanUnit::Millisecond) + ("milliseconds", KQLTimespanUnit::Millisecond) + ("micro", KQLTimespanUnit::Microsecond) + ("micros", KQLTimespanUnit::Microsecond) + ("microsec", KQLTimespanUnit::Microsecond) + ("microsecond", KQLTimespanUnit::Microsecond) + ("microseconds", KQLTimespanUnit::Microsecond) + ("nano", KQLTimespanUnit::Nanosecond) + ("nanos", KQLTimespanUnit::Nanosecond) + ("nanosec", KQLTimespanUnit::Nanosecond) + ("nanosecond", KQLTimespanUnit::Nanosecond) + ("nanoseconds", KQLTimespanUnit::Nanosecond) + ("tick", KQLTimespanUnit::Tick) + ("ticks", KQLTimespanUnit::Tick) + ; + // clang-format on + } +}; + +const timespan_units_ timespan_units; + +struct KQLTimespanComponents +{ + static constexpr auto MAX_SECONDS_FRACTIONAL = 10'000'000U; + + bool isValid() const { return hours < 24 && minutes < 60 && seconds < 60 && seconds_fractional < MAX_SECONDS_FRACTIONAL; } + std::optional toTicks() const + { + if (!isValid()) + return {}; + + const auto sign = is_negative ? 
-1 : 1; + auto seconds_fractional_in_ticks = seconds_fractional; + while (seconds_fractional_in_ticks > 0 && seconds_fractional_in_ticks < (MAX_SECONDS_FRACTIONAL / 10)) + seconds_fractional_in_ticks *= 10; + + const auto ticks = kqlTimespanToTicks(days, KQLTimespanUnit::Day) + kqlTimespanToTicks(hours, KQLTimespanUnit::Hour) + + kqlTimespanToTicks(minutes, KQLTimespanUnit::Minute) + kqlTimespanToTicks(seconds, KQLTimespanUnit::Second) + + kqlTimespanToTicks(seconds_fractional_in_ticks, KQLTimespanUnit::Tick); + return sign * ticks; + } + + bool is_negative = false; + unsigned days = 0; + unsigned hours = 0; + unsigned minutes = 0; + unsigned seconds = 0; + unsigned seconds_fractional = 0; +}; + +struct KQLTimespanNull +{ +}; + +using KQLTimespanValueWithUnit = std::pair; + +using x3::_attr; +using x3::_val; +using x3::double_; +using x3::int_; +using x3::lexeme; +using x3::lit; +using x3::omit; +using x3::uint_; + +const auto SET_DAYS = [](auto & ctx) { _val(ctx).days = _attr(ctx); }; +const auto SET_HOURS_AND_MINUTES = [](auto & ctx) +{ + auto & kql_timespan_components = _val(ctx); + const auto & attributes = _attr(ctx); + kql_timespan_components.hours = at_c<0>(attributes); + kql_timespan_components.minutes = at_c<1>(attributes); +}; + +const auto SET_NEGATIVE = [](auto & ctx) { _val(ctx).is_negative = true; }; +const auto SET_SECONDS = [](auto & ctx) { _val(ctx).seconds = _attr(ctx); }; +const auto SET_SECONDS_FRACTIONAL = [](auto & ctx) { _val(ctx).seconds_fractional = _attr(ctx); }; + +const x3::rule KQL_TIMESPAN_SEPARATED_COMPONENTS = "KQL timespan separated components"; +// clang-format off +const auto KQL_TIMESPAN_SEPARATED_COMPONENTS_def = + lexeme + [ + -(lit('-')[SET_NEGATIVE] | lit('+')) + >> -(uint_ >> lit('.'))[SET_DAYS] + >> (uint_ >> lit(':') >> uint_)[SET_HOURS_AND_MINUTES] + >> -(lit(':') >> uint_[SET_SECONDS] >> -(lit('.') >> uint_[SET_SECONDS_FRACTIONAL])) + ]; +// clang-format on + +const auto SET_VALUE_AND_UNIT = [](auto & ctx) +{ + const auto & value_and_unit = _attr(ctx); + _val(ctx) = {at_c<0>(value_and_unit), at_c<1>(value_and_unit)}; +}; + +const x3::rule KQL_TIMESPAN_VALUE_WITH_UNIT = "KQL timespan value with unit"; +const auto KQL_TIMESPAN_VALUE_WITH_UNIT_def = (double_ >> timespan_units)[SET_VALUE_AND_UNIT]; + +const x3::rule KQL_TIMESPAN_DAY_VALUE = "KQL timespan day value"; +const auto KQL_TIMESPAN_DAY_VALUE_def = int_; + +const x3::rule KQL_TIMESPAN_NULL = "KQL timespan null"; +const auto KQL_TIMESPAN_NULL_def = omit[lit("null")]; + +const x3::rule> + KQL_TIMESPAN = "KQL timespan"; +const auto KQL_TIMESPAN_def = KQL_TIMESPAN_SEPARATED_COMPONENTS | KQL_TIMESPAN_VALUE_WITH_UNIT | KQL_TIMESPAN_DAY_VALUE | KQL_TIMESPAN_NULL; + +BOOST_SPIRIT_DEFINE( + KQL_TIMESPAN_SEPARATED_COMPONENTS, KQL_TIMESPAN_VALUE_WITH_UNIT, KQL_TIMESPAN_DAY_VALUE, KQL_TIMESPAN_NULL, KQL_TIMESPAN); +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +std::string kqlTicksToInterval(const std::optional ticks) +{ + return std::format("toIntervalNanosecond({})", ticks ? 
std::to_string(*ticks * 100) : "null"); +} + +std::optional ParserKQLTimespan::parse(const std::string_view expression) +{ + const auto throw_exception + = [&expression] { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a correct timespan expression: {}", expression); }; + + auto first = expression.cbegin(); + auto last = expression.cend(); + + boost::variant kql_timespan_variant; + const auto success = x3::parse(first, last, KQL_TIMESPAN, kql_timespan_variant); + + if (!success || first != last) + throw_exception(); + + return boost::apply_visitor( + [&throw_exception](const auto & kql_timespan) -> std::optional + { + using Type = std::decay_t; + if constexpr (std::is_same_v) + { + const auto ticks = kql_timespan.toTicks(); + if (!ticks) + throw_exception(); + + return *ticks; + } + else if constexpr (std::is_same_v) + return kqlTimespanToTicks(kql_timespan.first, kql_timespan.second); + else if constexpr (std::is_same_v) + return kqlTimespanToTicks(kql_timespan, KQLTimespanUnit::Day); + else if constexpr (std::is_same_v) + return std::nullopt; + }, + kql_timespan_variant); +} + +bool ParserKQLTimespan::tryParse(const std::string_view expression, std::optional & ticks) +{ + try + { + ticks = parse(expression); + return true; + } + catch (...) + { + return false; + } +} +} diff --git a/src/Parsers/Kusto/ParserKQLTimespan.h b/src/Parsers/Kusto/ParserKQLTimespan.h new file mode 100644 index 000000000000..f98de09fa980 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTimespan.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include +#include + +namespace DB +{ +std::string kqlTicksToInterval(std::optional ticks); + +class ParserKQLTimespan +{ +public: + static std::optional parse(std::string_view expression); + static bool tryParse(std::string_view expression, std::optional & ticks); +}; +} diff --git a/src/Parsers/Kusto/ParserKQLTop.cpp b/src/Parsers/Kusto/ParserKQLTop.cpp new file mode 100644 index 000000000000..9dd6cb67c78d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTop.cpp @@ -0,0 +1,58 @@ +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLTop::parseImpl(Pos & /*pos*/, ASTPtr & /*node*/, Expected & /*expected*/) +{ + return true; +} + +bool ParserKQLTop::updatePipeLine (OperationsPos & operations, String & query) +{ + Pos pos = operations.back().second; + + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Syntax error near top operator", ErrorCodes::SYNTAX_ERROR); + + Pos start_pos = operations.front().second; + Pos end_pos = pos; + --end_pos; + --end_pos; + + String prev_query(start_pos->begin, end_pos->end); + + String limit_expr, sort_expr; + start_pos = pos; + end_pos = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + { + auto limit_end_pos = pos; + --limit_end_pos; + limit_expr = String(start_pos->begin, limit_end_pos->end); + start_pos = pos; + ++start_pos; + } + end_pos = pos; + ++pos; + } + sort_expr = (start_pos <= end_pos) ?
String(start_pos->begin, end_pos->end) : ""; + if (limit_expr.empty() || sort_expr.empty()) + throw Exception("top operator needs a by clause", ErrorCodes::SYNTAX_ERROR); + + query = std::format("{} sort by {} | take {}", prev_query, sort_expr, limit_expr); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTop.h b/src/Parsers/Kusto/ParserKQLTop.h new file mode 100644 index 000000000000..8672204f4020 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTop.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTop : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL top"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool updatePipeLine (OperationsPos & operations, String & query) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTopHitter.cpp b/src/Parsers/Kusto/ParserKQLTopHitter.cpp new file mode 100644 index 000000000000..d71a56fd9c3b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTopHitter.cpp @@ -0,0 +1,77 @@ +#include +#include +#include + +#include + +namespace DB
{ +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLTopHitters::parseImpl(Pos & /*pos*/, ASTPtr & /*node*/, Expected & /*expected*/) +{ + return true; +} + +bool ParserKQLTopHitters::updatePipeLine (OperationsPos & operations, String & query) +{ + Pos pos = operations.back().second; + + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Syntax error near top-hitters operator", ErrorCodes::SYNTAX_ERROR); + + Pos start_pos = operations.front().second; + Pos end_pos = pos; + --end_pos; + --end_pos; + --end_pos; + --end_pos; + + String prev_query(start_pos->begin, end_pos->end); + + String number_of_values, value_expression, summing_expression; + start_pos = pos; + end_pos = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "of") + { + auto number_end_pos = pos; + --number_end_pos; + number_of_values = String(start_pos->begin, number_end_pos->end); + start_pos = pos; + ++start_pos; + } + + if (String(pos->begin, pos->end) == "by") + { + auto expr_end_pos = pos; + --expr_end_pos; + value_expression = String(start_pos->begin, expr_end_pos->end); + start_pos = pos; + ++start_pos; + } + end_pos = pos; + ++pos; + } + + if (value_expression.empty()) + value_expression = (start_pos <= end_pos) ? String(start_pos->begin, end_pos->end) : ""; + else + summing_expression = (start_pos <= end_pos) ?
String(start_pos->begin, end_pos->end) : ""; + + if (number_of_values.empty() || value_expression.empty()) + throw Exception("top-hitter operator need a ValueExpression", ErrorCodes::SYNTAX_ERROR); + + if (summing_expression.empty()) + query = std::format("{0} summarize approximate_count_{1} = count() by {1} | sort by approximate_count_{1} desc | take {2} ", prev_query, value_expression, number_of_values); + else + query = std::format("{0} summarize approximate_sum_{1} = sum({1}) by {2} | sort by approximate_sum_{1} desc | take {3}", prev_query, summing_expression, value_expression, number_of_values); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTopHitter.h b/src/Parsers/Kusto/ParserKQLTopHitter.h new file mode 100644 index 000000000000..2fa6a9b6203d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTopHitter.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTopHitters : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL top-hitters"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool updatePipeLine (OperationsPos & operations, String & query) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTopNested.cpp b/src/Parsers/Kusto/ParserKQLTopNested.cpp new file mode 100644 index 000000000000..2dba5cabb460 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTopNested.cpp @@ -0,0 +1,431 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_DIRECTION_OF_SORTING; + extern const int SYNTAX_ERROR; +} + +String ParserKQLTopNested ::calculateSingleTopNestedWithOthers( + const TopNestedClauses & top_nested_clauses, size_t layer, bool has_others, const uint32_t max_depth) +{ + const String source_table = "source_table AS (SELECT * FROM StormEvents) "; + const String & topn = getExprFromToken(top_nested_clauses[layer].topn, max_depth); + const String & expr_alias = top_nested_clauses[layer].expr_alias; + const String & expr = getExprFromToken(top_nested_clauses[layer].expr, max_depth); + const String & agg_alias = top_nested_clauses[layer].agg_alias; + const String & agg_expr = getExprFromToken(top_nested_clauses[layer].agg_expr, max_depth); + const String & order_expr = top_nested_clauses[layer].order; + + String topn_expr = topn.empty() ? 
"" : std::format("LIMIT {} ", topn); + String column_expr_with_aliais = expr + " AS " + expr_alias; + String agg_expr_with_aliais = std::format("{} AS {} ", agg_expr, agg_alias); + String agg_expr_value_with_aliais = std::format("{} AS {}_value ", agg_expr, agg_alias); + String query; + if (layer == 0) + { + query = std::format( + "WITH {0},tb0_normal AS (SELECT {1}, {2} FROM source_table GROUP BY {3} ORDER BY {4} {5} {6})", + source_table, + column_expr_with_aliais, + agg_expr_with_aliais, + expr_alias, + agg_alias, + order_expr, + topn_expr); + if (has_others) + query = query + + std::format( + ",tb0_others AS (SELECT {0} FROM source_table WHERE {1} NOT IN (SELECT {1} FROM tb0_normal))", + agg_expr_value_with_aliais, + expr_alias); + } + else + { + const String tb0_normal_name = std::format("tb{}_normal", layer - 1); + const String row_alias0_name = std::format("row{}", layer - 1); + + const String tb1_prev_name = std::format("tb{}_prev", layer); + const String tb1_partition_name = std::format("tb{}_partition", layer); + const String tb1_normal_name = std::format("tb{}_normal", layer); + const String tb1_others_prev_name = std::format("tb{}_others_prev", layer); + const String tb1_others_name = std::format("tb{}_others", layer); + const String row_alias1_name = std::format("row{}", layer); + + String column_list, select_list, join_list, group_list, prev_group_list; + for (size_t i = 0; i < layer; ++i) + { + const String select_tmp = std::format("{0}, {1}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias); + select_list = select_list.empty() ? select_tmp : select_list + ", " + select_tmp; + join_list = join_list.empty() ? top_nested_clauses[i].expr : join_list + ", " + top_nested_clauses[i].expr; + column_list = column_list.empty() ? top_nested_clauses[i].expr_alias : column_list + ", " + top_nested_clauses[i].expr_alias; + + prev_group_list = select_list; + } + group_list = select_list + ", " + expr_alias; + auto tb1_prev_select_list = select_list + ", " + column_expr_with_aliais + ", " + agg_expr_with_aliais; + auto tb1_partition_select_list = select_list + ", " + expr_alias + ", " + agg_alias; + auto tb1_others_select_list = select_list + ", " + expr_alias + ", " + agg_alias; + + + const String tb1_prev_query = std::format( + "{0} AS (SELECT {1} FROM {2} INNER JOIN source_table AS join1 USING ({3}) GROUP BY {4})", + tb1_prev_name, + tb1_prev_select_list, + tb0_normal_name, + join_list, + group_list); + + const String tb1_partition_query = std::format( + "{0} AS (SELECT {1}, ROW_NUMBER () over (PARTITION by {2} order by {3} {4}) AS {5} FROM {6})", + tb1_partition_name, + tb1_partition_select_list, + column_list, + agg_alias, + order_expr, + row_alias1_name, + tb1_prev_name); + + const String where_clause = (topn.empty() || layer < 1) ? 
"" : std::format("WHERE {} <= {}", row_alias1_name, topn); + const String tb1_normal_query + = std::format("{0} AS (SELECT {1} FROM {2} {3})", tb1_normal_name, tb1_partition_select_list, tb1_partition_name, where_clause); + + query = tb1_prev_query + "," + tb1_partition_query + "," + tb1_normal_query; + if (has_others) + { + auto tb1_others_prev_select_list = column_list + ", " + agg_expr_value_with_aliais; + auto tb1_others_prev_join_clause + = std::format("LEFT JOIN {0} USING ({1})", tb1_normal_name, column_list + ", " + expr_alias); + auto tb1_others_prev_join_where_clasue = std::format(" empty({}.{}) ", tb1_normal_name, expr); + for (size_t i = 0; i < layer; ++i) + tb1_others_prev_join_where_clasue + += std::format("AND source_table.{0} IN (SELECT {0} FROM {1}) ", top_nested_clauses[i].expr, tb1_normal_name); + + const String tb1_others_prev_query = std::format( + "{0} AS (SELECT {1} FROM source_table {2} WHERE {3} GROUP BY {4})", + tb1_others_prev_name, + tb1_others_prev_select_list, + tb1_others_prev_join_clause, + tb1_others_prev_join_where_clasue, + column_list); + + const String tb1_others_query = std::format( + "{0} AS (SELECT DISTINCT {1}, {2}_value FROM {3} RIGHT JOIN {4} USING ({5}))", + tb1_others_name, + select_list, + agg_alias, + tb1_others_prev_name, + tb1_normal_name, + column_list); + + query = query + "," + tb1_others_prev_query + "," + tb1_others_query; + } + } + return query; +} + +String ParserKQLTopNested ::calculateTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, const uint32_t max_depth) +{ + String query, last_select_list, last_others_list; + auto size = top_nested_clauses.size(); + bool has_others = false; + for (size_t i = 0; i < size; ++i) + { + if (!top_nested_clauses[i].others.empty()) + { + has_others = true; + break; + } + } + + for (size_t i = 0; i < size; ++i) + { + const String single_query = calculateSingleTopNestedWithOthers(top_nested_clauses, i, has_others, max_depth); + const String others_expr + = top_nested_clauses[i].others.empty() ? "NULL" : getExprFromToken(top_nested_clauses[i].others, max_depth); + const String others_agg = top_nested_clauses[i].others.empty() ? "NULL" : std::format("{}_value", top_nested_clauses[i].agg_alias); + if (i == 0) + { + query = single_query; + last_select_list = std::format("{}, {}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias); + last_others_list = std::format( + "{} AS {}, {} AS {}", others_expr, top_nested_clauses[i].expr_alias, others_agg, top_nested_clauses[i].agg_alias); + } + else + { + query = query + "," + single_query; + last_others_list + = last_select_list + ", " + + std::format( + "{} AS {}, {} AS {}", others_expr, top_nested_clauses[i].expr_alias, others_agg, top_nested_clauses[i].agg_alias); + last_select_list + = last_select_list + ", " + std::format("{}, {}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias); + } + } + if (has_others) + for (size_t i = 0; i < size - 1; ++i) + { + auto other_values = top_nested_clauses[i].agg_alias; + String all_others_table = std::format("tb{}_all_others AS (SELECT ", i); + String seperator; + String first_list; + for (size_t j = 0; j < i; ++j) + { + if (first_list.empty()) + first_list = std::format("{}, {}", top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias); + else + first_list += std::format(", {}, {}", top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias); + } + all_others_table += first_list; + for (size_t j = i; j < size; ++j) + { + seperator = (i == 0) ? 
"" : ","; + if (i == 0) + { + seperator = (j == 0) ? "" : ","; + } + else + seperator = ","; + if (top_nested_clauses[j].others.empty()) + all_others_table + = all_others_table + + std::format( + "{} NULL AS {} , NULL AS {}", seperator, top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias); + else + all_others_table = all_others_table + + std::format("{} {} AS {} , {}_value AS {}", + seperator, + getExprFromToken(top_nested_clauses[j].others, max_depth), + top_nested_clauses[j].expr_alias, + other_values, + top_nested_clauses[j].agg_alias); + } + all_others_table += std::format(" FROM tb{}_others )", i); + query = query + "," + all_others_table; + } + + String last_normal_table = std::format("tb{}_normal", size - 1); + if (has_others) + { + String last_others_table = std::format("tb{}_others", size - 1); + query = query + + std::format( + ", last_query AS ( SELECT {0} FROM {1} UNION ALL SELECT {2} FROM {3}", + last_select_list, + last_normal_table, + last_others_list, + last_others_table); + if (size > 1 ) + { + for (size_t i = 0; i < size - 1; ++i) + { + String tb_all_others = std::format("tb{}_all_others", i); + query = query + std::format(" UNION ALL SELECT {} FROM {}", last_select_list, tb_all_others); + } + } + query += ") Select * from last_query"; + } + else + query = query + std::format(" SELECT {0} FROM {1} ", last_select_list, last_normal_table); + + return query; +} + +bool ParserKQLTopNested ::parseSingleTopNestedClause(Pos & begin_pos, Pos & last_pos, TopNestedClause & top_nested_clause, const int layer) +{ + TopNestedClause arg; + auto pos = begin_pos; + for (auto i = 0; i < 3; ++i) + ++pos; + auto start_pos = pos; + auto end_pos = pos; + + auto get_name_value = [&](Pos & begin, Pos & end, String & name, String & value) + { + Pos tmp = begin; + bool has_alias = false; + Pos value_pos = begin; + while (tmp < end) + { + if (String(tmp->begin, tmp->end) == "=") + { + --tmp; + name = String(begin->begin, tmp->end); + ++tmp; + ++tmp; + --end; + value = String(tmp->begin, end->end); + value_pos = tmp; + ++end; + has_alias = true; + break; + } + ++tmp; + } + if (!has_alias) + { + --end; + value = String(begin->begin, end->end); + ++end; + } + return value_pos; + }; + + bool has_by = false, has_of = false; + Pos expr_start_pos = begin_pos; + Pos expr_end_pos = begin_pos; + while (pos < last_pos) + { + if (String(pos->begin, pos->end) == "of") + { + has_of = true; + end_pos = pos; + --end_pos; + if (start_pos <= end_pos) + arg.topn = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + + if (String(pos->begin, pos->end) == "with") + { + end_pos = pos; + expr_start_pos = get_name_value(start_pos, end_pos, arg.expr_alias, arg.expr); + expr_end_pos = end_pos; + start_pos = pos; + ++start_pos; + } + + if (String(pos->begin, pos->end) == "by") + { + has_by = true; + end_pos = pos; + if (arg.expr.empty()) + { + expr_start_pos = get_name_value(start_pos, end_pos, arg.expr_alias, arg.expr); + expr_end_pos = end_pos; + } + else + get_name_value(start_pos, end_pos, arg.others_name, arg.others); + start_pos = pos; + ++start_pos; + } + ++pos; + } + + if (!has_of) + throw Exception("Missing 'of' keyword for top-nested operator", ErrorCodes::SYNTAX_ERROR); + + if (!has_by) + throw Exception("Missing 'by' keyword for top-nested operator", ErrorCodes::SYNTAX_ERROR); + + get_name_value(start_pos, pos, arg.agg_alias, arg.agg_expr); + + if (arg.agg_expr.empty()) + throw Exception("Missing aggregation expression for top-nested operator", ErrorCodes::SYNTAX_ERROR); 
+ + if (arg.expr_alias.empty()) + { --expr_end_pos; + if (expr_start_pos == expr_end_pos) + arg.expr_alias = arg.expr; + else + arg.expr_alias = std::format("Column{}", layer + 1); + } + + if (arg.agg_alias.empty()) + arg.agg_alias = std::format("aggregated_{}", arg.expr_alias); + + --last_pos; + + if (last_pos->type != TokenType::BareWord) + { + if (last_pos->type != TokenType::Number && last_pos->type != TokenType::ClosingRoundBracket) + throw Exception("Incorrect aggregation expression : " + arg.expr, ErrorCodes::SYNTAX_ERROR); + arg.order = "DESC"; + } + else + { + const auto sort_direct = String(last_pos->begin, last_pos->end); + if (sort_direct != "desc" && sort_direct != "asc") + throw Exception("Unknown direction of sorting : " + sort_direct, ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING); + + std::size_t found = arg.agg_expr.find(sort_direct); + arg.agg_expr = arg.agg_expr.substr(0, found); + arg.order = sort_direct; + } + + top_nested_clause = std::move(arg); + return true; +} + +bool ParserKQLTopNested ::parseTopNestedClause(Pos & pos, TopNestedClauses & top_nested_clauses) +{ + TopNestedClause top_nested_clause; + auto start_pos = pos; + for (auto i = 0; i < 3; ++i) + --start_pos; + + auto end_pos = start_pos; + auto paren_count = 0; + int layer = 0; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (String(pos->begin, pos->end) == "," and paren_count == 0) + { + end_pos = pos; + parseSingleTopNestedClause(start_pos, end_pos, top_nested_clause, layer); + ++layer; + top_nested_clauses.emplace_back(top_nested_clause); + start_pos = pos; + ++start_pos; + } + ++pos; + } + + parseSingleTopNestedClause(start_pos, pos, top_nested_clause, layer); + top_nested_clauses.emplace_back(top_nested_clause); + return true; +} + +bool ParserKQLTopNested ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + TopNestedClauses top_nested_clauses; + + parseTopNestedClause(pos, top_nested_clauses); + String query = calculateTopNestedWithOthers(top_nested_clauses, pos.max_depth); + + ASTPtr select_node; + Tokens tokens(query.c_str(), query.c_str() + query.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + if (!ParserSelectQuery().parse(new_pos, select_node, expected)) + return false; + + auto with_node = select_node->as()->with(); + + auto with_elem = with_node->children[0]->as(); + + auto sub_select = with_elem->children[0]->children[0]->children[0]->children[0]; + if (!setSubQuerySource(sub_select, node, false, false, "")) + return false; + + node = std::move(select_node); + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTopNested.h b/src/Parsers/Kusto/ParserKQLTopNested.h new file mode 100644 index 000000000000..2ec1e9ba87b4 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTopNested.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTopNested : public ParserKQLBase +{ + +protected: + struct TopNestedClause { + String topn; + String expr_alias; + String expr; + String others_name; + String others; + String agg_alias; + String agg_function; + String agg_expr; + String agg_column; + String order; + }; + using TopNestedClauses = std::vector; + const char * getName() const override { return "KQL top-nested"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + + bool parseSingleTopNestedClause(Pos & begin, Pos & end, 
TopNestedClause & top_nested_clause, const int layer); + bool parseTopNestedClause(Pos & pos, TopNestedClauses & top_nested_clauses); + String calculateTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, const uint32_t max_depth); + String calculateSingleTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, size_t layer, bool has_others, const uint32_t max_depth); +}; + +} diff --git a/src/Parsers/Kusto/Utilities.cpp b/src/Parsers/Kusto/Utilities.cpp new file mode 100644 index 000000000000..6213faeeadf8 --- /dev/null +++ b/src/Parsers/Kusto/Utilities.cpp @@ -0,0 +1,26 @@ +#include "Utilities.h" + +#include "KustoFunctions/IParserKQLFunction.h" + +namespace DB +{ +String extractLiteralArgumentWithoutQuotes(const std::string & function_name, IParser::Pos & pos) +{ + ++pos; + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + { + auto result = extractTokenWithoutQuotes(pos); + ++pos; + return result; + } + + --pos; + return IParserKQLFunction::getArgument(function_name, pos, IParserKQLFunction::ArgumentState::Raw); +} + +String extractTokenWithoutQuotes(IParser::Pos & pos) +{ + const auto offset = static_cast(pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral); + return {pos->begin + offset, pos->end - offset}; +} +} diff --git a/src/Parsers/Kusto/Utilities.h b/src/Parsers/Kusto/Utilities.h new file mode 100644 index 000000000000..942252f10868 --- /dev/null +++ b/src/Parsers/Kusto/Utilities.h @@ -0,0 +1,7 @@ +#include + +namespace DB +{ +String extractLiteralArgumentWithoutQuotes(const std::string & function_name, IParser::Pos & pos); +String extractTokenWithoutQuotes(IParser::Pos & pos); +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 6bd27ee62aea..40e358304176 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -182,7 +182,7 @@ Token Lexer::nextTokenImpl() for (const char * iterator = token_begin; iterator < pos; ++iterator) { - if (!isWordCharASCII(*iterator) && *iterator != '$') + if (!isWordCharASCII(*iterator) && *iterator != '$' && *iterator != '.') return Token(TokenType::ErrorWrongNumber, token_begin, pos); } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 90df8a8f79a0..9408fce3d781 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -20,7 +20,7 @@ #include #include #include - +#include namespace DB { @@ -631,17 +631,25 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// ENGINE can not be specified for table functions. if (storage || !table_function_p.parse(pos, as_table_function, expected)) { - /// AS [db.]table - if (!name_p.parse(pos, as_table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) + ParserKeyword s_kql("KQL"); + if (s_kql.ignore(pos, expected)) { - as_database = as_table; - if (!name_p.parse(pos, as_table, expected)) + if (!ParserKQLTaleFunction().parse(pos, select, expected)) return false; } + else + { + /// AS [db.]table + if (!name_p.parse(pos, as_table, expected)) + return false; + if (s_dot.ignore(pos, expected)) + { + as_database = as_table; + if (!name_p.parse(pos, as_table, expected)) + return false; + } + } /// Optional - ENGINE can be specified. 
     if (!storage)
         storage_p.parse(pos, storage, expected);
diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp
index 7f8a8d59fd05..5263a7fb3adf 100644
--- a/src/Parsers/ParserInsertQuery.cpp
+++ b/src/Parsers/ParserInsertQuery.cpp
@@ -12,7 +12,7 @@
 #include
 #include
 #include "Parsers/IAST_fwd.h"
-
+#include <Parsers/Kusto/ParserKQLStatement.h>
 namespace DB
 {
 
@@ -47,6 +47,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserFunction table_function_p{false};
     ParserStringLiteral infile_name_p;
     ParserExpressionWithOptionalAlias exp_elem_p(false);
+    ParserKeyword s_kql("KQL");
 
     /// create ASTPtr variables (result of parsing will be put in them).
     /// They will be used to initialize ASTInsertQuery's fields.
@@ -183,6 +184,11 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         ParserWatchQuery watch_p;
         watch_p.parse(pos, watch, expected);
     }
+    else if (!infile && s_kql.ignore(pos, expected))
+    {
+        if (!ParserKQLTaleFunction().parse(pos, select, expected))
+            return false;
+    }
     else if (!infile)
     {
         /// If all previous conditions were false and it's not FROM INFILE, query is incorrect
diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp
index 08877e0b2fe7..fb1a176f4fa1 100644
--- a/src/Parsers/TokenIterator.cpp
+++ b/src/Parsers/TokenIterator.cpp
@@ -1,18 +1,61 @@
 #include <Parsers/TokenIterator.h>
-
+#include <string>
+#include <unordered_set>
 namespace DB
 {
 
 UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin)
 {
+    std::unordered_set<String> valid_kql_negative_suffix
+    ({
+        "contains",
+        "contains_cs",
+        "endswith",
+        "endswith_cs",
+        "~",
+        "=",
+        "has",
+        "has_cs",
+        "hasprefix",
+        "hasprefix_cs",
+        "hassuffix",
+        "hassuffix_cs",
+        "in",
+        "startswith",
+        "startswith_cs"
+    });
     /// We have just two kind of parentheses: () and [].
     UnmatchedParentheses stack;
 
     /// We have to iterate through all tokens until the end to avoid false positive "Unmatched parentheses" error
     /// when parser failed in the middle of the query.
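+    /// A KQL query embedded via kql(...) may legitimately contain a bare '!' token (e.g. "!in" or
+    /// "!contains"), which the lexer reports as ErrorSingleExclamationMark. The loop below keeps
+    /// iterating past such a token when it is followed by one of the suffixes listed above,
+    /// instead of stopping at the first invalid token.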
-    for (TokenIterator it = begin; it.isValid(); ++it)
+    bool is_kql_included = false;
+    for (TokenIterator it = begin; !it->isEnd(); ++it)
     {
+        /// For a KQL function inside SQL, e.g.: select * from kql(Customers | where FirstName !in ("test", "test2"));
+        if (String(it.get().begin, it.get().end) == "kql")
+        {
+            ++it;
+            if (it->type == TokenType::OpeningRoundBracket)
+                is_kql_included = true;
+            --it;
+        }
+
+        if (!it.isValid())
+        {
+            if (!is_kql_included)
+                break;
+
+            if (it->type == TokenType::ErrorSingleExclamationMark)
+            {
+                ++it;
+                if (valid_kql_negative_suffix.find(String(it.get().begin, it.get().end)) == valid_kql_negative_suffix.end())
+                    break;
+                --it;
+            }
+        }
+
         if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket)
         {
             stack.push_back(*it);
diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp
new file mode 100644
index 000000000000..d40b6da575bd
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp
@@ -0,0 +1,101 @@
+#include <Parsers/Kusto/ParserKQLQuery.h>
+#include <Parsers/tests/gtest_common.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Aggregate, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "Customers | summarize t = stdev(Age) by FirstName",
+            "SELECT\n FirstName,\n sqrt(varSamp(Age)) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = stdevif(Age, Age < 10) by FirstName",
+            "SELECT\n FirstName,\n sqrt(varSampIf(Age, Age < 10)) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = binary_all_and(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitAnd(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = binary_all_or(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitOr(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = binary_all_xor(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitXor(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName",
+            "SELECT\n FirstName,\n quantiles(30 / 100, 40 / 100, 50 / 100, 60 / 100, 70 / 100)(Age) AS percentiles_Age\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName",
+            "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName",
+            "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)",
+            "SELECT quantilesExactWeighted(50 / 100, 75 / 100, 99.9 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
+        },
+        {
+            "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))",
+            "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
+        },
+        {
+            "Customers | summarize t = percentile(Age, 50) by FirstName",
+            "SELECT\n FirstName,\n quantile(50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "DataTable | summarize t = percentilew(Bucket, Frequency, 50)",
+            "SELECT quantileExactWeighted(50 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
+        },
+        {
+            "Customers | summarize t = make_list_with_nulls(Age) by FirstName",
+            "SELECT\n FirstName,\n arrayConcat(groupArray(Age), arrayMap(x -> NULL, range(0, toUInt32(count(*) - length(groupArray(Age))), 1))) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize count() by bin(Age, 10)",
+            "SELECT\n kql_bin(Age, 10) AS Age,\n count() AS count_\nFROM Customers\nGROUP BY Age"
+        },
+        {
+            "Customers | summarize count(Age+1) by bin(Age+1, 10)",
+            "SELECT\n kql_bin(Age + 1, 10) AS Columns1,\n count(Age + 1) AS count_\nFROM Customers\nGROUP BY Columns1"
+        },
+        {
+            "Customers | summarize count(Age) by bin(Age, 10)",
+            "SELECT\n kql_bin(Age, 10) AS Age,\n count(Age) AS count_Age\nFROM Customers\nGROUP BY Age"
+        },
+        {
+            "Customers | summarize count_distinct(Education)",
+            "SELECT countDistinct(Education) AS Columns1\nFROM Customers"
+        },
+        {
+            "Customers | summarize count_distinctif(Education,Age >30)",
+            "SELECT countIfDistinct(Education, Age > 30) AS Columns1\nFROM Customers"
+        },
+        {
+            "Customers | summarize take_any(FirstName)",
+            "SELECT any(FirstName) AS take_any_FirstName\nFROM Customers"
+        },
+        {
+            "Customers | summarize take_any(FirstName), take_any(LastName)",
+            "SELECT\n any(FirstName) AS take_any_FirstName,\n any(LastName) AS take_any_LastName\nFROM Customers"
+        },
+        {
+            "Customers | summarize take_any(FirstName, LastName) by FirstName, LastName",
+            "SELECT\n FirstName,\n LastName,\n any(FirstName),\n any(LastName) AS take_any_FirstName\nFROM Customers\nGROUP BY\n FirstName,\n LastName"
+        },
+        {
+            "Customers | summarize take_anyif(FirstName, LastName has 'Diaz')",
+            "SELECT anyIf(FirstName, hasTokenCaseInsensitive(LastName, 'Diaz')) AS take_anyif_FirstName\nFROM Customers"
+        },
+        {
+            "Customers | summarize take_anyif(FirstName, LastName has 'Diaz'), dcount(FirstName)",
+            "SELECT\n anyIf(FirstName, hasTokenCaseInsensitive(LastName, 'Diaz')) AS take_anyif_FirstName,\n countDistinct(FirstName) AS dcount_FirstName\nFROM Customers"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp
new file mode 100644
index 000000000000..a1b26ee56147
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp
@@ -0,0 +1,37 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Binary, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print binary_and(A, B)",
+            "SELECT bitAnd(CAST(A, 'Int64'), CAST(B, 'Int64'))"
+        },
+        {
+            "print binary_not(A)",
+            "SELECT bitNot(CAST(A, 'Int64'))"
+        },
+        {
+            "print binary_or(A, B)",
+            "SELECT bitOr(CAST(A, 'Int64'), CAST(B, 'Int64'))"
+        },
+        {
+            "print binary_shift_left(A, B)",
+            "SELECT if(B < 0, NULL, bitShiftLeft(CAST(A, 'Int64'), B))"
+        },
+        {
+            "print binary_shift_right(A, B)",
+            "SELECT if(B < 0, NULL, bitShiftRight(CAST(A, 'Int64'), B))"
+        },
+        {
+            "print binary_xor(A, B)",
+            "SELECT bitXor(CAST(A, 'Int64'), CAST(B, 'Int64'))"
+        },
+        {
+            "print bitset_count_ones(A)",
+            "SELECT bitCount(A)"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp
new file mode 100644
index 000000000000..afaf0a20d55b
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp
@@ -0,0 +1,80 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(
+    ParserKQLQuery_Conversion,
+    ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print tobool(A)",
+            "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0)"
+        },
+        {
+            "print 
toboolean(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0)" + }, + { + "print todouble(A)", + "SELECT toFloat64OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1)" + }, + { + "print toint(A)", + "SELECT toInt32OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1)" + }, + { + "print tolong(A)", + "SELECT toInt64OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1)" + }, + { + "print toreal(A)", + "SELECT toFloat64OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1)" + }, + { + "print tostring(A)", + "SELECT ifNull(kql_tostring(A), '')" + }, + { + "print decimal(123.345)", + "SELECT toDecimal128(CAST('123.345', 'String'), 32)" + }, + { + "print decimal(NULL)", + "SELECT NULL" + }, + { + "print todecimal('123.45')", + "SELECT if((toTypeName('123.45') = 'String') OR (toTypeName('123.45') = 'FixedString'), toDecimal128OrNull(CAST('123.45', 'String'), abs(34 - CAST(if(position(CAST('123.45', 'String'), 'e') = 0, if(countSubstrings(CAST('123.45', 'String'), '.') = 1, length(substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('123.45', 'String'), 'e+') AS x) > 0, substr(CAST('123.45', 'String'), x + 2), (position(CAST('123.45', 'String'), 'e-') AS y) > 0, substr(CAST('123.45', 'String'), y + 2), (position(CAST('123.45', 'String'), 'e-') = 0) AND (position(CAST('123.45', 'String'), 'e+') = 0) AND (position(CAST('123.45', 'String'), 'e') > 0), substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))), toDecimal128OrNull(CAST('123.45', 'String'), abs(17 - CAST(if(position(CAST('123.45', 'String'), 'e') = 0, if(countSubstrings(CAST('123.45', 'String'), '.') = 1, length(substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('123.45', 'String'), x + 2), y > 0, substr(CAST('123.45', 'String'), y + 2), (position(CAST('123.45', 'String'), 'e-') = 0) AND (position(CAST('123.45', 'String'), 'e+') = 0) AND (position(CAST('123.45', 'String'), 'e') > 0), substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))))" + }, + { + "print todecimal(NULL)", + "SELECT toDecimal128OrNull(CAST(NULL, 'Nullable(String)'), 17) / if(toTypeName(NULL) = 'IntervalNanosecond', 100, 1)" + }, + { + "print todecimal(123456.3456)", + "SELECT if((toTypeName(123456.3456) = 'String') OR (toTypeName(123456.3456) = 'FixedString'), toDecimal128OrNull(CAST('123456.3456', 'String'), abs(34 - CAST(if(position(CAST('123456.3456', 'String'), 'e') = 0, if(countSubstrings(CAST('123456.3456', 'String'), '.') = 1, length(substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('123456.3456', 'String'), 'e+') AS x) > 0, substr(CAST('123456.3456', 'String'), x + 2), (position(CAST('123456.3456', 'String'), 'e-') AS y) > 0, substr(CAST('123456.3456', 'String'), y + 2), (position(CAST('123456.3456', 'String'), 'e-') = 0) AND (position(CAST('123456.3456', 'String'), 'e+') = 0) AND (position(CAST('123456.3456', 'String'), 'e') > 0), substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))), toDecimal128OrNull(CAST('123456.3456', 'String'), abs(17 - CAST(if(position(CAST('123456.3456', 'String'), 'e') = 0, if(countSubstrings(CAST('123456.3456', 
'String'), '.') = 1, length(substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('123456.3456', 'String'), x + 2), y > 0, substr(CAST('123456.3456', 'String'), y + 2), (position(CAST('123456.3456', 'String'), 'e-') = 0) AND (position(CAST('123456.3456', 'String'), 'e+') = 0) AND (position(CAST('123456.3456', 'String'), 'e') > 0), substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))))" + }, + { + "print todecimal('abc')", + "SELECT if((toTypeName('abc') = 'String') OR (toTypeName('abc') = 'FixedString'), toDecimal128OrNull(CAST('abc', 'String'), abs(34 - CAST(if(position(CAST('abc', 'String'), 'e') = 0, if(countSubstrings(CAST('abc', 'String'), '.') = 1, length(substr(CAST('abc', 'String'), position(CAST('abc', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('abc', 'String'), 'e+') AS x) > 0, substr(CAST('abc', 'String'), x + 2), (position(CAST('abc', 'String'), 'e-') AS y) > 0, substr(CAST('abc', 'String'), y + 2), (position(CAST('abc', 'String'), 'e-') = 0) AND (position(CAST('abc', 'String'), 'e+') = 0) AND (position(CAST('abc', 'String'), 'e') > 0), substr(CAST('abc', 'String'), position(CAST('abc', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))), toDecimal128OrNull(CAST('abc', 'String'), abs(17 - CAST(if(position(CAST('abc', 'String'), 'e') = 0, if(countSubstrings(CAST('abc', 'String'), '.') = 1, length(substr(CAST('abc', 'String'), position(CAST('abc', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('abc', 'String'), x + 2), y > 0, substr(CAST('abc', 'String'), y + 2), (position(CAST('abc', 'String'), 'e-') = 0) AND (position(CAST('abc', 'String'), 'e+') = 0) AND (position(CAST('abc', 'String'), 'e') > 0), substr(CAST('abc', 'String'), position(CAST('abc', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))))" + }, + { + "print todecimal('1e5')", + "SELECT if((toTypeName('1e5') = 'String') OR (toTypeName('1e5') = 'FixedString'), toDecimal128OrNull(CAST('1e5', 'String'), abs(34 - CAST(if(position(CAST('1e5', 'String'), 'e') = 0, if(countSubstrings(CAST('1e5', 'String'), '.') = 1, length(substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('1e5', 'String'), 'e+') AS x) > 0, substr(CAST('1e5', 'String'), x + 2), (position(CAST('1e5', 'String'), 'e-') AS y) > 0, substr(CAST('1e5', 'String'), y + 2), (position(CAST('1e5', 'String'), 'e-') = 0) AND (position(CAST('1e5', 'String'), 'e+') = 0) AND (position(CAST('1e5', 'String'), 'e') > 0), substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))), toDecimal128OrNull(CAST('1e5', 'String'), abs(17 - CAST(if(position(CAST('1e5', 'String'), 'e') = 0, if(countSubstrings(CAST('1e5', 'String'), '.') = 1, length(substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('1e5', 'String'), x + 2), y > 0, substr(CAST('1e5', 'String'), y + 2), (position(CAST('1e5', 'String'), 'e-') = 0) AND (position(CAST('1e5', 'String'), 'e+') = 0) AND (position(CAST('1e5', 'String'), 'e') > 0), substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))))" + }, + { + "print decimal(1e-5)", + "SELECT toDecimal128(CAST('1e-5', 'String'), 5)" + }, + { + "print time(9nanoseconds)", + "SELECT toIntervalNanosecond(0)" + }, + { + "print time(1tick)", + "SELECT toIntervalNanosecond(100)" 
+        }
+
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_Count.cpp b/src/Parsers/tests/KQL/gtest_KQL_Count.cpp
new file mode 100644
index 000000000000..fed500360713
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_Count.cpp
@@ -0,0 +1,25 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Count, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "Customers | count",
+            "SELECT count() AS Count\nFROM Customers"
+        },
+        {
+            "Customers | where Age< 30 | count",
+            "SELECT count() AS Count\nFROM Customers\nWHERE Age < 30"
+        },
+        {
+            "Customers | where Age< 30 | limit 2| count",
+            "SELECT count() AS Count\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n LIMIT 2\n)"
+        },
+        {
+            "Customers | where Age< 30 | limit 2 | count | project Count",
+            "SELECT Count\nFROM\n(\n SELECT count() AS Count\n FROM\n (\n SELECT *\n FROM Customers\n WHERE Age < 30\n LIMIT 2\n )\n)"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp b/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp
new file mode 100644
index 000000000000..dae854c9a089
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp
@@ -0,0 +1,65 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DataType, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print dynamic(null)",
+            "SELECT NULL"
+        },
+        {
+            "print dynamic(1)",
+            "SELECT 1"
+        },
+        {
+            "print dynamic(datetime(1))",
+            "SELECT kql_datetime(1)"
+        },
+        {
+            "print dynamic(timespan(1d))",
+            "SELECT toIntervalNanosecond(86400000000000)"
+        },
+        {
+            "print dynamic(parse_ipv4('127.0.0.1'))",
+            "throws"
+        },
+        {
+            "print dynamic({ \"a\": 9 })",
+            "throws"
+        },
+        {
+            "print dynamic([1, 2, 3])",
+            "SELECT [1, 2, 3]"
+        },
+        {
+            "print dynamic([1, dynamic([2]), 3])",
+            "SELECT [1, [2], 3]"
+        },
+        {
+            "print dynamic([[1], [2], [3]])",
+            "SELECT [[1], [2], [3]]"
+        },
+        {
+            "print dynamic(['a', \"b\", 'c'])",
+            "SELECT ['a', 'b', 'c']"
+        },
+        {
+            "print dynamic([1, 'a', true, false])",
+            "SELECT [1, 'a', true, false]"
+        },
+        {
+            "print dynamic([date(1), time(1d), 1, 2])",
+            "SELECT [kql_datetime(1), toIntervalNanosecond(86400000000000), 1, 2]"
+        },
+        {
+            "print time('13:00:40.00000')",
+            "SELECT toIntervalNanosecond(46840000000000)"
+        },
+        {
+            "print timespan('12.23:12:23');",
+            "SELECT toIntervalNanosecond(1120343000000000)"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp b/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp
new file mode 100644
index 000000000000..7200230bfc7e
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp
@@ -0,0 +1,33 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Distinct, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "Customers | distinct *",
+            "SELECT DISTINCT *\nFROM Customers"
+        },
+        {
+            "Customers | distinct Occupation",
+            "SELECT DISTINCT Occupation\nFROM Customers"
+        },
+        {
+            "Customers | distinct Occupation, Education",
+            "SELECT DISTINCT\n Occupation,\n Education\nFROM Customers"
+        },
+        {
+            "Customers |where Age <30| distinct Occupation, Education",
+            "SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n)"
+        },
+        {
+            "Customers |where Age <30 | order by Age| distinct Occupation, Education",
+            "SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n ORDER BY Age DESC\n)"
+        },
+        {
+            "Customers | project a = (Age % 10) | distinct a;",
+            "SELECT DISTINCT a\nFROM\n(\n SELECT Age % 10 AS a\n FROM Customers\n)"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp
new file mode 100644
index 000000000000..a3212103508b
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp
@@ -0,0 +1,136 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print array_concat(A, B)",
+            "SELECT arrayConcat(A, B)"
+        },
+        {
+            "print array_concat(A, B, C, D)",
+            "SELECT arrayConcat(A, B, C, D)"
+        },
+        {
+            "print array_iff(A, B, C)",
+            "SELECT kql_ArrayIif(A, B, C)"
+        },
+        {
+            "print output = array_index_of(dynamic([1, 2, 3]), 2)",
+            "SELECT indexOf([1, 2, 3], 2) - 1 AS output"
+        },
+        {
+            "print output = array_index_of(dynamic(['a', 'b', 'c']), 'b')",
+            "SELECT indexOf(['a', 'b', 'c'], 'b') - 1 AS output"
+        },
+        {
+            "print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')",
+            "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS output"
+        },
+
+        {
+            "print array_length(dynamic([1, 2, 3]))",
+            "SELECT arrayLastIndex(x -> true, [1, 2, 3])"
+        },
+        {
+            "print array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))",
+            "SELECT arrayLastIndex(x -> true, ['John', 'Denver', 'Bob', 'Marley'])"
+        },
+        {
+            "print array_reverse(A)",
+            "SELECT arrayReverse(A)"
+        },
+        {
+            "print array_rotate_left(A, B)",
+            "SELECT arrayMap(x -> (A[moduloOrZero((x + length(A)) + moduloOrZero(B, toInt64(length(A))), length(A)) + 1]), range(0, length(A)))"
+        },
+        {
+            "print array_rotate_right(A, B)",
+            "SELECT arrayMap(x -> (A[moduloOrZero((x + length(A)) + moduloOrZero(-1 * B, toInt64(length(A))), length(A)) + 1]), range(0, length(A)))"
+        },
+        {
+            "print array_sum(dynamic([2, 5, 3]))",
+            "SELECT if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> (x IS NOT NULL), [2, 5, 3]))), 'Array\\\\((.*)\\\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), arraySum(x -> toFloat64OrDefault(x), [2, 5, 3]), NULL)"
+        },
+        {
+            "print array_sum(dynamic([2.5, 5.5, 3]))",
+            "SELECT if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> (x IS NOT NULL), [2.5, 5.5, 3]))), 'Array\\\\((.*)\\\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), arraySum(x -> toFloat64OrDefault(x), [2.5, 5.5, 3]), NULL)"
+        },
+        {
+            "print jaccard_index(A, B)",
+            "SELECT length(arrayIntersect(A, B)) / length(arrayDistinct(arrayConcat(A, B)))"
+        },
+        {
+            "print pack_array(A, B, C, D)",
+            "SELECT [A, B, C, D]"
+        },
+        {
+            "print set_difference(A, B)",
+            "SELECT arrayFilter(x -> (NOT has(arrayDistinct(arrayConcat(B)), x)), arrayDistinct(A))"
+        },
+        {
+            "print set_difference(A, B, C)",
+            "SELECT arrayFilter(x -> (NOT has(arrayDistinct(arrayConcat(B, C)), x)), arrayDistinct(A))"
+        },
+        {
+            "print set_has_element(A, B)",
+            "SELECT has(A, B)"
+        },
+        {
+            "print set_intersect(A, B)",
+            "SELECT arrayIntersect(A, B)"
+        },
+        {
+            "print set_intersect(A, B, C)",
+            "SELECT arrayIntersect(A, B, C)"
+        },
+        {
+            "print set_union(A, B)",
+            "SELECT arrayDistinct(arrayConcat(A, B))"
+        },
+        {
+            "print set_union(A, B, C)",
+            "SELECT arrayDistinct(arrayConcat(A, B, C))"
+        }
+})));
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicRegex, ParserRegexTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print array_shift_left(A, B)",
+            R"(SELECT arrayResize\(if\(B > 0, arraySlice\(A, B \+ 1\), arrayConcat\(arrayWithConstant\(abs\(B\), fill_value_\d+\), A\)\), length\(A\), if\(\(NULL IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)*'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), NULL\) AS fill_value_\d+\))"
+        },
+        {
+            "print array_shift_left(A, B, C)",
+            R"(SELECT arrayResize\(if\(B > 0, arraySlice\(A, B \+ 1\), arrayConcat\(arrayWithConstant\(abs\(B\), fill_value_\d+\), A\)\), length\(A\), if\(\(C IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), C\) AS fill_value_\d+\))"
+        },
+        {
+            "print array_shift_right(A, B)",
+            R"(SELECT arrayResize\(if\(\(-1 \* B\) > 0, arraySlice\(A, \(-1 \* B\) \+ 1\), arrayConcat\(arrayWithConstant\(abs\(-1 \* B\), fill_value_\d+\), A\)\), length\(A\), if\(\(NULL IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), NULL\) AS fill_value_\d+\))"
+        },
+        {
+            "print array_shift_right(A, B, C)",
+            R"(SELECT arrayResize\(if\(\(-1 \* B\) > 0, arraySlice\(A, \(-1 \* B\) \+ 1\), arrayConcat\(arrayWithConstant\(abs\(-1 \* B\), fill_value_\d+\), A\)\), length\(A\), if\(\(C IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), C\) AS fill_value_\d+\))"
+        },
+        {
+            "print array_slice(A, B, C)",
+            R"(SELECT arraySlice\(A, 1 \+ if\(B >= 0, B, arrayMax\(\[-length\(A\), B\]\) \+ length\(A\)\) AS offset_\d+, \(\(1 \+ if\(C >= 0, C, arrayMax\(\[-length\(A\), C\]\) \+ length\(A\)\)\) - offset_\d+\) \+ 1\))"
        },
+        {
+            "print array_split(A, B)",
+            R"(SELECT if\(empty\(arrayMap\(x -> if\(x >= 0, x, arrayMax\(\[0, x \+ CAST\(length\(A\), 'Int\d+'\)\]\)\), flatten\(\[B\]\)\) AS indices_\d+\), \[A\], arrayConcat\(\[arraySlice\(A, 1, indices_\d+\[1\]\)\], arrayMap\(i -> arraySlice\(A, \(indices_\d+\[i\]\) \+ 1, if\(i = length\(indices_\d+\), CAST\(length\(A\), 'Int\d+'\), CAST\(indices_\d+\[i \+ 1\], 'Int\d+'\)\) - \(indices_\d+\[i\]\)\), range\(1, length\(indices_\d+\) \+ 1\)\)\)\))"
+        },
+        {
+            "print zip(A, B)",
+            R"(SELECT arrayMap\(t -> \[untuple\(t\)\], arrayZip\(arrayResize\(arg0_\d+, arrayMax\(\[length\(if\(match\(toTypeName\(A\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), A, CAST\(A, concat\('Array\(', extract\(toTypeName\(if\(length\(A\) = 0, \[NULL\], A\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg0_\d+\), length\(if\(match\(toTypeName\(B\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), B, CAST\(B, concat\('Array\(', extract\(toTypeName\(if\(length\(B\) = 0, \[NULL\], B\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg1_\d+\)\]\) AS max_length_\d+, NULL\), arrayResize\(arg1_\d+, max_length_\d+, NULL\)\)\))"
+        },
+        {
+            "print zip(A, B, C)",
+            R"(SELECT arrayMap\(t -> \[untuple\(t\)\], arrayZip\(arrayResize\(arg0_\d+, arrayMax\(\[length\(if\(match\(toTypeName\(A\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), A, CAST\(A, concat\('Array\(', extract\(toTypeName\(if\(length\(A\) = 0, \[NULL\], A\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg0_\d+\), length\(if\(match\(toTypeName\(B\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), B, CAST\(B, concat\('Array\(', extract\(toTypeName\(if\(length\(B\) = 0, \[NULL\], B\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg1_\d+\), length\(if\(match\(toTypeName\(C\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), C, CAST\(C, concat\('Array\(', extract\(toTypeName\(if\(length\(C\) = 0, \[NULL\], C\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg2_\d+\)\]\) AS max_length_\d+, NULL\), arrayResize\(arg1_\d+, max_length_\d+, NULL\), arrayResize\(arg2_\d+, max_length_\d+, NULL\)\)\))"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_General.cpp b/src/Parsers/tests/KQL/gtest_KQL_General.cpp
new file mode 100644
index 000000000000..c88c2751b96b
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_General.cpp
@@ -0,0 +1,61 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_General, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print t = case(5 <= 10, 'A', 12 <= 20, 'B', 22 <= 30, 'C', 'D')",
+            "SELECT multiIf(5 <= 10, 'A', 12 <= 20, 'B', 22 <= 30, 'C', 'D') AS t"
+        },
+        {
+            "Customers | extend t = case(Age <= 10, 'A', Age <= 20, 'B', Age <= 30, 'C', 'D')",
+            "SELECT\n * EXCEPT t,\n multiIf(Age <= 10, 'A', Age <= 20, 'B', Age <= 30, 'C', 'D') AS t\nFROM Customers"
+        },
+        {
+            "Customers | extend t = iff(Age < 20, 'little', 'big')",
+            "SELECT\n * EXCEPT t,\n If(Age < 20, 'little', 'big') AS t\nFROM Customers"
+        },
+        {
+            "Customers | extend t = iif(Age < 20, 'little', 'big')",
+            "SELECT\n * EXCEPT t,\n If(Age < 20, 'little', 'big') AS t\nFROM Customers"
+        },
+        {
+            "print res = bin_at(6.5, 2.5, 7)",
+            "SELECT kql_bin_at(6.5, 2.5, 7) AS res"
+        },
+        {
+            "print res = bin_at(1h, 1d, 12h)",
+            "SELECT kql_bin_at(toIntervalNanosecond(3600000000000), toIntervalNanosecond(86400000000000), toIntervalNanosecond(43200000000000)) AS res"
+        },
+        {
+            "print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))",
+            "SELECT kql_bin_at(kql_datetime('2017-05-15 10:20:00.0'), toIntervalNanosecond(86400000000000), kql_datetime('1970-01-01 12:00:00.0')) AS res"
+        },
+        {
+            "print bin(4.5, 1)",
+            "SELECT kql_bin(4.5, 1)"
+        },
+        {
+            "print bin(4.5, -1)",
+            "SELECT kql_bin(4.5, -1)"
+        },
+        {
+            "print bin(time(16d), 7d)",
+            "SELECT kql_bin(toIntervalNanosecond(1382400000000000), toIntervalNanosecond(604800000000000))"
+        },
+        {
+            "print bin(datetime(1970-05-11 13:45:07), 1d)",
+            "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07'), toIntervalNanosecond(86400000000000))"
+        },
+        {
+            "print bin(datetime(1970-05-11 13:45:07.456345672), 1ms)",
+            "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07.456345672'), toIntervalNanosecond(1000000))"
+        },
+        {
+            "print bin(datetime(1970-05-11 13:45:07.456345672), 1microseconds)",
+            "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07.456345672'), toIntervalNanosecond(1000))"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp
new file mode 100644
index 000000000000..bccc6b3bb282
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp
@@ -0,0 +1,85 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print format_ipv4(A)",
+            R"(SELECT ifNull\(if\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS 
NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(32 < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - 32\) - 1\)\)\)\)\), ''\))" + }, + { + "print format_ipv4(A, B)", + R"(SELECT ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(B < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - B\) - 1\)\)\)\)\), ''\))" + }, + { + "print format_ipv4_mask(A)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(32 < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\)\.1, NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - 32\) - 1\)\)\)\)\), ''\) AS formatted_ip_\d+\) OR \(position\(toTypeName\(32\), 'Int'\) = 0\) OR \(NOT \(\(32 >= 0\) AND \(32 <= 32\)\)\), '', concat\(formatted_ip_\d+, '/', toString\(toInt64\(min2\(32, ifNull\(multiIf\(\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS suffix_\d+, 32\)\)\)\)\)\))" + }, + { + "print format_ipv4_mask(A, B)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(B < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\)\.1, NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - B\) - 1\)\)\)\)\), ''\) AS formatted_ip_\d+\) OR \(position\(toTypeName\(B\), 'Int'\) = 0\) OR \(NOT \(\(B >= 0\) AND \(B <= 32\)\)\), '', concat\(formatted_ip_\d+, '/', toString\(toInt64\(min2\(B, ifNull\(multiIf\(\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) > 2\) OR \(NOT 
isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS suffix_\d+, 32\)\)\)\)\)\))" + }, + { + "print ipv4_compare(A, B)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(32, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1\) - toInt64\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1\)\)\))" + }, + { + "print ipv4_compare(A, B, C)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(C, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1\) - toInt64\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1\)\)\))" + }, + { + "print ipv6_compare(A, B)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR 
\(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(128, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\))" + }, + { + "print ipv6_compare(A, B, C)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(C, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\))" + }, + { + "print ipv4_is_in_range(A, B)", + R"(SELECT if\(\(\(IPv4StringToNumOrNull\(A\) AS ip_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS range_start_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS range_mask_\d+\) IS NULL\), NULL, bitXor\(range_start_ip_\d+, bitAnd\(ip_\d+, bitNot\(toUInt32\(intExp2\(32 - range_mask_\d+\) - 1\)\)\)\) = 0\))" + }, + { + "print ipv4_is_match(A, B)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS 
tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(32, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1\) - toInt64\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1\)\)\) = 0)" + }, + { + "print ipv4_is_match(A, B, C)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(C, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1\) - toInt64\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1\)\)\) = 0)" + }, + { + "print ipv6_is_match(A, B)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(128, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR 
\(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\) = 0)" + }, + { + "print ipv6_is_match(A, B, C)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(C, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\) = 0)" + }, + { + "print ipv4_is_private(A)", + R"(SELECT multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(toIPv4OrNull\(tokens_\d+\[1\]\) AS nullable_ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, ignore\(assumeNotNull\(nullable_ip_\d+\) AS ip_\d+, IPv4CIDRToRange\(ip_\d+, assumeNotNull\(mask_\d+\)\) AS range_\d+, IPv4NumToString\(range_\d+.1\) AS begin_\d+, IPv4NumToString\(range_\d+.2\) AS end_\d+\), NULL, \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '10.0.0.0/8'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND isIPAddressInRange\(begin_\d+, '10.0.0.0/8'\) AND isIPAddressInRange\(end_\d+, '10.0.0.0/8'\)\) OR \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '172.16.0.0/12'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND isIPAddressInRange\(begin_\d+, '172.16.0.0/12'\) AND isIPAddressInRange\(end_\d+, '172.16.0.0/12'\)\) OR \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '192.168.0.0/16'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND isIPAddressInRange\(begin_\d+, '192.168.0.0/16'\) AND isIPAddressInRange\(end_\d+, '192.168.0.0/16'\)\)\))" + }, + { + "print ipv4_netmask_suffix(A)", + R"(SELECT multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\))" + }, + { + "print parse_ipv4(A)", + R"(SELECT multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, NULL\))" + }, + { + "print parse_ipv4_mask(A, B)", + 
R"(SELECT if\(\(\(toIPv4OrNull\(A\) AS ip_\d+\) IS NULL\) OR \(\(toUInt8OrNull\(toString\(B\)\) AS mask_\d+\) IS NULL\), NULL, toUInt32\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), arrayMax\(\[0, arrayMin\(\[32, assumeNotNull\(mask_\d+\)\]\)\]\)\).1\)\))" + }, + { + "print parse_ipv6(A)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\).1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\))" + }, + { + "print parse_ipv6_mask(A, B)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(replaceRegexpOne\(A, concat\('\^', '::'\), ''\)\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(replaceRegexpOne\(A, concat\('\^', '::'\), ''\)\) = 'String'\)\) OR \(\(B - 96\) < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(replaceRegexpOne\(A, concat\('\^', '::'\), ''\)\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\)\.1, NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - \(B - 96\)\) - 1\)\)\)\)\), ''\) AS ipv4_\d+\), if\(\(length\(splitByChar\('/', concat\(ifNull\(kql_tostring\(ifNull\(kql_tostring\(if\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\)\), ''\)\), ''\), ifNull\(kql_tostring\('/'\), ''\), ifNull\(kql_tostring\(ifNull\(kql_tostring\(B\), ''\)\), ''\), ''\)\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\), if\(\(length\(splitByChar\('/', ipv4_\d+\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\)\))" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp b/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp new file mode 100644 index 000000000000..a58d0799820f --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp @@ -0,0 +1,45 @@ +#include + +#include + 
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MVExpand, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "T | mv-expand c",
+            "SELECT *\nFROM T\nARRAY JOIN c\nSETTINGS enable_unaligned_array_join = 1"
+        },
+        {
+            "T | mv-expand c, d",
+            "SELECT *\nFROM T\nARRAY JOIN\n c,\n d\nSETTINGS enable_unaligned_array_join = 1"
+        },
+        {
+            "T | mv-expand c to typeof(bool)",
+            "SELECT\n * EXCEPT c_ali,\n c_ali AS c\nFROM\n(\n SELECT\n * EXCEPT c,\n accurateCastOrNull(toInt64OrNull(toString(c)), 'Boolean') AS c_ali\n FROM\n (\n SELECT *\n FROM T\n ARRAY JOIN c\n )\n)\nSETTINGS enable_unaligned_array_join = 1"
+        },
+        {
+            "T | mv-expand b | mv-expand c",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM T\n ARRAY JOIN b\n SETTINGS enable_unaligned_array_join = 1\n)\nARRAY JOIN c\nSETTINGS enable_unaligned_array_join = 1"
+        },
+        {
+            "T | mv-expand with_itemindex=index b, c, d",
+            "SELECT\n index,\n *\nFROM T\nARRAY JOIN\n b,\n c,\n d,\n range(0, arrayMax([length(b), length(c), length(d)])) AS index\nSETTINGS enable_unaligned_array_join = 1"
+        },
+        {
+            "T | mv-expand array_concat(c,d)",
+            "SELECT\n *,\n array_concat_\nFROM T\nARRAY JOIN arrayConcat(c, d) AS array_concat_\nSETTINGS enable_unaligned_array_join = 1"
+        },
+        {
+            "T | mv-expand x = c, y = d",
+            "SELECT\n *,\n x,\n y\nFROM T\nARRAY JOIN\n c AS x,\n d AS y\nSETTINGS enable_unaligned_array_join = 1"
+        },
+        {
+            "T | mv-expand xy = array_concat(c, d)",
+            "SELECT\n *,\n xy\nFROM T\nARRAY JOIN arrayConcat(c, d) AS xy\nSETTINGS enable_unaligned_array_join = 1"
+        },
+        {
+            "T | mv-expand with_itemindex=index c,d to typeof(bool)",
+            "SELECT\n * EXCEPT d_ali,\n d_ali AS d\nFROM\n(\n SELECT\n * EXCEPT d,\n accurateCastOrNull(toInt64OrNull(toString(d)), 'Boolean') AS d_ali\n FROM\n (\n SELECT\n index,\n *\n FROM T\n ARRAY JOIN\n c,\n d,\n range(0, arrayMax([length(c), length(d)])) AS index\n )\n)\nSETTINGS enable_unaligned_array_join = 1"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp
new file mode 100644
index 000000000000..5c94ab4665e5
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp
@@ -0,0 +1,25 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MakeSeries, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit",
+            "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n 
toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM T\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga)) < 0, 0, length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(toUInt64(10), toUInt64(15), toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(10) + (toInt64((toFloat64(Purchase) - toFloat64(10)) / 1) * 1) AS Purchase_ali\n FROM T2\n WHERE (toInt64(toFloat64(Purchase)) >= toUInt64(10)) AND (toInt64(toFloat64(Purchase)) < toUInt64(15))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(86400))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 62135596800, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 62135596800, 9, 'UTC'), range(low, high, toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) + 62135596800) / 86400) * 86400) AS Purchase_ali\n FROM T\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(1))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(low, high, toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n 
toFloat64(toInt64((toFloat64(Purchase) + 0) / 1) * 1) AS Purchase_ali\n FROM T2\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp
new file mode 100644
index 000000000000..0db961bddfb4
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp
@@ -0,0 +1,213 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_String, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print base64_encode_fromguid(A)",
+            "SELECT if(toTypeName(A) NOT IN ['UUID', 'Nullable(UUID)'], toString(throwIf(true, 'Expected guid as argument')), base64Encode(UUIDStringToNum(toString(A), 2)))"
+        },
+        {
+            "print base64_decode_toguid(A)",
+            "SELECT toUUIDOrNull(UUIDNumToString(toFixedString(base64Decode(A), 16), 2))"
+        },
+        {
+            "print base64_decode_toarray('S3VzdG8=')",
+            "SELECT arrayMap(x -> reinterpretAsUInt8(x), splitByRegexp('', base64Decode('S3VzdG8=')))"
+        },
+        {
+            "print replace_regex('Hello, World!', '.', '\\0\\0')",
+            "SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0')"
+        },
+        {
+            "print idx = has_any_index('this is an example', dynamic(['this', 'example'])) ",
+            "SELECT if(empty(['this', 'example']), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty(['this', 'example']), [''], arrayMap(x -> toString(x), ['this', 'example']))), 1) - 1) AS idx"
+        },
+        {
+            "print idx = has_any_index('this is an example', dynamic([]))",
+            "SELECT if(empty([]), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty([]), [''], arrayMap(x -> toString(x), []))), 1) - 1) AS idx"
+        },
+        {
+            "print translate('krasp', 'otsku', 'spark')",
+            "SELECT if(length('otsku') = 0, '', translate('spark', 'krasp', multiIf(length('otsku') = 0, 'krasp', (length('krasp') - length('otsku')) > 0, concat('otsku', repeat(substr('otsku', length('otsku'), 1), toUInt16(length('krasp') - length('otsku')))), (length('krasp') - length('otsku')) < 0, substr('otsku', 1, length('krasp')), 'otsku')))"
+        },
+        {
+            "print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))",
+            "SELECT replaceRegexpOne(concat(ifNull(kql_tostring('- '), ''), ifNull(kql_tostring('Te st1'), ''), ifNull(kql_tostring('// $'), ''), ''), concat('^', '[^\\\\w]+'), '')"
+        },
+        {
+            "print trim_end('.com', 'bing.com')",
+            "SELECT replaceRegexpOne('bing.com', concat('.com', '$'), '')"
+        },
+        {
+            "print trim('--', '--https://bing.com--')",
+            "SELECT replaceRegexpOne(replaceRegexpOne('--https://bing.com--', concat('--', '$'), ''), concat('^', '--'), '')"
+        },
+        {
+            "print bool(1)",
+            "SELECT if((toTypeName(1) = 'IntervalNanosecond') OR ((accurateCastOrNull(1, 'Bool') IS NULL) != (1 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Bool literal'), 'Bool'), accurateCastOrNull(1, 'Bool'))"
+        },
+        {
+            "print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)",
+            "SELECT toUUIDOrNull('74be27de-1e4e-49d9-b579-fe0b331d3642')"
+        },
+        {
+            "print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')",
+            "SELECT toUUIDOrNull('74be27de-1e4e-49d9-b579-fe0b331d3642')"
+        },
+        {
+            "print guid('74be27de1e4e49d9b579fe0b331d3642')",
+            "SELECT toUUIDOrNull('74be27de1e4e49d9b579fe0b331d3642')"
+        },
+        {
+            "print int(32.5)",
+            "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Int32') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 
'Failed to parse Int32 literal'), 'Int32'), accurateCastOrNull(32.5, 'Int32'))" + }, + { + "print long(32.5)", + "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Int64') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Int64 literal'), 'Int64'), accurateCastOrNull(32.5, 'Int64'))" + }, + { + "print real(32.5)", + "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Float64') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Float64 literal'), 'Float64'), accurateCastOrNull(32.5, 'Float64'))" + }, + { + "print time('1.22:34:8.128')", + "SELECT toIntervalNanosecond(167648128000000)" + }, + { + "print time('1d')", + "SELECT toIntervalNanosecond(86400000000000)" + }, + { + "print time('1.5d')", + "SELECT toIntervalNanosecond(129600000000000)" + }, + { + "print timespan('1.5d')", + "SELECT toIntervalNanosecond(129600000000000)" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(bool));", + "SELECT accurateCastOrNull(toInt64OrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1)), 'Boolean')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(date));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'DateTime')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(guid));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'UUID')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(int));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Int32')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(long));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Int64')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(real));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Float64')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(decimal));", + "SELECT toDecimal128OrNull(if(countSubstrings(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), '.') > 1, NULL, kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1)), length(substr(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), position(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), '.') + 1)))" + }, + { + "print parse_version('1.2.3.40')", + "SELECT if((length(splitByChar('.', '1.2.3.40')) > 4) OR (length(splitByChar('.', '1.2.3.40')) < 1) OR (match('1.2.3.40', '.*[a-zA-Z]+.*') = 1) OR empty('1.2.3.40') OR hasAll(splitByChar('.', '1.2.3.40'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '1.2.3.40'), 4)))), 8), 0))" + }, + { + "print parse_version('1')", + "SELECT if((length(splitByChar('.', '1')) > 4) OR (length(splitByChar('.', '1')) < 1) OR (match('1', '.*[a-zA-Z]+.*') = 1) OR empty('1') OR hasAll(splitByChar('.', '1'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '1'), 4)))), 8), 0))" + }, + { + "print parse_version('')", + "SELECT if((length(splitByChar('.', '')) > 4) OR (length(splitByChar('.', '')) < 1) OR (match('', '.*[a-zA-Z]+.*') = 1) OR empty('') OR hasAll(splitByChar('.', ''), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> 
leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', ''), 4)))), 8), 0))" + }, + { + "print parse_version('...')", + "SELECT if((length(splitByChar('.', '...')) > 4) OR (length(splitByChar('.', '...')) < 1) OR (match('...', '.*[a-zA-Z]+.*') = 1) OR empty('...') OR hasAll(splitByChar('.', '...'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '...'), 4)))), 8), 0))" + }, + { + "print parse_json( dynamic([1, 2, 3]))", + "SELECT [1, 2, 3]" + }, + { + "print parse_json('{\"a\":123.5, \"b\":\"{\\\"c\\\":456}\"}')", + "SELECT if(isValidJSON('{\"a\":123.5, \"b\":\"{\"c\":456}\"}'), JSON_QUERY('{\"a\":123.5, \"b\":\"{\"c\":456}\"}', '$'), toJSONString('{\"a\":123.5, \"b\":\"{\"c\":456}\"}'))" + }, + { + "print extract_json( '$.a' , '{\"a\":123, \"b\":\"{\"c\":456}\"}' , typeof(long))", + "SELECT accurateCastOrNull(JSON_VALUE('{\"a\":123, \"b\":\"{\"c\":456}\"}', '$.a'), 'Int64')" + }, + { + "print parse_command_line('echo \"hello world!\" print$?', 'windows')", + "SELECT if(empty('echo \"hello world!\" print$?') OR hasAll(splitByChar(' ', 'echo \"hello world!\" print$?'), ['']), arrayMap(x -> NULL, splitByChar(' ', '')), splitByChar(' ', 'echo \"hello world!\" print$?'))" + }, + { + "print reverse(123)", + "SELECT reverse(ifNull(kql_tostring(123), ''))" + }, + { + "print reverse(123.34)", + "SELECT reverse(ifNull(kql_tostring(123.34), ''))" + }, + { + "print reverse('clickhouse')", + "SELECT reverse(ifNull(kql_tostring('clickhouse'), ''))" + }, + { + "print result=parse_csv('aa,b,cc')", + "SELECT if(CAST(position('aa,b,cc', '\\n'), 'UInt8'), splitByChar(',', substring('aa,b,cc', 1, position('aa,b,cc', '\\n') - 1)), splitByChar(',', substring('aa,b,cc', 1, length('aa,b,cc')))) AS result" + }, + { + "print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z')", + "SELECT if(CAST(position('record1,a,b,c\\nrecord2,x,y,z', '\\n'), 'UInt8'), splitByChar(',', substring('record1,a,b,c\\nrecord2,x,y,z', 1, position('record1,a,b,c\\nrecord2,x,y,z', '\\n') - 1)), splitByChar(',', substring('record1,a,b,c\\nrecord2,x,y,z', 1, length('record1,a,b,c\\nrecord2,x,y,z')))) AS result_multi_record" + }, + { + "Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName", + "SELECT concat(ifNull(kql_tostring(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(if(toInt64(length(LastName)) <= 0, '', substr(LastName, (((2 % toInt64(length(LastName))) + toInt64(length(LastName))) % toInt64(length(LastName))) + 1))), ''), '') AS name_abbr\nFROM Customers\nORDER BY LastName DESC" + }, + { + "print idx1 = indexof('abcdefg','cde')", + "SELECT kql_indexof('abcdefg', 'cde', 0, -1, 1) AS idx1" + }, + { + "print idx2 = indexof('abcdefg','cde',0,3)", + "SELECT kql_indexof('abcdefg', 'cde', 0, 3, 1) AS idx2" + }, + { + "print idx3 = indexof('abcdefg','cde',1,2)", + "SELECT kql_indexof('abcdefg', 'cde', 1, 2, 1) AS idx3" + }, + { + "print idx5 = indexof('abcdefg','cde',-5) ", + "SELECT kql_indexof('abcdefg', 'cde', -5, -1, 1) AS idx5" + }, + { + "print idx6 = indexof(1234567,5,1,4) ", + "SELECT kql_indexof(1234567, 5, 1, 4, 1) AS idx6" + }, + { + "print idx7 = indexof('abcdefg','cde',2,-1)", + "SELECT kql_indexof('abcdefg', 'cde', 2, -1, 1) 
AS idx7" + }, + { + "print idx8 = indexof('abcdefgabcdefg', 'cde', 3)", + "SELECT kql_indexof('abcdefgabcdefg', 'cde', 3, -1, 1) AS idx8" + }, + { + "print idx9 = indexof('abcdefgabcdefg', 'cde', 1, 13, 3) ", + "SELECT kql_indexof('abcdefgabcdefg', 'cde', 1, 13, 3) AS idx9" + }, + { + "print from_time = strrep(3s,2,' ')", + "SELECT substr(repeat(concat(ifNull(kql_tostring(toIntervalNanosecond(3000000000)), ''), ' '), 2), 1, length(repeat(concat(ifNull(kql_tostring(toIntervalNanosecond(3000000000)), ''), ' '), 2)) - length(' ')) AS from_time" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp b/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp new file mode 100644 index 000000000000..4162461e1dc2 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp @@ -0,0 +1,57 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopHitters, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | top 5 by Age", + "SELECT *\nFROM Customers\nORDER BY Age DESC\nLIMIT 5" + }, + { + "Customers | top 5 by Age desc", + "SELECT *\nFROM Customers\nORDER BY Age DESC\nLIMIT 5" + }, + { + "Customers | top 5 by Age asc", + "SELECT *\nFROM Customers\nORDER BY Age ASC\nLIMIT 5" + }, + { + "Customers | top 5 by FirstName desc nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST\nLIMIT 5" + }, + { + "Customers | top 5 by FirstName desc nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST\nLIMIT 5" + }, + { + "Customers | top 5 by Age | top 2 by FirstName", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n ORDER BY Age DESC\n LIMIT 5\n)\nORDER BY FirstName DESC\nLIMIT 2" + }, + { + "Customers| top-hitters a = 3 of Age by extra", + "SELECT *\nFROM\n(\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_sum_extra DESC\nLIMIT 3 AS a" + }, + { + "Customers| top-hitters 3 of Age", + "SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC\nLIMIT 3" + }, + { + "Customers| top-hitters 3 of Age by extra | top-hitters 2 of Age", + "SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM\n (\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n )\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC\nLIMIT 2" + }, + { + "Customers| top-hitters 3 of Age by extra | where Age > 30", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n)\nWHERE Age > 30" + }, + { + "Customers| top-hitters 3 of Age by extra | where approximate_sum_extra < 200", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n)\nWHERE approximate_sum_extra < 200" + }, + { + "Customers| top-hitters 3 of Age | where approximate_count_Age > 2", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_count_Age DESC\n LIMIT 3\n)\nWHERE approximate_count_Age > 2" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp b/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp new 
diff --git a/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp b/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp
new file mode 100644
index 000000000000..a9f7cdf269b2
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp
@@ -0,0 +1,61 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopNested, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "sales | top-nested 3 of region by sum(amount)",
+            "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal"
+        },
+        {
+            "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount)",
+            "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n )\nSELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\nFROM tb1_normal"
+        },
+        {
+            "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount), top-nested 2 of salesdate by sum(amount)",
+            "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb2_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate AS salesdate,\n sum(amount) AS aggregated_salesdate\n FROM tb1_normal\n INNER JOIN source_table AS join1 USING (region, salesperson)\n GROUP BY\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate\n ),\n tb2_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate,\n ROW_NUMBER() OVER (PARTITION BY region, salesperson ORDER BY aggregated_salesdate DESC) AS row2\n FROM tb2_prev\n ),\n tb2_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_partition\n WHERE row2 <= 2\n )\nSELECT\n region,\n aggregated_region,\n salesperson,\n 
aggregated_salesperson,\n salesdate,\n aggregated_salesdate\nFROM tb2_normal" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region\n FROM tb0_normal\n UNION ALL\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region\n FROM tb0_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb0_all_others AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson\n FROM tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n aggregated_salesperson_value AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT 
region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb0_all_others AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region,\n NULL AS salesperson,\n NULL AS aggregated_salesperson\n FROM tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n NULL AS salesperson,\n NULL AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb0_all_others AS\n (\n SELECT\n NULL AS region,\n NULL AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson\n FROM tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n 
aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n aggregated_salesperson_value AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount), top-nested 2 of salesdate with others = 'all other date' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb2_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate AS salesdate,\n sum(amount) AS aggregated_salesdate\n FROM tb1_normal\n INNER JOIN source_table AS join1 USING (region, salesperson)\n GROUP BY\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate\n ),\n tb2_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate,\n ROW_NUMBER() OVER (PARTITION BY region, salesperson ORDER BY aggregated_salesdate DESC) AS row2\n FROM tb2_prev\n ),\n tb2_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_partition\n WHERE row2 <= 2\n ),\n tb2_others_prev AS\n (\n SELECT\n region,\n salesperson,\n sum(amount) AS aggregated_salesdate_value\n FROM source_table\n LEFT JOIN tb2_normal USING (region, salesperson, salesdate)\n WHERE empty(tb2_normal.salesdate) AND (source_table.region IN (\n SELECT region\n FROM tb2_normal\n )) AND (source_table.salesperson IN (\n SELECT salesperson\n FROM tb2_normal\n ))\n GROUP BY\n region,\n salesperson\n ),\n tb2_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n aggregated_salesdate_value\n FROM tb2_others_prev\n RIGHT JOIN tb2_normal USING (region, salesperson)\n ),\n tb0_all_others AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value 
AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_region_value AS aggregated_salesdate\n FROM tb0_others\n ),\n tb1_all_others AS\n (\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n aggregated_salesperson_value AS aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_salesperson_value AS aggregated_salesdate\n FROM tb1_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_salesdate_value AS aggregated_salesdate\n FROM tb2_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb0_all_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb1_all_others\n )\nSELECT *\nFROM last_query"
+        },
+        {
+            "sales | top-nested 3 of region by sum(amount)*2 + 5",
+            "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n (sum(amount) * 2) + 5 AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal"
+        },
+        {
+            "sales | top-nested strlen('abc') of region by sum(amount)",
+            "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT lengthUTF8('abc')\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal"
+        },
+        {
+            "sales | top-nested 3 of region with others = strcat('all other',' region') by sum(amount)",
+            "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region\n FROM tb0_normal\n UNION ALL\n SELECT\n concat(ifNull(kql_tostring('all other'), ''), ifNull(kql_tostring(' region'), ''), '') AS region,\n aggregated_region_value AS aggregated_region\n FROM tb0_others\n )\nSELECT *\nFROM last_query"
+        },
+        {
+            "sales | top-nested of region by sum(amount)",
+            "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal"
+        },
+        {
+            "sales | top-nested of substring(region,0,1) by sum(amount)",
+            "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n if(toInt64(length(region)) <= 0, '', substr(region, (((0 % toInt64(length(region))) + toInt64(length(region))) % toInt64(length(region))) + 1, 1)) AS Column1,\n sum(amount) AS aggregated_Column1\n FROM source_table\n GROUP BY Column1\n ORDER BY aggregated_Column1 DESC\n )\nSELECT\n Column1,\n aggregated_Column1\nFROM tb0_normal"
+        }
+})));
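+
+// Each top-nested level above expands into a CTE chain: tbN_prev aggregates the
+// level, tbN_partition ranks rows with ROW_NUMBER() OVER (PARTITION BY ...), and
+// tbN_normal keeps rows within the requested limit; a "with others" clause adds
+// tbN_others branches that are merged back through UNION ALL in last_query.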
diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp
new file mode 100644
index 000000000000..aa94ba3940c2
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp
@@ -0,0 +1,230 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Datetime, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print week_of_year(datetime(2020-12-31))",
+            "SELECT toWeek(kql_datetime('2020-12-31'), 3, 'UTC')"
+        },
+        {
+            "print startofweek(datetime(2017-01-01 10:10:17), -1)",
+            "SELECT kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), -1))"
+        },
+        {
+            "print startofmonth(datetime(2017-01-01 10:10:17), -1)",
+            "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), -1))"
+        },
+        {
+            "print startofday(datetime(2017-01-01 10:10:17), -1)",
+            "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), -1))"
+        },
+        {
+            "print startofyear(datetime(2017-01-01 10:10:17), -1)",
+            "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), -1))"
+        },
+        {
+            "print monthofyear(datetime(2015-12-14))",
+            "SELECT toMonth(kql_datetime('2015-12-14'))"
+        },
+        {
+            "print hourofday(datetime(2015-12-14 10:54:00))",
+            "SELECT toHour(kql_datetime('2015-12-14 10:54:00'))"
+        },
+        {
+            "print getyear(datetime(2015-10-12))",
+            "SELECT toYear(kql_datetime('2015-10-12'))"
+        },
+        {
+            "print getmonth(datetime(2015-10-12))",
+            "SELECT toMonth(kql_datetime('2015-10-12'))"
+        },
+        {
+            "print dayofyear(datetime(2015-10-12))",
+            "SELECT toDayOfYear(kql_datetime('2015-10-12'))"
+        },
+        {
+            "print dayofmonth(datetime(2015-10-12))",
+            "SELECT toDayOfMonth(kql_datetime('2015-10-12'))"
+        },
+        {
+            "print unixtime_seconds_todatetime(1546300899)",
+            "SELECT if(toTypeName(assumeNotNull(1546300899)) IN ['Int32', 'Int64', 'Float64', 'UInt32', 'UInt64'], kql_todatetime(1546300899), kql_todatetime(throwIf(true, 'unixtime_seconds_todatetime only accepts int, long and double type of arguments')))"
+        },
+        {
+            "print dayofweek(datetime(2015-12-20))",
+            "SELECT (toDayOfWeek(kql_datetime('2015-12-20')) % 7) * toIntervalNanosecond(86400000000000)"
+        },
+        {
+            "print now()",
+            "SELECT now64(9, 'UTC')"
+        },
+        {
+            "print now(1d)",
+            "SELECT now64(9, 'UTC') + toIntervalNanosecond(86400000000000)"
+        },
+        {
+            "print ago(2d)",
+            "SELECT now64(9, 'UTC') + (-1 * toIntervalNanosecond(172800000000000))"
+        },
+        {
+            "print endofday(datetime(2017-01-01 10:10:17), -1)",
+            "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100)"
+        },
+        {
+            "print endofday(datetime(2017-01-01 10:10:17), 1)",
+            "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100)"
+        },
+        {
+            "print endofmonth(datetime(2017-01-01 10:10:17), -1)",
+            "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100)"
+        },
+        {
+            "print endofmonth(datetime(2017-01-01 10:10:17), 1)",
+            "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100)"
+        },
+        {
+            "print endofweek(datetime(2017-01-01 10:10:17), -1)",
+            "SELECT kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100)"
+        },
+        {
+            "print endofweek(datetime(2017-01-01 10:10:17), 1)",
+            "SELECT 
kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), -1) ", + "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), 1)" , + "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print make_datetime(2017,10,01)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((0 >= 0) AND (0 <= 59)) AND ((0 >= 0) AND (0 <= 59)) AND (0 >= 0) AND (0 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 0, 0, truncate(0), (0 - truncate(0)) * 10000000., 7, 'UTC')), 9), NULL)" + }, + { + "print make_datetime(2017,10,01,12,10)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((12 >= 0) AND (12 <= 59)) AND ((10 >= 0) AND (10 <= 59)) AND (0 >= 0) AND (0 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 12, 10, truncate(0), (0 - truncate(0)) * 10000000., 7, 'UTC')), 9), NULL)" + }, + { + "print make_datetime(2017,10,01,12,11,0.1234567)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((12 >= 0) AND (12 <= 59)) AND ((11 >= 0) AND (11 <= 59)) AND (0.1234567 >= 0) AND (0.1234567 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 12, 11, truncate(0.1234567), (0.1234567 - truncate(0.1234567)) * 10000000., 7, 'UTC')), 9), NULL)" + }, + { + "print unixtime_microseconds_todatetime(1546300800000000)", + "SELECT kql_todatetime(fromUnixTimestamp64Micro(1546300800000000, 'UTC'))" + }, + { + "print unixtime_milliseconds_todatetime(1546300800000)", + "SELECT kql_todatetime(fromUnixTimestamp64Milli(1546300800000, 'UTC'))" + }, + { + "print unixtime_nanoseconds_todatetime(1546300800000000000)", + "SELECT kql_todatetime(fromUnixTimestamp64Nano(1546300800000000000, 'UTC'))" + }, + { + "print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))", + "SELECT dateDiff('year', kql_datetime('2000-12-31'), kql_datetime('2017-01-01'))" + }, + { + "print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", + "SELECT dateDiff('minute', kql_datetime('2017-10-30 23:00:59'), kql_datetime('2017-10-30 23:05:01'))" + }, + { + "print datetime(null)", + "SELECT kql_datetime(NULL)" + }, + { + "print datetime('2014-05-25T08:20:03.123456Z')", + "SELECT kql_datetime('2014-05-25T08:20:03.123456Z')" + }, + { + "print datetime(2015-12-14 18:54)", + "SELECT kql_datetime('2015-12-14 18:54')" + }, + { + "print datetime(2015-12-31 23:59:59.9)", + "SELECT kql_datetime('2015-12-31 23:59:59.9')" + }, + { + "print datetime(\"2015-12-31 23:59:59.9\")", + "SELECT kql_datetime('2015-12-31 23:59:59.9')" + }, + { + "print datetime('2015-12-31 23:59:59.9')", + "SELECT kql_datetime('2015-12-31 23:59:59.9')" + }, + { + "print make_timespan(67,12,30,59.9799)", + "SELECT (((67 * 
toIntervalNanosecond(86400000000000)) + (12 * toIntervalNanosecond(3600000000000))) + (30 * toIntervalNanosecond(60000000000))) + (59.9799 * toIntervalNanosecond(1000000000))" + }, + { + "print todatetime('2014-05-25T08:20:03.123456Z')", + "SELECT kql_todatetime('2014-05-25T08:20:03.123456Z')" + }, + { + "print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')", + "SELECT concat(substring(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')), 1, position(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')), '.')), substring(substring(toString(kql_todatetime('2009-06-15T13:45:30.6175425')), position(toString(kql_todatetime('2009-06-15T13:45:30.6175425')), '.') + 1), 1, 3), substring(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')), position(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')), '.') + 1, length(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')))))" + }, + { + "print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')", + "SELECT formatDateTime(kql_datetime('2015-12-14 02:03:04.12345'), '%y-%m-%e %I:%M:%S %p')" + }, + { + "print format_timespan(time(1d), 'd-[hh:mm:ss]')", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000)))) < 1, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000))), 1, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000)))), '-', '[', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60)), ']', '')" + }, + { + "print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(86400000000000)))) < 5, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(86400000000000))), 5, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(86400000000000)))), '-', '[', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), 
toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60)), '.', if(length(substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4)) < 4, rightPad(substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4), 4, '0'), substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4)), ']', '')" + }, + { + "print v1=format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF')", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000))), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))), '.', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)), ':', substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 2), '') AS v1" + }, + { + "print v2=format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]');", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))) < 3, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000))), 3, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))), '.', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)) < 1, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24), 1, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), 
toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)), ' ', '[', if(length(substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7)) < 7, rightPad(substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7), 7, '0'), substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7)), ']', '') AS v2"
+        },
+        {
+            "print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))",
+            "SELECT formatDateTime(kql_datetime('2017-10-30 01:02:03.7654321'), '%e')"
+        },
+        {
+            "print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))",
+            "SELECT kql_datetime('2017-10-30 01:02:03.7654321') + toIntervalDay(1)"
+        },
+        {
+            "print totimespan(time(1d))",
+            "SELECT kql_totimespan(toIntervalNanosecond(86400000000000))"
+        },
+        {
+            "print totimespan('0.01:34:23')",
+            "SELECT kql_totimespan('0.01:34:23')"
+        },
+        {
+            "print totimespan(time('-1:12:34'))",
+            "SELECT kql_totimespan(toIntervalNanosecond(-4354000000000))"
+        },
+        {
+            "print totimespan(-1d)",
+            "SELECT kql_totimespan(-toIntervalNanosecond(86400000000000))"
+        },
+        {
+            "print totimespan('abc')",
+            "SELECT kql_totimespan('abc')"
+        },
+        {
+            "print time(2)",
+            "SELECT toIntervalNanosecond(172800000000000)"
+        },
+        {
+            "hits | project bin(todatetime(EventTime), 1m)",
+            "SELECT kql_bin(kql_todatetime(EventTime), toIntervalNanosecond(60000000000))\nFROM hits"
+        }
+
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp
new file mode 100644
index 000000000000..38bf08e9900c
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp
@@ -0,0 +1,140 @@
+#include <Parsers/tests/gtest_common.h>
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "print t = array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c']))",
+            "SELECT kql_array_sort_asc([NULL, 'd', 'a', 'c', 'c']).1 AS t"
+        },
+        {
+            "print t = array_sort_asc(dynamic([4, 1, 3, 2]))",
+            "SELECT kql_array_sort_asc([4, 1, 3, 2]).1 AS t"
+        },
+        {
+            "print t = array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))",
+            "SELECT kql_array_sort_asc(['b', 'a', 'c'], ['q', 'p', 'r']).1 AS t"
+        },
+        {
+            "print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false)",
+            "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], false).1 AS t"
+        },
+        {
+            "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)",
+            "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL], false).1 AS t"
+        },
+        {
+            "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , true)",
+            "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL], true).1 AS t"
+        },
+        {
+            "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]))",
+            "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL]).1 AS t"
+        },
+        {
+            "print t = array_sort_asc( 
dynamic(['d', null, 'a', 'c', 'c']), 1 < 2)", + "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], 1 < 2).1 AS t" + }, + { + "print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2)", + "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], 1 > 2).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), false)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], false).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 > 2)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], 1 > 2).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), true)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], true).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 < 2)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], 1 < 2).1 AS t" + }, + { + "print t = array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c']))", + "SELECT kql_array_sort_desc([NULL, 'd', 'a', 'c', 'c']).1 AS t" + }, + { + "print t = array_sort_desc(dynamic([4, 1, 3, 2]))", + "SELECT kql_array_sort_desc([4, 1, 3, 2]).1 AS t" + }, + { + "print t = array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))", + "SELECT kql_array_sort_desc(['b', 'a', 'c'], ['q', 'p', 'r']).1 AS t" + }, + { + "print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))", + "SELECT kql_array_sort_desc(['b', 'a', 'c'], ['q', 'p', 'r'])" + }, + { + "print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false)", + "SELECT kql_array_sort_desc(['d', NULL, 'a', 'c', 'c'], false).1 AS t" + }, + { + "print array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))[0]", + "SELECT tupleElement(kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]), if(0 >= 0, 0 + 1, 0))" + }, + { + "print (t) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t" + }, + { + "print (t,w) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))", + "SELECT\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).2 AS w" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30])),y=5", + "SELECT\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n 5 AS y" + }, + { + "print 5, (t) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30])),y=5", + "SELECT\n 5,\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n 5 AS y" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30])),w = array_sort_asc(dynamic([2, 1, 3]))", + "SELECT\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n kql_array_sort_asc([2, 1, 3]).1 AS w" + }, + { + "print A[0]", + "SELECT A[if(0 >= 0, 0 + 1, 0)]" + }, + { + "print A[0][1]", + "SELECT (A[if(0 >= 0, 0 + 1, 0)])[if(1 >= 0, 1 + 1, 1)]" + }, + { + "print dynamic([[1,2,3,4,5],[20,30]])[0]", + "SELECT [[1, 2, 3, 4, 5], [20, 30]][if(0 >= 0, 0 + 1, 0)]" + }, + { + "print dynamic([[1,2,3,4,5],[20,30]])[1][1]", + "SELECT ([[1, 2, 3, 4, 5], [20, 30]][if(1 >= 0, 1 + 1, 1)])[if(1 >= 0, 1 + 1, 1)]" + }, + { + "print A[B[1]]", + "SELECT A[if((B[if(1 >= 0, 1 + 1, 1)]) >= 0, (B[if(1 >= 0, 1 + 1, 1)]) + 1, B[if(1 >= 0, 1 + 1, 1)])]" + }, + { + "print A[strlen('a')-1]", + "SELECT A[if((lengthUTF8('a') - 1) >= 0, (lengthUTF8('a') - 1) 
+ 1, lengthUTF8('a') - 1)]"
+        },
+        {
+            "print strlen(A[0])",
+            "SELECT lengthUTF8(A[if(0 >= 0, 0 + 1, 0)])"
+        },
+        {
+            "print repeat(1, 3)",
+            "SELECT if(3 < 0, [NULL], arrayWithConstant(abs(3), 1))"
+        },
+        {
+            "print repeat(1, -3)",
+            "SELECT if(-3 < 0, [NULL], arrayWithConstant(abs(-3), 1))"
+        }
+    })));
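+
+// KQL indexing is zero-based while ClickHouse arrays are one-based, so the cases
+// above rewrite every subscript as arr[if(i >= 0, i + 1, i)]; array_sort_asc and
+// array_sort_desc map to kql_array_sort_* helpers returning a tuple that is
+// unpacked positionally with .1, .2, ... into the projected columns.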
diff --git a/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp b/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp
new file mode 100644
index 000000000000..d6b566ac413e
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp
@@ -0,0 +1,106 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_lookup_join, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "FactTable | lookup kind=leftouter DimTable on Personal, Family",
+            "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable | lookup kind=inner DimTable on Personal, Family",
+            "SELECT *\nFROM FactTable AS left_\nINNER JOIN DimTable AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable | lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family",
+            "SELECT *\nFROM FactTable AS left_\nLEFT JOIN\n(\n SELECT *\n FROM DimTable\n WHERE Personal = 'Bill'\n) AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable | project Personal , Family| lookup kind=leftouter DimTable on Personal, Family",
+            "SELECT *\nFROM\n(\n SELECT\n Personal,\n Family\n FROM FactTable\n) AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable | lookup kind=leftouter DimTable on $left.Personal == $right.Personal, $left.Family == $right.Family",
+            "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable | lookup kind=leftouter DimTable on Personal , $left.Family == $right.Family",
+            "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable|lookup kind=leftouter DimTable on Personal , ($left.Family == $right.Family)",
+            "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable | project Row, Personal , Family | lookup kind=leftouter (FactTable | lookup kind=leftouter DimTable on Personal) on Personal, Family",
+            "SELECT *\nFROM\n(\n SELECT\n Row,\n Personal,\n Family\n FROM FactTable\n) AS left_\nLEFT JOIN\n(\n SELECT *\n FROM FactTable AS left_\n LEFT JOIN DimTable AS right_ USING (Personal)\n) AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable|project Row, Personal , Family| lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family| lookup kind=inner DimTable on Personal, Family",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Row,\n Personal,\n Family\n FROM FactTable\n ) AS left_\n LEFT JOIN\n (\n SELECT *\n FROM DimTable\n WHERE Personal = 'Bill'\n ) AS right_ USING (Personal, Family)\n) AS left_\nINNER JOIN DimTable AS right_ USING (Personal, Family)"
+        },
+        {
+            "FactTable| lookup kind=leftouter DimTable on $left.Personal == $right.FirstName, Family",
+            "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ ON (left_.Personal = right_.FirstName) AND (left_.Family = right_.Family)"
+        },
+        {
+            "FactTable| lookup kind=leftouter DimTable on $left.Personal == $right.FirstName, Family| lookup kind=inner DimTable on $left.Personal == $right.FirstName",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM FactTable AS left_\n LEFT JOIN DimTable AS right_ ON (left_.Personal = right_.FirstName) AND (left_.Family = right_.Family)\n) AS left_\nINNER JOIN DimTable AS right_ ON left_.Personal = right_.FirstName"
+        },
+        {
+            "X | join Y on Key",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=innerunique Y on Key",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=inner Y on Key",
+            "SELECT *\nFROM X AS left_\nINNER JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=leftouter Y on Key",
+            "SELECT *\nFROM X AS left_\nLEFT JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=rightouter Y on Key",
+            "SELECT *\nFROM X AS left_\nRIGHT JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=fullouter Y on Key",
+            "SELECT *\nFROM X AS left_\nFULL OUTER JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=leftanti Y on Key",
+            "SELECT *\nFROM X AS left_\nANTI LEFT JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=rightanti Y on Key",
+            "SELECT *\nFROM X AS left_\nANTI RIGHT JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=leftsemi Y on Key",
+            "SELECT *\nFROM X AS left_\nSEMI LEFT JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join kind=rightsemi Y on Key",
+            "SELECT *\nFROM X AS left_\nSEMI RIGHT JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join Y on $left.Key == $right.Key",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)"
+        },
+        {
+            "X | join Y on $left.Key == $right.Key2",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ ON left_.Key = right_.Key2"
+        },
+        {
+            "X | join (Y | project Key, value2) on $left.Key == $right.Key",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN\n(\n SELECT\n Key,\n value2\n FROM Y\n) AS right_ USING (Key)"
+        }
+
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp b/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp
new file mode 100644
index 000000000000..a1495dc4fe1a
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp
@@ -0,0 +1,110 @@
+#include <Parsers/tests/gtest_common.h>
+
+#include <Parsers/Kusto/ParserKQLQuery.h>
+#include
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_operator_in_sql, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "select * from kql(Customers | where FirstName !in ('Peter', 'Latoya'))",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE FirstName NOT IN ('Peter', 'Latoya')\n)"
+        },
+        {
+            "select * from kql(Customers | where FirstName !contains 'Pet');",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE '%Pet%')\n)"
+        },
+        {
+            "select * from kql(Customers | where FirstName !contains_cs 'Pet');",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName LIKE '%Pet%')\n)"
+        },
+        {
+            "select * from kql(Customers | where FirstName !endswith 'ter');",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE '%ter')\n)"
+        },
+        {
+            "select * from kql(Customers | where FirstName !endswith_cs 'ter');",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT endsWith(FirstName, 'ter')\n)"
+        },
+        {
+            "select * from kql(Customers | where FirstName != 'Peter');",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE FirstName != 'Peter'\n)"
+        },
+        {
+            "select * from kql(Customers | where FirstName !has 'Peter');",
+            "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n 
WHERE NOT ifNull(hasTokenCaseInsensitiveOrNull(FirstName, 'Peter'), FirstName = 'Peter')\n)" + }, + { + "select * from kql(Customers | where FirstName !has_cs 'peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT ifNull(hasTokenOrNull(FirstName, 'peter'), FirstName = 'peter')\n)" + }, + { + "select * from kql(Customers | where FirstName !hasprefix 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT (FirstName ILIKE 'Peter%')) AND (NOT (FirstName ILIKE '% Peter%'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hasprefix_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT startsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '% Peter%'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hassuffix 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT (FirstName ILIKE '%Peter')) AND (NOT (FirstName ILIKE '%Peter %'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hassuffix_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT endsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '%Peter %'))\n)" + }, + { + "select * from kql(Customers | where FirstName !startswith 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE 'Peter%')\n)" + }, + { + "select * from kql(Customers | where FirstName !startswith_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT startsWith(FirstName, 'Peter')\n)" + }, + { + "select * from kql(print t = 'a' in~ ('A', 'b', 'c'))", + "SELECT *\nFROM\n(\n SELECT lower('a') IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(Customers | where FirstName in~ ('peter', 'apple'))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (lower('peter'), lower('apple'))\n)" + }, + { + "select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter')))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE FirstName = 'Peter'\n )\n)" + }, + { + "select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where Age < 30)))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n )\n)" + }, + { + "select * from kql(print t = 'a' !in~ ('A', 'b', 'c'))", + "SELECT *\nFROM\n(\n SELECT lower('a') NOT IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(print t = 'a' !in~ (dynamic(['A', 'b', 'c'])))", + "SELECT *\nFROM\n(\n SELECT lower('a') NOT IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ('peter', 'apple'))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (lower('peter'), lower('apple'))\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName == 'Peter')))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE FirstName = 'Peter'\n )\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where Age < 30)))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n )\n)" + }, + { + "select * from 
kql(Customers | where FirstName =~ 'peter' and LastName =~ 'naRA')", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (lower(FirstName) = lower('peter')) AND (lower(LastName) = lower('naRA'))\n)" + }, + { + "select * from kql(Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA')", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (lower(FirstName) != lower('nEyMaR')) AND (lower(LastName) = lower('naRA'))\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_range.cpp b/src/Parsers/tests/KQL/gtest_KQL_range.cpp new file mode 100644 index 000000000000..decc34c6a600 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_range.cpp @@ -0,0 +1,81 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Range, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print range(1, 10, 2)", + "SELECT kql_range(1, 10, 2)" + }, + { + "print range(1, 10)", + "SELECT kql_range(1, 10)" + }, + { + "print range(1.2, 10.3, 2.2)", + "SELECT kql_range(1.2, 10.3, 2.2)" + }, + { + "print range(1.2, 10.3, 2)", + "SELECT kql_range(1.2, 10.3, 2)" + }, + { + "print range(1.2, 10,2.2)", + "SELECT kql_range(1.2, 10, 2.2)" + }, + { + "print range(1, 10, 2.2)", + "SELECT kql_range(1, 10, 2.2)" + }, + { + "print range(1, 10.5, 2.2)", + "SELECT kql_range(1, 10.5, 2.2)" + }, + { + "print range(1.1, 10 ,2.2)", + "SELECT kql_range(1.1, 10, 2.2)" + }, + { + "print range(1.2, 10, 2)", + "SELECT kql_range(1.2, 10, 2)" + }, + { + "print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h)", + "SELECT kql_range(kql_datetime('2001-01-01'), kql_datetime('2001-01-02'), toIntervalNanosecond(18000000000000))" + }, + { + "print range(datetime('2001-01-01'), datetime('2001-01-02'))", + "SELECT kql_range(kql_datetime('2001-01-01'), kql_datetime('2001-01-02'))" + }, + { + "print range(1h, 5h, 2h)", + "SELECT kql_range(toIntervalNanosecond(3600000000000), toIntervalNanosecond(18000000000000), toIntervalNanosecond(7200000000000))" + }, + { + "print range(1.5h, 5h, 2h)", + "SELECT kql_range(toIntervalNanosecond(5400000000000), toIntervalNanosecond(18000000000000), toIntervalNanosecond(7200000000000))" + }, + { + "print range(ago(1d),now(),1d)", + "SELECT kql_range(now64(9, 'UTC') + (-1 * toIntervalNanosecond(86400000000000)), now64(9, 'UTC'), toIntervalNanosecond(86400000000000))" + }, + { + "print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d)", + "SELECT kql_range(kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), 0 + 1)) - toIntervalNanosecond(100), kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-03 10:10:17')), 0 + 1)) - toIntervalNanosecond(100), toIntervalNanosecond(86400000000000))" + }, + { + "range Age from 20 to 25 step 1", + "SELECT *\nFROM\n(\n SELECT kql_range(20, 25, 1) AS Age\n)\nARRAY JOIN Age" + }, + { + "range LastWeek from ago(7d) to now() step 1d", + "SELECT *\nFROM\n(\n SELECT kql_range(now64(9, 'UTC') + (-1 * toIntervalNanosecond(604800000000000)), now64(9, 'UTC'), toIntervalNanosecond(86400000000000)) AS LastWeek\n)\nARRAY JOIN LastWeek" + }, + { + "range FirstWeek from datetime('2023-01-01') to datetime('2023-01-07') step 1d", + "SELECT *\nFROM\n(\n SELECT kql_range(kql_datetime('2023-01-01'), kql_datetime('2023-01-07'), toIntervalNanosecond(86400000000000)) AS FirstWeek\n)\nARRAY JOIN FirstWeek" + } +}))); diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index b452bd276429..6bfff1973b8f 
100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,9 +1,9 @@ +#include #include #include #include #include #include -#include #include #include #include @@ -13,9 +13,6 @@ #include #include #include -#include -#include -#include namespace { @@ -23,13 +20,6 @@ using namespace DB; using namespace std::literals; } - -struct ParserTestCase -{ - const std::string_view input_text; - const char * expected_ast = nullptr; -}; - std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr parser) { return ostr << "Parser: " << parser->getName(); @@ -40,51 +30,6 @@ std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case) return ostr << "ParserTestCase input: " << test_case.input_text; } -class ParserTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserTest, parseQuery) -{ - const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - - ASSERT_NE(nullptr, parser); - - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} - INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), @@ -326,10 +271,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" }, - { - "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" - }, { "Customers | sort by FirstName desc", "SELECT *\nFROM Customers\nORDER BY FirstName DESC" @@ -412,23 +353,23 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by Occupation", - "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n count() AS count_\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize sum(Age) by Occupation", - "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n sum(Age) AS sum_Age\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize avg(Age) by Occupation", - "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n avg(Age) AS avg_Age\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize min(Age) by Occupation", - "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n 
min(Age) AS min_Age\nFROM Customers\nGROUP BY Occupation" }, { "Customers |summarize max(Age) by Occupation", - "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n max(Age) AS max_Age\nFROM Customers\nGROUP BY Occupation" }, { "Customers | where FirstName contains 'pet'", @@ -446,32 +387,163 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | where FirstName !endswith 'er'", "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + }, + { + "Customers | where Age in ((Customers|project Age|where Age < 30))", + "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" + }, { "Customers | where Education has 'School'", - "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Education, 'School'), Education = 'School')" }, { "Customers | where Education !has 'School'", - "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + "SELECT *\nFROM Customers\nWHERE NOT ifNull(hasTokenCaseInsensitiveOrNull(Education, 'School'), Education = 'School')" }, { "Customers | where Education has_cs 'School'", - "SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')" + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenOrNull(Education, 'School'), Education = 'School')" }, { "Customers | where Education !has_cs 'School'", - "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + "SELECT *\nFROM Customers\nWHERE NOT ifNull(hasTokenOrNull(Education, 'School'), Education = 'School')" }, { - "Customers | where FirstName matches regex 'P.*r'", - "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + "Customers|where Occupation has_any ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Skilled'), Occupation = 'Skilled') OR ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'abcd'), Occupation = 'abcd')" }, { - "Customers | where FirstName startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + "Customers|where Occupation has_all ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Skilled'), Occupation = 'Skilled') AND ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'abcd'), Occupation = 'abcd')" }, { - "Customers | where FirstName !startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" - } + "Customers|where Occupation has_all (strcat('Skill','ed'),'Manual')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, concat(ifNull(kql_tostring('Skill'), ''), ifNull(kql_tostring('ed'), ''), '')), Occupation = concat(ifNull(kql_tostring('Skill'), ''), ifNull(kql_tostring('ed'), ''), '')) AND ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Manual'), Occupation = 'Manual')" + }, + { + "Customers | where Occupation == strcat('Pro','fessional') | take 1", + "SELECT *\nFROM Customers\nWHERE Occupation = concat(ifNull(kql_tostring('Pro'), ''), ifNull(kql_tostring('fessional'), ''), '')\nLIMIT 1" + }, + { + "Customers | project 
countof('The cat sat on the mat', 'at')", + "SELECT countSubstrings('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'normal')", + "SELECT countSubstrings('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'regex')", + "SELECT countMatches('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 10')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 10', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 0)\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 20', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 1)\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 30')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 30', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 2)\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 40', typeof(int))", + "SELECT accurateCastOrNull(kql_extract('The price of PINEAPPLE ice cream is 40', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 2), 'Int32')\nFROM Customers" + }, + { + "Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 50')", + "SELECT extractAllGroups('The price of PINEAPPLE ice cream is 50', '(\\\\w)(\\\\w+)(\\\\w)')\nFROM Customers" + }, + { + " Customers | project split('aa_bb', '_')", + "SELECT if(empty('_'), splitByString(' ', 'aa_bb'), splitByString('_', 'aa_bb'))\nFROM Customers" + }, + { + "Customers | project split('aaa_bbb_ccc', '_', 1)", + "SELECT multiIf((length(if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc'))) >= 2) AND (2 > 0), arrayPushBack([], if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc'))[2]), 2 = 0, if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc')), arrayPushBack([], NULL[1]))\nFROM Customers" + }, + { + "Customers | project strcat_delim('-', '1', '2', 'A')", + "SELECT concat('1', '-', '2', '-', 'A')\nFROM Customers" + }, + { + "print x=1, s=strcat('Hello', ', ', 'World!')", + "SELECT\n 1 AS x,\n concat(ifNull(kql_tostring('Hello'), ''), ifNull(kql_tostring(', '), ''), ifNull(kql_tostring('World!'), ''), '') AS s" + }, + { + "print parse_urlquery('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Query Parameters\":', concat('{\"', replace(replace(if(position('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment', '?') > 0, queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), 'https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), '}')" + }, + { + "print strcmp('a','b')", + "SELECT multiIf('a' = 'b', 0, 'a' < 'b', -1, 1)" + }, + { + "print parse_url('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Scheme\":\"', protocol('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Host\":\"', domain('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Port\":\"', 
toString(port('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')), '\"'), ',', concat('\"Path\":\"', path('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Username\":\"', splitByChar(':', splitByChar('@', netloc('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'))[1])[1], '\"'), ',', concat('\"Password\":\"', splitByChar(':', splitByChar('@', netloc('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'))[1])[2], '\"'), ',', concat('\"Query Parameters\":', concat('{\"', replace(replace(queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), ',', concat('\"Fragment\":\"', fragment('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), '}')" + },{ + "Customers | summarize t = make_list(FirstName) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(10)(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "print output = dynamic([1, 2, 3])", + "SELECT [1, 2, 3] AS output" + }, + { + "print output = dynamic(['a', 'b', 'c'])", + "SELECT ['a', 'b', 'c'] AS output" + }, + { + "T | extend duration = endTime - startTime", + "SELECT\n * EXCEPT duration,\n endTime - startTime AS duration\nFROM T" + }, + { + "T |project endTime, startTime | extend duration = endTime - startTime", + "SELECT\n * EXCEPT duration,\n endTime - startTime AS duration\nFROM\n(\n SELECT\n endTime,\n startTime\n FROM T\n)" + }, + { + "T | extend c =c*2, b-a, d = a +b , a*b", + "SELECT\n * EXCEPT c EXCEPT d,\n c * 2 AS c,\n b - a AS Column1,\n a + b AS d,\n a * b AS Column2\nFROM T" + } }))); diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp new file mode 100644 index 000000000000..c9efdbe105c8 --- /dev/null +++ b/src/Parsers/tests/gtest_common.cpp @@ -0,0 +1,64 @@ +#include "gtest_common.h" + +#include +#include +#include + +#include + +#include + +TEST_P(ParserTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_NE(nullptr, parser); + + if (expected_ast) + { + if 
(std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +TEST_P(ParserRegexTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_TRUE(parser); + ASSERT_TRUE(expected_ast); + + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); +} diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h new file mode 100644 index 000000000000..4eca9390d92c --- /dev/null +++ b/src/Parsers/tests/gtest_common.h @@ -0,0 +1,17 @@ +#include + +#include + +#include + +struct ParserTestCase +{ + const std::string_view input_text; + const char * expected_ast = nullptr; +}; + +class ParserTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> +{}; diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 86039342c492..4fd6e7c11ddd 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -79,9 +79,9 @@ Block generateOutputHeader(const Block & input_header, const Names & keys, bool } -static Block appendGroupingColumn(Block block, const Names & keys, const GroupingSetsParamsList & params, bool use_nulls) +Block AggregatingStep::appendGroupingColumn(Block block, const Names & keys, bool has_grouping, bool use_nulls) { - if (params.empty()) + if (!has_grouping) return block; return generateOutputHeader(block, keys, use_nulls); @@ -104,7 +104,7 @@ AggregatingStep::AggregatingStep( bool memory_bound_merging_of_aggregation_results_enabled_) : ITransformingStep( input_stream_, - appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, grouping_sets_params_, group_by_use_nulls_), + appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, !grouping_sets_params_.empty(), group_by_use_nulls_), getTraits(should_produce_results_in_order_of_bucket_number_), false) , params(std::move(params_)) @@ -469,7 +469,7 @@ void AggregatingStep::updateOutputStream() { output_stream = createOutputStream( input_streams.front(), - appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, grouping_sets_params, group_by_use_nulls), + appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, !grouping_sets_params.empty(), group_by_use_nulls), getDataStreamTraits()); } diff --git a/src/Processors/QueryPlan/AggregatingStep.h 
b/src/Processors/QueryPlan/AggregatingStep.h index 9cb56432797b..0dc06649d2d6 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -42,6 +42,8 @@ class AggregatingStep : public ITransformingStep bool should_produce_results_in_order_of_bucket_number_, bool memory_bound_merging_of_aggregation_results_enabled_); + + static Block appendGroupingColumn(Block block, const Names & keys, bool has_grouping, bool use_nulls); + String getName() const override { return "Aggregating"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index f55d4d88ae88..8299120d2c68 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index f018a7112845..d6cbb4fc38aa 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -241,6 +241,10 @@ class IStorage : public std::enable_shared_from_this, public TypePromo /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete + /// because those are internally translated into 'ALTER UPDATE' mutations. + virtual bool supportsDelete() const { return false; } + private: StorageID storage_id; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index bd7e3a64749b..5b6b0f09bc39 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -68,12 +68,13 @@ IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm( size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); - /// Reverse order is to minimize reallocations when removing columns from the block for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); result_header = header_without_const_virtual_columns; injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); + + LOG_TEST(log, "PREWHERE actions: {}", (prewhere_actions ? 
prewhere_actions->dump() : std::string(""))); } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 1a5a4d918067..6f8da624449e 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -30,13 +30,17 @@ namespace ErrorCodes } -static void filterColumns(Columns & columns, const IColumn::Filter & filter) +static void filterColumns(Columns & columns, const IColumn::Filter & filter, size_t filter_bytes) { for (auto & column : columns) { if (column) { - column = column->filter(filter, -1); + if (column->size() != filter.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of column {} doesn't match size of filter {}", + column->size(), filter.size()); + + column = column->filter(filter, filter_bytes); if (column->empty()) { @@ -47,13 +51,12 @@ static void filterColumns(Columns & columns, const IColumn::Filter & filter) } } -static void filterColumns(Columns & columns, const ColumnPtr & filter) +static void filterColumns(Columns & columns, const FilterWithCachedCount & filter) { - ConstantFilterDescription const_descr(*filter); - if (const_descr.always_true) + if (filter.alwaysTrue()) return; - if (const_descr.always_false) + if (filter.alwaysFalse()) { for (auto & col : columns) if (col) @@ -62,8 +65,7 @@ static void filterColumns(Columns & columns, const ColumnPtr & filter) return; } - FilterDescription descr(*filter); - filterColumns(columns, *descr.data); + filterColumns(columns, filter.getData(), filter.countBytesInFilter()); } @@ -320,11 +322,13 @@ void MergeTreeRangeReader::ReadResult::clear() num_rows_to_skip_in_last_granule += rows_per_granule.back(); rows_per_granule.assign(rows_per_granule.size(), 0); total_rows_per_granule = 0; - filter_holder = nullptr; - filter = nullptr; + final_filter = FilterWithCachedCount(); + num_rows = 0; + columns.clear(); + additional_columns.clear(); } -void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) +void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRows & rows_per_granule_previous) const { for (auto & column : old_columns) { @@ -337,9 +341,12 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) continue; } + LOG_TEST(log, "ReadResult::shrink() column size: {} total_rows_per_granule: {}", + column->size(), total_rows_per_granule); + auto new_column = column->cloneEmpty(); new_column->reserve(total_rows_per_granule); - for (size_t j = 0, pos = 0; j < rows_per_granule_original.size(); pos += rows_per_granule_original[j++]) + for (size_t j = 0, pos = 0; j < rows_per_granule_previous.size(); pos += rows_per_granule_previous[j++]) { if (rows_per_granule[j]) new_column->insertRangeFrom(*column, pos, rows_per_granule[j]); @@ -348,74 +355,267 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) } } +/// The main invariant of the data in the read result is that the number of rows is +/// either equal to total_rows_per_granule (if the filter has not been applied) or to the number of +/// 1s in the filter (if it has been applied). +void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const +{ + /// Check that filter size matches number of rows that will be read. 
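+ /// E.g. with total_rows_per_granule == 8 the filter, if present, must have size 8, and num_rows must be + /// either 8 (the filter is still pending) or the count of 1s in the filter (the filter has been applied). 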
+ if (final_filter.present() && final_filter.size() != total_rows_per_granule) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Final filter size {} doesn't match total_rows_per_granule {}", + final_filter.size(), total_rows_per_granule); + + /// Check that num_rows is consistent with final_filter and rows_per_granule. + if (final_filter.present() && final_filter.countBytesInFilter() != num_rows && total_rows_per_granule != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Number of rows {} matches neither the filter 1s count {} nor total_rows_per_granule {}", + num_rows, final_filter.countBytesInFilter(), total_rows_per_granule); + + /// Check that additional columns have the same number of rows as the main columns. + if (additional_columns && additional_columns.rows() != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Number of rows in additional columns {} is not equal to number of rows in result columns {}", + additional_columns.rows(), num_rows); + + for (const auto & column : columns) + { + if (column) + chassert(column->size() == num_rows); + } +} + +std::string MergeTreeRangeReader::ReadResult::dumpInfo() const +{ + WriteBufferFromOwnString out; + out << "num_rows: " << num_rows + << ", columns: " << columns.size() + << ", total_rows_per_granule: " << total_rows_per_granule; + if (final_filter.present()) + { + out << ", filter size: " << final_filter.size() + << ", filter 1s: " << final_filter.countBytesInFilter(); + } + else + { + out << ", no filter"; + } + for (size_t ci = 0; ci < columns.size(); ++ci) + { + out << ", column[" << ci << "]: "; + if (!columns[ci]) + out << " nullptr"; + else + { + out << " " << columns[ci]->dumpStructure(); + } + } + if (additional_columns) + { + out << ", additional_columns: " << additional_columns.dumpStructure(); + } + return out.str(); +} + +static std::string dumpNames(const NamesAndTypesList & columns) +{ + WriteBufferFromOwnString out; + for (auto it = columns.begin(); it != columns.end(); ++it) + { + if (it != columns.begin()) + out << ", "; + out << it->name; + } + return out.str(); +} + void MergeTreeRangeReader::ReadResult::setFilterConstTrue() { - clearFilter(); - filter_holder = DataTypeUInt8().createColumnConst(num_rows, 1u); + /// Remove the filter, so newly read columns will not be filtered. + final_filter = FilterWithCachedCount(); } -void MergeTreeRangeReader::ReadResult::setFilterConstFalse() +static ColumnPtr andFilters(ColumnPtr c1, ColumnPtr c2) { - clearFilter(); - columns.clear(); - num_rows = 0; + if (c1->size() != c2->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of filters don't match: {} and {}", + c1->size(), c2->size()); + + // TODO: use proper vectorized implementation of AND? + auto res = ColumnUInt8::create(c1->size()); + auto & res_data = res->getData(); + const auto & c1_data = typeid_cast<const ColumnUInt8 &>(*c1).getData(); + const auto & c2_data = typeid_cast<const ColumnUInt8 &>(*c2).getData(); + const size_t size = c1->size(); + const size_t step = 16; + size_t i = 0; + /// NOTE: '&&' must be used instead of '&' for 'AND' operation because UInt8 columns might contain any non-zero + /// value for true and we cannot bitwise AND them to get the correct result. 
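+ /// E.g. if c1_data[i] == 2 and c2_data[i] == 1, both values are logically true and 2 && 1 == 1 as expected, + /// while bitwise 2 & 1 == 0 would incorrectly drop the row. 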
+ for (; i + step < size; i += step) + for (size_t j = 0; j < step; ++j) + res_data[i+j] = (c1_data[i+j] && c2_data[i+j]); + for (; i < size; ++i) + res_data[i] = (c1_data[i] && c2_data[i]); + return res; +} + +static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second); + +void MergeTreeRangeReader::ReadResult::applyFilter(const FilterWithCachedCount & filter) +{ + if (filter.size() != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter size {} doesn't match number of rows {}", + filter.size(), num_rows); + + LOG_TEST(log, "ReadResult::applyFilter() num_rows before: {}", num_rows); + + filterColumns(columns, filter); + + { + auto tmp_columns = additional_columns.getColumns(); + filterColumns(tmp_columns, filter); + if (!tmp_columns.empty()) + additional_columns.setColumns(tmp_columns); + else + additional_columns.clear(); + } + + num_rows = filter.countBytesInFilter(); + + LOG_TEST(log, "ReadResult::applyFilter() num_rows after: {}", num_rows); } -void MergeTreeRangeReader::ReadResult::optimize(bool can_read_incomplete_granules, bool allow_filter_columns) +void MergeTreeRangeReader::ReadResult::optimize(const FilterWithCachedCount & current_filter, bool can_read_incomplete_granules) { - if (total_rows_per_granule == 0 || filter == nullptr) + checkInternalConsistency(); + + /// Combine new filter with the previous one if it is present. + /// This filter has the size of total_rows_per_granule. It is applied after reading contiguous chunks from + /// the start of each granule. + FilterWithCachedCount filter = current_filter; + if (final_filter.present()) + { + /// If current filter has the same size as the final filter, it means that the final filter has not been applied. + /// In this case we AND current filter with the existing final filter. + /// Otherwise, when the final filter has been applied, the size of current step filter will be equal to the number of ones + /// in the final filter. In this case we combine current filter with the final filter. + ColumnPtr combined_filter; + if (current_filter.size() == final_filter.size()) + combined_filter = andFilters(final_filter.getColumn(), current_filter.getColumn()); + else + combined_filter = combineFilters(final_filter.getColumn(), current_filter.getColumn()); + + filter = FilterWithCachedCount(combined_filter); + } + + if (total_rows_per_granule == 0 || !filter.present()) return; NumRows zero_tails; auto total_zero_rows_in_tails = countZeroTails(filter.getData(), zero_tails, can_read_incomplete_granules); - if (total_zero_rows_in_tails == filter->size()) + LOG_TEST(log, "ReadResult::optimize() before: {}", dumpInfo()); + + SCOPE_EXIT( + if (!std::uncaught_exceptions()) + { + checkInternalConsistency(); + LOG_TEST(log, "ReadResult::optimize() after: {}", dumpInfo()); + } + ); + + if (total_zero_rows_in_tails == filter.size()) { + LOG_TEST(log, "ReadResult::optimize() combined filter is const False"); clear(); return; } - else if (total_zero_rows_in_tails == 0 && countBytesInResultFilter(filter->getData()) == filter->size()) + else if (total_zero_rows_in_tails == 0 && filter.countBytesInFilter() == filter.size()) { + LOG_TEST(log, "ReadResult::optimize() combined filter is const True"); setFilterConstTrue(); return; } /// Just a guess. If only a few rows may be skipped, it's better not to skip at all. 
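+ /// E.g. a filter 1100 0000 over two granules of 4 rows each has 2 + 4 = 6 zero-tail rows out of 8 total, + /// so 2 * 6 > 8 and the branch below shortens the granule tails instead of filtering row by row. 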
- else if (2 * total_zero_rows_in_tails > filter->size()) + else if (2 * total_zero_rows_in_tails > filter.size()) { + const NumRows rows_per_granule_previous = rows_per_granule; + const size_t total_rows_per_granule_previous = total_rows_per_granule; + for (auto i : collections::range(0, rows_per_granule.size())) { - rows_per_granule_original.push_back(rows_per_granule[i]); rows_per_granule[i] -= zero_tails[i]; } - num_rows_to_skip_in_last_granule += rows_per_granule_original.back() - rows_per_granule.back(); + num_rows_to_skip_in_last_granule += rows_per_granule_previous.back() - rows_per_granule.back(); + total_rows_per_granule = total_rows_per_granule_previous - total_zero_rows_in_tails; + + /// Check if const 1 after shrink. + /// We can apply shrink only if after the previous step the number of rows in the result + /// matches the rows_per_granule info. Otherwise we will not be able to match newly added zeros in granule tails. + if (num_rows == total_rows_per_granule_previous && + filter.countBytesInFilter() + total_zero_rows_in_tails == total_rows_per_granule_previous) /// All zeros are in tails? + { + setFilterConstTrue(); - filter_original = filter; - filter_holder_original = std::move(filter_holder); + /// If all zeros are in granule tails, we can use shrink to filter out rows. + shrink(columns, rows_per_granule_previous); /// shrink acts as filtering in such case + auto c = additional_columns.getColumns(); + shrink(c, rows_per_granule_previous); + additional_columns.setColumns(c); - /// Check if const 1 after shrink - if (allow_filter_columns && countBytesInResultFilter(filter->getData()) + total_zero_rows_in_tails == total_rows_per_granule) - { - total_rows_per_granule = total_rows_per_granule - total_zero_rows_in_tails; num_rows = total_rows_per_granule; - setFilterConstTrue(); - shrink(columns); /// shrink acts as filtering in such case + + LOG_TEST(log, "ReadResult::optimize() after shrink {}", dumpInfo()); } else { - auto new_filter = ColumnUInt8::create(filter->size() - total_zero_rows_in_tails); + auto new_filter = ColumnUInt8::create(filter.size() - total_zero_rows_in_tails); IColumn::Filter & new_data = new_filter->getData(); - collapseZeroTails(filter->getData(), new_data); - total_rows_per_granule = new_filter->size(); - num_rows = total_rows_per_granule; - filter = new_filter.get(); - filter_holder = std::move(new_filter); + /// Shorten the filter by removing zeros from granule tails + collapseZeroTails(filter.getData(), rows_per_granule_previous, new_data); + if (total_rows_per_granule != new_filter->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "New filter size {} doesn't match number of rows to be read {}", + new_filter->size(), total_rows_per_granule); + + /// Need to apply combined filter here before replacing it with shortened one because otherwise + /// the filter size will not match the number of rows in the result columns. + if (num_rows == total_rows_per_granule_previous) + { + /// Filter from the previous steps has not been applied yet, do it now. + applyFilter(filter); + } + else + { + /// Filter was applied before, so apply only new filter from the current step. 
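+ /// (current_filter was computed on the already filtered block, so its size matches num_rows, which applyFilter() verifies.) 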
+ applyFilter(current_filter); + } + + final_filter = FilterWithCachedCount(new_filter->getPtr()); + if (num_rows != final_filter.countBytesInFilter()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Count of 1s in final filter {} doesn't match number of rows {}", + final_filter.countBytesInFilter(), num_rows); + + LOG_TEST(log, "ReadResult::optimize() after collapseZeroTails {}", dumpInfo()); } - need_filter = true; } - /// Another guess, if it's worth filtering at PREWHERE - else if (countBytesInResultFilter(filter->getData()) < 0.6 * filter->size()) - need_filter = true; + else + { + /// Check if we have rows already filtered at the previous step. In that case we must apply the filter because + /// otherwise num_rows doesn't match total_rows_per_granule and the next read step will not know how to filter + /// newly read columns to match the num_rows. + if (num_rows != total_rows_per_granule) + { + applyFilter(current_filter); + } + /// Another guess: whether it's worth filtering at PREWHERE + else if (filter.countBytesInFilter() < 0.6 * filter.size()) + { + applyFilter(filter); + } + + final_filter = std::move(filter); + } } size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & filter_vec, NumRows & zero_tails, bool can_read_incomplete_granules) const @@ -441,7 +641,7 @@ size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & return total_zero_rows_in_tails; } -void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, IColumn::Filter & new_filter_vec) +void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, const NumRows & rows_per_granule_previous, IColumn::Filter & new_filter_vec) const { const auto * filter_data = filter_vec.data(); auto * new_filter_data = new_filter_vec.data(); @@ -449,7 +649,7 @@ void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & for (auto i : collections::range(0, rows_per_granule.size())) { memcpySmallAllowReadWriteOverflow15(new_filter_data, filter_data, rows_per_granule[i]); - filter_data += rows_per_granule_original[i]; + filter_data += rows_per_granule_previous[i]; new_filter_data += rows_per_granule[i]; } @@ -597,54 +797,6 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con return count; } -/// Filter size must match total_rows_per_granule -void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter) -{ - if (!new_filter && filter) - throw Exception("Can't replace existing filter with empty.", ErrorCodes::LOGICAL_ERROR); - - if (filter) - { - size_t new_size = new_filter->size(); - - if (new_size != total_rows_per_granule) - throw Exception("Can't set filter because it's size is " + toString(new_size) + " but " - + toString(total_rows_per_granule) + " rows was read.", ErrorCodes::LOGICAL_ERROR); - } - - ConstantFilterDescription const_description(*new_filter); - if (const_description.always_true) - { - setFilterConstTrue(); - } - else if (const_description.always_false) - { - clear(); - } - else - { - FilterDescription filter_description(*new_filter); - filter_holder = filter_description.data_holder ? 
filter_description.data_holder : new_filter; - filter = typeid_cast(filter_holder.get()); - if (!filter) - throw Exception("setFilter function expected ColumnUInt8.", ErrorCodes::LOGICAL_ERROR); - } -} - - -size_t MergeTreeRangeReader::ReadResult::countBytesInResultFilter(const IColumn::Filter & filter_) -{ - auto it = filter_bytes_map.find(&filter_); - if (it == filter_bytes_map.end()) - { - auto bytes = countBytesInFilter(filter_); - filter_bytes_map[&filter_] = bytes; - return bytes; - } - else - return it->second; -} - MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, @@ -659,30 +811,37 @@ MergeTreeRangeReader::MergeTreeRangeReader( , is_initialized(true) { if (prev_reader) - sample_block = prev_reader->getSampleBlock(); + result_sample_block = prev_reader->getSampleBlock(); for (const auto & name_and_type : merge_tree_reader->getColumns()) - sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + { + read_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + } for (const auto & column_name : non_const_virtual_column_names_) { - if (sample_block.has(column_name)) + if (result_sample_block.has(column_name)) continue; non_const_virtual_column_names.push_back(column_name); - if (column_name == "_part_offset") - sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + if (column_name == "_part_offset" && !prev_reader) + { + /// _part_offset column is filled by the first reader. + read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + } } if (prewhere_info) { const auto & step = *prewhere_info; if (step.actions) - step.actions->execute(sample_block, true); + step.actions->execute(result_sample_block, true); if (step.remove_column) - sample_block.erase(step.column_name); + result_sample_block.erase(step.column_name); } } @@ -765,7 +924,13 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (max_rows == 0) throw Exception("Expected at least 1 row to read, got 0.", ErrorCodes::LOGICAL_ERROR); - ReadResult read_result; + ReadResult read_result(log); + + SCOPE_EXIT( + if (!std::uncaught_exceptions()) + LOG_TEST(log, "read() returned {}, sample block {}", + read_result.dumpInfo(), this->result_sample_block.dumpNames()); + ); if (prev_reader) { @@ -778,69 +943,52 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (read_result.num_rows == 0) return read_result; - bool has_columns = false; + /// Calculate and update read bytes size_t total_bytes = 0; for (auto & column : columns) { if (column) { total_bytes += column->byteSize(); - has_columns = true; } } - read_result.addNumBytesRead(total_bytes); - bool should_evaluate_missing_defaults = false; - - if (has_columns) + if (!columns.empty()) { - /// num_read_rows >= read_result.num_rows - /// We must filter block before adding columns to read_result.block - - /// Fill missing columns before filtering because some arrays from Nested may have empty data. 
- merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_read_rows); + /// If all requested columns are absent in the part, num_read_rows will be 0. + /// In this case we need to use the number of rows in the result to fill the default values and not filter the block. + if (num_read_rows == 0) + num_read_rows = read_result.num_rows; + + /// fillMissingColumns() must be called after reading but before any filtering because + /// some columns (e.g. arrays) might be only partially filled and thus not valid; + /// fillMissingColumns() fixes this. + bool should_evaluate_missing_defaults; + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, + num_read_rows); - if (read_result.getFilter()) - filterColumns(columns, read_result.getFilter()->getData()); - } - else - { - size_t num_rows = read_result.num_rows; + if (read_result.total_rows_per_granule == num_read_rows && read_result.num_rows != num_read_rows) + { + /// We have a filter applied at the previous step, + /// so we need to apply it to the newly read rows as well. + if (!read_result.final_filter.present() || read_result.final_filter.countBytesInFilter() != read_result.num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Final filter is missing or has mismatching size, read_result: {}", + read_result.dumpInfo()); - /// If block is empty, we still may need to add missing columns. - /// In that case use number of rows in result block and don't filter block. - if (num_rows) - merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows); - } + filterColumns(columns, read_result.final_filter); + } - if (!columns.empty()) - { /// If some columns absent in part, then evaluate default values if (should_evaluate_missing_defaults) { - auto block = prev_reader->sample_block.cloneWithColumns(read_result.columns); - auto block_before_prewhere = read_result.block_before_prewhere; - for (const auto & column : block) - { - if (block_before_prewhere.has(column.name)) - block_before_prewhere.erase(column.name); - } + Block additional_columns = prev_reader->getSampleBlock().cloneWithColumns(read_result.columns); + for (const auto & col : read_result.additional_columns) + additional_columns.insert(col); - if (block_before_prewhere) - { - if (read_result.need_filter) - { - auto old_columns = block_before_prewhere.getColumns(); - filterColumns(old_columns, read_result.getFilterOriginal()->getData()); - block_before_prewhere.setColumns(old_columns); - } - - for (auto & column : block_before_prewhere) - block.insert(std::move(column)); - } - merge_tree_reader->evaluateMissingDefaults(block, columns); + merge_tree_reader->evaluateMissingDefaults(additional_columns, columns); } + /// If columns not empty, then apply on-fly alter conversions if any required merge_tree_reader->performRequiredConversions(columns); } @@ -854,11 +1002,15 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result = startReadingChain(max_rows, ranges); read_result.num_rows = read_result.numReadRows(); - if (read_result.num_rows) + LOG_TEST(log, "First reader returned: {}, requested columns: {}", + read_result.dumpInfo(), dumpNames(merge_tree_reader->getColumns())); + + if (read_result.num_rows == 0) + return read_result; + { /// Physical columns go first and then some virtual columns follow - /// TODO: is there a better way to account for virtual columns that were filled by previous readers? 
- size_t physical_columns_count = read_result.columns.size() - read_result.extra_columns_filled.size(); + size_t physical_columns_count = merge_tree_reader->getColumns().size(); Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count); bool should_evaluate_missing_defaults; @@ -875,8 +1027,6 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar for (size_t i = 0; i < physical_columns.size(); ++i) read_result.columns[i] = std::move(physical_columns[i]); } - else - read_result.columns.clear(); size_t total_bytes = 0; for (auto & column : read_result.columns) @@ -885,18 +1035,35 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result.addNumBytesRead(total_bytes); } - if (read_result.num_rows == 0) - return read_result; - executePrewhereActionsAndFilterColumns(read_result); + read_result.checkInternalConsistency(); + + if (!read_result.can_return_prewhere_column_without_filtering) + { + if (!read_result.filterWasApplied()) + { + /// TODO: another solution might be to set all 0s from final filter into the prewhere column and not filter all the columns here + /// but rely on filtering in WHERE. + read_result.applyFilter(read_result.final_filter); + read_result.checkInternalConsistency(); + } + + read_result.can_return_prewhere_column_without_filtering = true; + } + + if (read_result.num_rows != 0 && read_result.columns.size() != getSampleBlock().columns()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Number of columns in result doesn't match number of columns in sample block, read_result: {}, sample block: {}", + read_result.dumpInfo(), getSampleBlock().dumpStructure()); + return read_result; } - MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t max_rows, MarkRanges & ranges) { - ReadResult result; + ReadResult result(log); result.columns.resize(merge_tree_reader->getColumns().size()); size_t current_task_last_mark = getLastMark(ranges); @@ -946,14 +1113,11 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t result.addRows(stream.finalize(result.columns)); /// Last granule may be incomplete. - if (!result.rowsPerGranule().empty()) + if (!result.rows_per_granule.empty()) result.adjustLastGranule(); - for (const auto & column_name : non_const_virtual_column_names) - { - if (column_name == "_part_offset") - fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); - } + if (read_sample_block.has("_part_offset")) + fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); return result; } @@ -968,11 +1132,13 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead UInt64 * pos = vec.data(); UInt64 * end = &vec[num_rows]; + /// Fill the remaining part of the previous range (it was started in the previous read request). while (pos < end && leading_begin_part_offset < leading_end_part_offset) *pos++ = leading_begin_part_offset++; - const auto start_ranges = result.startedRanges(); + const auto & start_ranges = result.started_ranges; + /// Fill the ranges which were started in the current read request. 
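+ /// The values written below are absolute row numbers within the data part, derived from the starting row of each mark. 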
for (const auto & start_range : start_ranges) { UInt64 start_part_offset = index_granularity->getMarkStartingRow(start_range.range.begin); @@ -983,7 +1149,6 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead } result.columns.emplace_back(std::move(column)); - result.extra_columns_filled.push_back("_part_offset"); } Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows) @@ -995,7 +1160,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si if (merge_tree_reader->getColumns().empty()) return columns; - if (result.rowsPerGranule().empty()) + if (result.rows_per_granule.empty()) { /// If zero rows were read on prev step, than there is no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually. @@ -1005,8 +1170,8 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si columns.resize(merge_tree_reader->numColumnsInResult()); - const auto & rows_per_granule = result.rowsPerGranule(); - const auto & started_ranges = result.startedRanges(); + const auto & rows_per_granule = result.rows_per_granule; + const auto & started_ranges = result.started_ranges; size_t current_task_last_mark = ReadResult::getLastMark(started_ranges); size_t next_range_to_start = 0; @@ -1027,13 +1192,13 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si num_rows += stream.read(columns, rows_per_granule[i], !last); } - stream.skip(result.numRowsToSkipInLastGranule()); + stream.skip(result.num_rows_to_skip_in_last_granule); num_rows += stream.finalize(columns); /// added_rows may be zero if all columns were read in prewhere and it's ok. - if (num_rows && num_rows != result.totalRowsPerGranule()) + if (num_rows && num_rows != result.total_rows_per_granule) throw Exception("RangeReader read " + toString(num_rows) + " rows, but " - + toString(result.totalRowsPerGranule()) + " expected.", ErrorCodes::LOGICAL_ERROR); + + toString(result.total_rows_per_granule) + " expected.", ErrorCodes::LOGICAL_ERROR); return columns; } @@ -1047,7 +1212,7 @@ static void checkCombinedFiltersSize(size_t bytes_in_first_filter, size_t second } /// Second filter size must be equal to number of 1s in the first filter. -/// The result size is equal to first filter size. +/// The result has size equal to first filter size and contains 1s only where both filters contain 1s. static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) { ConstantFilterDescription first_const_descr(*first); @@ -1100,23 +1265,22 @@ static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) return mut_first; } -void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) +void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) const { + result.checkInternalConsistency(); + if (!prewhere_info) return; - const auto & header = merge_tree_reader->getColumns(); - size_t num_columns = header.size(); + const auto & header = read_sample_block; + size_t num_columns = header.columns(); /// Check that we have columns from previous steps and newly read required columns - if (result.columns.size() < num_columns + result.extra_columns_filled.size()) + if (result.columns.size() < num_columns) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", num_columns, result.columns.size()); - /// This filter has the size of total_rows_per granule. 
It is applied after reading contiguous chunks from - /// the start of each granule. - ColumnPtr combined_filter; /// Filter computed at the current step. Its size is equal to num_rows which is <= total_rows_per_granule ColumnPtr current_step_filter; size_t prewhere_column_pos; @@ -1138,35 +1302,28 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto name_and_type = header.begin(); name_and_type != header.end() && pos < result.columns.size(); ++pos, ++name_and_type) block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); - for (const auto & column_name : non_const_virtual_column_names) { - if (block.has(column_name)) - continue; + /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. + Block additional_columns = block; - if (column_name == "_part_offset") - { - if (pos >= result.columns.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", - num_columns, result.columns.size()); + if (prewhere_info->actions) + prewhere_info->actions->execute(block); - block.insert({result.columns[pos], std::make_shared(), column_name}); - } - else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + result.additional_columns.clear(); + /// Additional columns might only be needed if there are more steps in the chain. + if (!last_reader_in_chain) { - /// Do nothing, it will be added later + for (auto & col : additional_columns) + { + /// Exclude columns that are present in the result block to avoid storing them and filtering twice. + /// TODO: also need to exclude the columns that are not needed for the next steps. + if (block.has(col.name)) + continue; + result.additional_columns.insert(col); + } } - else - throw Exception("Unexpected non-const virtual column: " + column_name, ErrorCodes::LOGICAL_ERROR); - ++pos; } - /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. - result.block_before_prewhere = block; - - if (prewhere_info->actions) - prewhere_info->actions->execute(block); - prewhere_column_pos = block.getPositionByName(prewhere_info->column_name); result.columns.clear(); @@ -1174,90 +1331,38 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto & col : block) result.columns.emplace_back(std::move(col.column)); - current_step_filter.swap(result.columns[prewhere_column_pos]); - combined_filter = current_step_filter; + current_step_filter = result.columns[prewhere_column_pos]; } - if (result.getFilter()) + if (prewhere_info->remove_column) + result.columns.erase(result.columns.begin() + prewhere_column_pos); + else { - ColumnPtr prev_filter = result.getFilterHolder(); - combined_filter = combineFilters(prev_filter, std::move(combined_filter)); + /// When we are not removing the prewhere column, the caller expects it to serve as a final filter: + /// it must contain 0s not only from the current step but also from all the previous steps. + /// One way to achieve this is to apply the final_filter if we know that the final_filter was not applied at + /// several previous steps but was accumulated instead.
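To make the condition assigned in the next statement concrete, here is a small self-contained sketch of the check. The Filter alias and both helper names are illustrative stand-ins, not the real FilterWithCachedCount API: the current step's prewhere column can be handed out unfiltered only when there is no accumulated filter at all, or the accumulated filter keeps every row:

    #include <cstddef>
    #include <vector>

    using Filter = std::vector<unsigned char>; // 1 = keep the row, 0 = row was filtered out

    // Stand-in for final_filter.countBytesInFilter().
    static size_t countOnes(const Filter & filter)
    {
        size_t ones = 0;
        for (unsigned char byte : filter)
            ones += byte;
        return ones;
    }

    // No accumulated filter, or a filter that keeps every row, means the current
    // step's prewhere column already carries all the zeros there are.
    static bool canReturnPrewhereColumnWithoutFiltering(const Filter * accumulated, size_t num_rows)
    {
        return accumulated == nullptr || countOnes(*accumulated) == num_rows;
    }

    int main()
    {
        Filter all_ones = {1, 1, 1};
        Filter mixed = {1, 0, 1};
        bool a = canReturnPrewhereColumnWithoutFiltering(&all_ones, 3); // true
        bool b = canReturnPrewhereColumnWithoutFiltering(&mixed, 3);    // false: an earlier step dropped a row
        return (a && !b) ? 0 : 1;
    }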
+ result.can_return_prewhere_column_without_filtering = + (!result.final_filter.present() || result.final_filter.countBytesInFilter() == result.num_rows); } - result.setFilter(combined_filter); - - /// If there is a WHERE, we filter in there, and only optimize IO and shrink columns here - if (!last_reader_in_chain) - result.optimize(merge_tree_reader->canReadIncompleteGranules(), true); + FilterWithCachedCount current_filter(current_step_filter); - /// If we read nothing or filter gets optimized to nothing - if (result.totalRowsPerGranule() == 0) - result.setFilterConstFalse(); - /// If we need to filter in PREWHERE - else if (prewhere_info->need_filter || result.need_filter) - { - /// If there is a filter and without optimized - if (result.getFilter() && last_reader_in_chain) - { - const auto * result_filter = result.getFilter(); - /// optimize is not called, need to check const 1 and const 0 - size_t bytes_in_filter = result.countBytesInResultFilter(result_filter->getData()); - if (bytes_in_filter == 0) - result.setFilterConstFalse(); - else if (bytes_in_filter == result.num_rows) - result.setFilterConstTrue(); - } - - /// If there is still a filter, do the filtering now - if (result.getFilter()) - { - /// filter might be shrunk while columns not - const auto * result_filter = result.getFilterOriginal(); - - filterColumns(result.columns, current_step_filter); - - result.need_filter = true; - - bool has_column = false; - for (auto & column : result.columns) - { - if (column) - { - has_column = true; - result.num_rows = column->size(); - break; - } - } + result.optimize(current_filter, merge_tree_reader->canReadIncompleteGranules()); - /// There is only one filter column. Record the actual number - if (!has_column) - result.num_rows = result.countBytesInResultFilter(result_filter->getData()); - } - - /// Check if the PREWHERE column is needed - if (!result.columns.empty()) - { - if (prewhere_info->remove_column) - result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - result.columns[prewhere_column_pos] = - getSampleBlock().getByName(prewhere_info->column_name).type-> - createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); - } - } - /// Filter in WHERE instead - else + if (prewhere_info->need_filter && !result.filterWasApplied()) { - if (prewhere_info->remove_column) - result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - { - auto type = getSampleBlock().getByName(prewhere_info->column_name).type; - ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared(), ""); - result.columns[prewhere_column_pos] = castColumn(col, type); - result.clearFilter(); // Acting as a flag to not filter in PREWHERE - } + /// Depending on whether the final filter was applied at the previous step or not we need to apply either + /// just the current step filter or the accumulated filter. + FilterWithCachedCount filter_to_apply = + current_filter.size() == result.total_rows_per_granule ? 
+ result.final_filter : + current_filter; + + result.applyFilter(filter_to_apply); } + + LOG_TEST(log, "After execute prewhere {}", result.dumpInfo()); } std::string PrewhereExprInfo::dump() const diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 06f3f5760fb5..039a499e9c18 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -1,6 +1,9 @@ #pragma once #include #include +#include +#include +#include #include namespace DB @@ -34,6 +37,45 @@ struct PrewhereExprInfo std::string dump() const; }; +class FilterWithCachedCount +{ + ConstantFilterDescription const_description; /// TODO: ConstantFilterDescription only checks always true/false for const columns + /// think how to handle when the column is not const but has all 0s or all 1s + ColumnPtr column = nullptr; + const IColumn::Filter * data = nullptr; + mutable size_t cached_count_bytes = -1; + +public: + explicit FilterWithCachedCount() = default; + + explicit FilterWithCachedCount(const ColumnPtr & column_) + : const_description(*column_) + { + ColumnPtr col = column_->convertToFullIfNeeded(); + FilterDescription desc(*col); + column = desc.data_holder ? desc.data_holder : col; + data = desc.data; + } + + bool present() const { return !!column; } + + bool alwaysTrue() const { return const_description.always_true; } + bool alwaysFalse() const { return const_description.always_false; } + + ColumnPtr getColumn() const { return column; } + + const IColumn::Filter & getData() const { return *data; } + + size_t size() const { return column->size(); } + + size_t countBytesInFilter() const + { + if (cached_count_bytes == size_t(-1)) + cached_count_bytes = DB::countBytesInFilter(*data); + return cached_count_bytes; + } +}; + /// MergeTreeReader iterator which allows sequential reading for arbitrary number of rows between pairs of marks in the same part. /// Stores reading state, which can be inside granule. Can skip rows in current granule and start reading from next mark. /// Used generally for reading number of rows less than index granularity to decrease cache misses for fat blocks. @@ -174,53 +216,46 @@ class MergeTreeRangeReader using RangesInfo = std::vector; - const RangesInfo & startedRanges() const { return started_ranges; } - const NumRows & rowsPerGranule() const { return rows_per_granule; } + explicit ReadResult(Poco::Logger * log_) : log(log_) {} static size_t getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges); - /// The number of rows were read at LAST iteration in chain. <= num_added_rows + num_filtered_rows. - size_t totalRowsPerGranule() const { return total_rows_per_granule; } - size_t numRowsToSkipInLastGranule() const { return num_rows_to_skip_in_last_granule; } - /// Filter you need to apply to newly-read columns in order to add them to block. - const ColumnUInt8 * getFilterOriginal() const { return filter_original ? filter_original : filter; } - const ColumnUInt8 * getFilter() const { return filter; } - ColumnPtr & getFilterHolder() { return filter_holder; } - void addGranule(size_t num_rows_); void adjustLastGranule(); void addRows(size_t rows) { num_read_rows += rows; } void addRange(const MarkRange & range) { started_ranges.push_back({rows_per_granule.size(), range}); } - /// Set filter or replace old one. Filter must have more zeroes than previous. - void setFilter(const ColumnPtr & new_filter); - /// For each granule calculate the number of filtered rows at the end.
Remove them and update filter. - void optimize(bool can_read_incomplete_granules, bool allow_filter_columns); + /// Add current step filter to the result and then for each granule calculate the number of filtered rows at the end. + /// Remove them and update filter. + /// Apply the filter to the columns and update num_rows if required + void optimize(const FilterWithCachedCount & current_filter, bool can_read_incomplete_granules); /// Remove all rows from granules. void clear(); - void clearFilter() { filter = nullptr; } void setFilterConstTrue(); - void setFilterConstFalse(); void addNumBytesRead(size_t count) { num_bytes_read += count; } - void shrink(Columns & old_columns); + /// Shrinks columns according to the diff between current and previous rows_per_granule. + void shrink(Columns & old_columns, const NumRows & rows_per_granule_previous) const; + + /// Applies the filter to the columns and updates num_rows. + void applyFilter(const FilterWithCachedCount & filter); - size_t countBytesInResultFilter(const IColumn::Filter & filter); + /// Verifies that columns and filter sizes match. + /// The checks might be non-trivial so it makes sense to have them only in debug builds. + void checkInternalConsistency() const; - /// If this flag is false than filtering form PREWHERE can be delayed and done in WHERE - /// to reduce memory copies and applying heavy filters multiple times - bool need_filter = false; + std::string dumpInfo() const; - Block block_before_prewhere; + /// Contains columns that are not included into result but might be needed for default values calculation. + Block additional_columns; RangesInfo started_ranges; /// The number of rows read from each granule. /// Granule here is not number of rows between two marks /// It's amount of rows per single reading act NumRows rows_per_granule; - NumRows rows_per_granule_original; /// Sum(rows_per_granule) size_t total_rows_per_granule = 0; /// The number of rows was read at first step. May be zero if no read columns present in part. @@ -229,29 +264,36 @@ class MergeTreeRangeReader size_t num_rows_to_skip_in_last_granule = 0; /// Without any filtration. size_t num_bytes_read = 0; - /// nullptr if prev reader hasn't prewhere_actions. Otherwise filter.size() >= total_rows_per_granule. - ColumnPtr filter_holder; - ColumnPtr filter_holder_original; - const ColumnUInt8 * filter = nullptr; - const ColumnUInt8 * filter_original = nullptr; - void collapseZeroTails(const IColumn::Filter & filter, IColumn::Filter & new_filter); + /// This filter has the size of total_rows_per_granule. This means that it can be applied to newly read columns. + /// The result of applying this filter is that only rows that pass all previous filtering steps will remain. + FilterWithCachedCount final_filter; + + /// This flag is true when prewhere column can be returned without filtering. + /// It's true when it contains 0s from all filtering steps (not just the step when it was calculated). + /// NOTE: If we accumulated the final_filter for several steps without applying it then prewhere column calculated at the last step + /// will not contain 0s from all previous steps. + bool can_return_prewhere_column_without_filtering = true; + + /// Checks if result columns have current final_filter applied.
+ bool filterWasApplied() const { return !final_filter.present() || final_filter.countBytesInFilter() == num_rows; } + + /// Builds updated filter by cutting zeros in granule tails + void collapseZeroTails(const IColumn::Filter & filter, const NumRows & rows_per_granule_previous, IColumn::Filter & new_filter) const; size_t countZeroTails(const IColumn::Filter & filter, NumRows & zero_tails, bool can_read_incomplete_granules) const; static size_t numZerosInTail(const UInt8 * begin, const UInt8 * end); - std::map filter_bytes_map; - - Names extra_columns_filled; + Poco::Logger * log; }; ReadResult read(size_t max_rows, MarkRanges & ranges); - const Block & getSampleBlock() const { return sample_block; } + const Block & getSampleBlock() const { return result_sample_block; } private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); - void executePrewhereActionsAndFilterColumns(ReadResult & result); + void executePrewhereActionsAndFilterColumns(ReadResult & result) const; void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; @@ -261,11 +303,14 @@ class MergeTreeRangeReader Stream stream; - Block sample_block; + Block read_sample_block; /// Block with columns that are actually read from disk + non-const virtual columns that are filled at this step. + Block result_sample_block; /// Block with columns that are returned by this step. bool last_reader_in_chain = false; bool is_initialized = false; Names non_const_virtual_column_names; + + Poco::Logger * log = &Poco::Logger::get("MergeTreeRangeReader"); }; } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index ca0ab7a18408..eec817acd557 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -80,6 +80,8 @@ class StorageEmbeddedRocksDB final : public IStorage, public IKeyValueEntity, Wi const std::vector & keys, PaddedPODArray * out_null_map) const; + bool supportsDelete() const override { return true; } + private: const String primary_key; using RocksDBPtr = std::unique_ptr; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f5b6829c7eff..f69f9f8ee7fb 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -94,6 +94,7 @@ const char * auto_contributors[] { "Aliaksandr Shylau", "Alina Terekhova", "Amesaru", + "Amila Welihinda", "Amir Vaza", "Amos Bird", "Amr Alaa", @@ -174,6 +175,7 @@ const char * auto_contributors[] { "Avogar", "Azat Khuzhin", "BSD_Conqueror", + "BSWaterB", "Babacar Diassé", "Bakhtiyor Ruziev", "BanyRule", @@ -186,6 +188,7 @@ const char * auto_contributors[] { "Bharat Nallan", "Bharat Nallan Chakravarthy", "Big Elephant", + "BigRedEye", "Bill", "BiteTheDDDDt", "BlahGeek", @@ -203,6 +206,7 @@ const char * auto_contributors[] { "Brett Hoerner", "Brian Hunter", "Bulat Gaifullin", + "Camden Cheek", "Camilo Sierra", "Carbyn", "Carlos Rodríguez Hernández", @@ -291,6 +295,7 @@ const char * auto_contributors[] { "Eldar Zaitov", "Elena", "Elena Baskakova", + "Elena Torró", "Elghazal Ahmed", "Elizaveta Mironyuk", "Elykov Alexandr", @@ -525,6 +530,7 @@ const char * auto_contributors[] { "Maksim Kita", "Mallik Hassan", "Malte", + "Manuel de la Peña", "Marat
IDRISOV", "Marcelo Rodriguez", "Marek Vavrusa", @@ -534,6 +540,7 @@ const char * auto_contributors[] { "Mark Andreev", "Mark Frost", "Mark Papadakis", + "Mark Polokhov", "Maroun Maroun", "Marquitos", "Marsel Arduanov", @@ -709,6 +716,7 @@ const char * auto_contributors[] { "Quanfa Fu", "Quid37", "Radistka-75", + "Raevsky Rudolf", "Rafael Acevedo", "Rafael David Tinoco", "Rajkumar", @@ -779,6 +787,7 @@ const char * auto_contributors[] { "Sergey Mirvoda", "Sergey Ryzhkov", "Sergey Shtykov", + "Sergey Skvortsov", "Sergey Tulentsev", "Sergey V. Galtsev", "Sergey Zaikin", @@ -790,6 +799,7 @@ const char * auto_contributors[] { "Sherry Wang", "Shoh Jahon", "SiderZhang", + "Sidorov Pavel", "Silviu Caragea", "Simeon Emanuilov", "Simon Liu", @@ -878,6 +888,7 @@ const char * auto_contributors[] { "Viktor Taranenko", "Vincent Bernat", "Vitalii S", + "Vitaliy", "Vitaliy Fedorchenko", "Vitaliy Karnienko", "Vitaliy Kozlovskiy", @@ -922,6 +933,7 @@ const char * auto_contributors[] { "Weiqing Xu", "William Shallum", "Winter Zhang", + "Xbitz29", "XenoAmess", "Xianda Ke", "Xiang Zhou", @@ -1013,6 +1025,7 @@ const char * auto_contributors[] { "benbiti", "bgranvea", "bharatnc", + "bit-ranger", "bkuschel", "blazerer", "bluebirddm", @@ -1238,6 +1251,7 @@ const char * auto_contributors[] { "luc1ph3r", "lulichao", "luocongkai", + "lzydmxy", "m-ves", "madianjun", "maiha", @@ -1313,6 +1327,7 @@ const char * auto_contributors[] { "peter279k", "philip.han", "pingyu", + "pkubaj", "potya", "presto53", "proller", @@ -1378,6 +1393,7 @@ const char * auto_contributors[] { "taiyang-li", "tangjiangling", "tao jiang", + "taofengliu", "taojiatao", "tavplubix", "tchepavel", @@ -1394,6 +1410,7 @@ const char * auto_contributors[] { "turbo jason", "tyrionhuang", "ubuntu", + "unbyte", "unegare", "unknown", "urgordeadbeef", @@ -1481,6 +1498,7 @@ const char * auto_contributors[] { "Дмитрий Канатников", "Иванов Евгений", "Илья Исаев", + "Коренберг ☢️ Марк", "Павел Литвиненко", "Смитюх Вячеслав", "Сундуков Алексей", diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2353be9b69f8..432d2c4ac647 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -46,7 +46,7 @@ static String getEngineFull(const DatabasePtr & database) break; /// Database was dropped - if (!locked_database && name == database->getDatabaseName()) + if (name == database->getDatabaseName()) return {}; guard.reset(); diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index c9e8dac2c008..a718bd53418d 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -122,7 +122,8 @@ def check_for_success_run( build_name: str, build_config: BuildConfig, ) -> None: - logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix) + # the final empty argument is necessary to distinguish build and build_suffix + logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "") logging.info("Checking for artifacts in %s", logged_prefix) try: # TODO: theoretically, it would miss performance artifact for pr==0, diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index c82d9da05e9a..fdf695010cd6 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -5,6 +5,7 @@ import requests # type: ignore from get_robot_token import get_parameter_from_ssm +from env_helper import GITHUB_REPOSITORY class InsertException(Exception): @@ -138,7 +139,7 @@ def prepare_tests_results_for_clickhouse( check_name, ): -
pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + pull_request_url = "https://github.com/{}/commits/master".format(GITHUB_REPOSITORY) base_ref = "master" head_ref = "master" base_repo = pr_info.repo_full_name diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0618969f94cd..b6e4c49d791b 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -14,7 +14,13 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import post_commit_status -from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL +from env_helper import ( + GITHUB_WORKSPACE, + RUNNER_TEMP, + GITHUB_RUN_URL, + DOCKER_USER, + DOCKER_REPO, +) from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from s3_helper import S3Helper @@ -91,7 +97,7 @@ def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict: def get_changed_docker_images( - pr_info: PRInfo, images_dict: ImagesDict + pr_info: PRInfo, images_dict: ImagesDict, docker_repo: str ) -> Set[DockerImage]: if not images_dict: @@ -111,7 +117,7 @@ def get_changed_docker_images( for dockerfile_dir, image_description in images_dict.items(): for f in files_changed: if f.startswith(dockerfile_dir): - name = image_description["name"] + name = docker_repo + "/" + image_description["name"] only_amd64 = image_description.get("only_amd64", False) logging.info( "Found changed file '%s' which affects " @@ -135,7 +141,7 @@ def get_changed_docker_images( dependent, image, ) - name = images_dict[dependent]["name"] + name = docker_repo + "/" + images_dict[dependent]["name"] only_amd64 = images_dict[dependent].get("only_amd64", False) changed_images.append(DockerImage(dependent, name, only_amd64, image)) index += 1 @@ -248,6 +254,7 @@ def build_and_push_one_image( "docker buildx build --builder default " f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" + f"--build-arg DOCKER_REPO={DOCKER_REPO} " # A hack to invalidate cache, grep for it in docker/ dir f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " f"--tag {image.repo}:{version_string} " @@ -389,7 +396,6 @@ def parse_args() -> argparse.Namespace: default=argparse.SUPPRESS, help="don't push images to docker hub", ) - return parser.parse_args() @@ -404,10 +410,11 @@ def main(): changed_json = os.path.join(TEMP_PATH, f"changed_images_{args.suffix}.json") else: changed_json = os.path.join(TEMP_PATH, "changed_images.json") - if args.push: subprocess.check_output( # pylint: disable=unexpected-keyword-arg - "docker login --username 'robotclickhouse' --password-stdin", + "docker login {} --username '{}' --password-stdin".format( + DOCKER_REPO, DOCKER_USER + ), input=get_parameter_from_ssm("dockerhub_robot_password"), encoding="utf-8", shell=True, @@ -431,7 +438,7 @@ def main(): # If the event does not contain diff, nothing will be built pass - changed_images = get_changed_docker_images(pr_info, images_dict) + changed_images = get_changed_docker_images(pr_info, images_dict, DOCKER_REPO) if changed_images: logging.info( "Has changed images: %s", ", ".join([im.path for im in changed_images]) @@ -476,7 +483,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 
2ba5a99de0af..af7d4e4bd864 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -11,7 +11,7 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import post_commit_status -from env_helper import RUNNER_TEMP +from env_helper import RUNNER_TEMP, DOCKER_USER, DOCKER_REPO from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from s3_helper import S3Helper @@ -60,7 +60,6 @@ def parse_args() -> argparse.Namespace: default=argparse.SUPPRESS, help="don't push images to docker hub", ) - args = parser.parse_args() if len(args.suffixes) < 2: parser.error("more than two --suffix should be given") @@ -173,7 +172,9 @@ def main(): args = parse_args() if args.push: subprocess.check_output( # pylint: disable=unexpected-keyword-arg - "docker login --username 'robotclickhouse' --password-stdin", + "docker login {} --username '{}' --password-stdin".format( + DOCKER_REPO, DOCKER_USER + ), input=get_parameter_from_ssm("dockerhub_robot_password"), encoding="utf-8", shell=True, @@ -208,7 +209,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index e0053f096643..fd28e5a18908 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -340,7 +340,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 8b18a580ed72..98c04955e81a 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -4,7 +4,7 @@ import unittest from unittest.mock import patch, MagicMock -from env_helper import GITHUB_RUN_URL +from env_helper import GITHUB_RUN_URL, DOCKER_REPO from pr_info import PRInfo import docker_images_check as di @@ -30,7 +30,9 @@ def test_get_changed_docker_images(self): images = sorted( list( di.get_changed_docker_images( - pr_info, di.get_images_dict("/", self.docker_images_path) + pr_info, + di.get_images_dict("/", self.docker_images_path), + DOCKER_REPO, ) ) ) @@ -129,6 +131,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen, mock_open): self.assertIn( f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version --cache-from type=registry,ref=name:version " "--cache-from type=registry,ref=name:latest " @@ -148,6 +151,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen, mock_open): self.assertIn( f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version2 " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " @@ -166,6 +170,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen, mock_open): mock_machine.assert_not_called() self.assertIn( f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg 
CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " @@ -186,6 +191,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen, mock_open): mock_machine.assert_not_called() self.assertIn( f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index ab0c3c6f6886..1ef6f2873426 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -31,6 +31,15 @@ "{pr_or_release}/{commit}/{build_name}/{artifact}" ) +DOCKER_REPO = os.getenv("DOCKER_REPO", "docker.io") +DOCKER_USER = os.getenv("DOCKER_USER", "robotclickhouse") +S3_REGION = os.getenv("S3_REGION", "us-east-1") +S3_ENDPOINT = os.getenv("S3_ENDPOINT", "https://s3.amazonaws.com") +VAULT_PATH = os.getenv("VAULT_PATH") +VAULT_TOKEN = os.getenv("VAULT_TOKEN") +VAULT_URL = os.getenv("VAULT_URL") +VAULT_MOUNT_POINT = os.getenv("VAULT_MOUNT_POINT", "secret") + # These parameters are set only on demand, and only once _GITHUB_JOB_ID = "" _GITHUB_JOB_URL = "" diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 6ecaf468ed15..036fbd198bb4 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -3,6 +3,8 @@ from dataclasses import dataclass import boto3 # type: ignore +import hvac # type: ignore +from env_helper import VAULT_URL, VAULT_TOKEN, VAULT_PATH, VAULT_MOUNT_POINT from github import Github from github.AuthenticatedUser import AuthenticatedUser @@ -15,18 +17,44 @@ class Token: def get_parameter_from_ssm(name, decrypt=True, client=None): - if not client: - client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] + if VAULT_URL: + if not client: + client = hvac.Client(url=VAULT_URL, token=VAULT_TOKEN) + parameter = client.secrets.kv.v2.read_secret_version( + mount_point=VAULT_MOUNT_POINT, path=VAULT_PATH + )["data"]["data"][name] + else: + if not client: + client = boto3.client("ssm", region_name="us-east-1") + parameter = client.get_parameter(Name=name, WithDecryption=decrypt)[ + "Parameter" + ]["Value"] + return parameter def get_best_robot_token(token_prefix_env_name="github_robot_token_"): - client = boto3.client("ssm", region_name="us-east-1") - parameters = client.describe_parameters( - ParameterFilters=[ - {"Key": "Name", "Option": "BeginsWith", "Values": [token_prefix_env_name]} + client = None + if VAULT_URL: + client = hvac.Client(url=VAULT_URL, token=VAULT_TOKEN) + response = client.secrets.kv.read_secret_version( + path=VAULT_PATH, mount_point=VAULT_MOUNT_POINT + ) + parameters = [ + {"Name": p} + for p in response["data"]["data"] + if p.startswith(token_prefix_env_name) ] - )["Parameters"] + else: + client = boto3.client("ssm", region_name="us-east-1") + parameters = client.describe_parameters( + ParameterFilters=[ + { + "Key": "Name", + "Option": "BeginsWith", + "Values": [token_prefix_env_name], + } + ] + )["Parameters"] assert parameters token = None diff --git a/tests/ci/release.py b/tests/ci/release.py index 502efd791733..57d5c4cdd6e1 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -32,8 +32,6 @@ RELEASE_READY_STATUS = "Ready for release" -git = Git() - class Repo: VALID = ("ssh", "https", 
"origin") @@ -79,7 +77,7 @@ def __init__( self.release_commit = release_commit assert release_type in self.BIG + self.SMALL self.release_type = release_type - self._git = git + self._git = Git() self._version = get_version_from_repo(git=self._git) self._release_branch = "" self._rollback_stack = [] # type: List[str] diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 7119f4437199..994208774c90 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -21,6 +21,7 @@ TRUSTED_ORG_IDS = { 54801242, # clickhouse + 96197510, # ClicbMouse } OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"} diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 03e855a00575..29b63da1ac84 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -16,6 +16,8 @@ CI, S3_URL, S3_DOWNLOAD, + S3_REGION, + S3_ENDPOINT, ) from compress_files import compress_file_fast @@ -40,9 +42,10 @@ def _flatten_list(lst): class S3Helper: - def __init__(self, host=S3_URL, download_host=S3_DOWNLOAD): - self.session = boto3.session.Session(region_name="us-east-1") - self.client = self.session.client("s3", endpoint_url=host) + def __init__(self, host=S3_URL, download_host=S3_DOWNLOAD, endpoint=S3_ENDPOINT): + self.session = boto3.session.Session(region_name=S3_REGION) + self.client = self.session.client("s3", endpoint_url=endpoint) + self.endpoint = endpoint self.host = host self.download_host = download_host @@ -107,8 +110,13 @@ def _upload_file_to_s3(self, bucket_name: str, file_path: str, s3_path: str) -> logging.info("Upload %s to %s. Meta: %s", file_path, s3_path, metadata) # last two replacements are specifics of AWS urls: # https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/ - url = f"{self.download_host}/{bucket_name}/{s3_path}" - return url.replace("+", "%2B").replace(" ", "%20") + return ( + "{host}/{bucket}/{path}".format( + host=self.endpoint, bucket=bucket_name, path=s3_path + ) + .replace("+", "%2B") + .replace(" ", "%20") + ) def upload_test_report_to_s3(self, file_path: str, s3_path: str) -> str: if CI: @@ -179,7 +187,9 @@ def upload_task(file_path): t = time.time() except Exception as ex: logging.critical("Failed to upload file, expcetion %s", ex) - return f"{self.download_host}/{bucket_name}/{s3_path}" + return "{host}/{bucket}/{path}".format( + host=self.endpoint, bucket=bucket_name, path=s3_path + ) p = Pool(256) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index d285e29943d9..3807e3cafaee 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -122,6 +122,19 @@ "BoloniniD", # Seasoned contributor, HSE "tonickkozlov", # Cloudflare "tylerhannan", # ClickHouse Employee + "ch-devops", + "larryluogit", + "bkuschel", + "SadiHassan", + "kashwy", + "HeenaBansal2009", + "umang8223", + "HarryLeeIBM", + "ltrk2", + "MeenaRenganathan22", + "mcmajam", + "bemitc", + "vibhaKulka", ] } diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 551466cf5837..487cf9b98695 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -358,10 +358,15 @@ def _compress_logs(self, dir, relpaths, result_path): subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL "sync", shell=True ) - subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL + retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL "tar czf {} -C {} {}".format(result_path, dir, " ".join(relpaths)), shell=True, 
) + # tar returns 1 when the files are changed during compression, we ignore it + if retcode in (0, 1): + return + # but even on fatal errors it's better to retry + logging.error("Fatal error on compressing %s: %s", result_path, retcode) def _get_runner_opts(self): result = [] diff --git a/tests/queries/0_stateless/00609_prewhere_and_default.sql b/tests/queries/0_stateless/00609_prewhere_and_default.sql index 7da809cd140a..f1aa69c13201 100644 --- a/tests/queries/0_stateless/00609_prewhere_and_default.sql +++ b/tests/queries/0_stateless/00609_prewhere_and_default.sql @@ -3,11 +3,25 @@ create table `table_00609` (key UInt64, val UInt64) engine = MergeTree order by insert into `table_00609` select number, number / 8192 from system.numbers limit 100000; alter table `table_00609` add column def UInt64 default val + 1; select * from `table_00609` prewhere val > 2 format Null; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=100; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=1000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=10000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=20000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=30000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=40000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=80000; drop table if exists `table_00609`; create table `table_00609` (key UInt64, val UInt64) engine = MergeTree order by key settings index_granularity=8192; insert into `table_00609` select number, number / 8192 from system.numbers limit 100000; alter table `table_00609` add column def UInt64; select * from `table_00609` prewhere val > 2 format Null; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=100; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=1000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=10000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=20000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=30000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=40000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=80000; drop table if exists `table_00609`; diff --git a/tests/queries/0_stateless/00718_format_datetime.reference b/tests/queries/0_stateless/00718_format_datetime.reference index bc98dd59d5f1..17937514396c 100644 --- a/tests/queries/0_stateless/00718_format_datetime.reference +++ b/tests/queries/0_stateless/00718_format_datetime.reference @@ -34,3 +34,11 @@ no formatting pattern no formatting pattern -1100 +0300 +0530 +1234560 +000340 +2022-12-08 18:11:29.123400000 +2022-12-08 18:11:29.1 +2022-12-08 18:11:29.0 +2022-12-08 18:11:29.0 +2022-12-08 00:00:00.0 +2022-12-08 00:00:00.0 diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index deb5fb96c6c1..f6fb2ce15bcc 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -54,3 +54,13 @@ SELECT formatDateTime(toDateTime('2020-01-01 01:00:00', 'UTC'), '%z'); SELECT formatDateTime(toDateTime('2020-01-01 01:00:00', 'US/Samoa'), '%z'); SELECT formatDateTime(toDateTime('2020-01-01 01:00:00',
'Europe/Moscow'), '%z'); SELECT formatDateTime(toDateTime('1970-01-01 00:00:00', 'Asia/Kolkata'), '%z'); + +select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f'); +select formatDateTime(toDateTime64('2022-12-08 18:11:29.00034', 6, 'UTC'), '%f'); + +select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 9, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 1, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 0, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); +select formatDateTime(toDate32('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); +select formatDateTime(toDate('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); diff --git a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference index 10e8f0d2c592..4b3beccf5f1d 100644 --- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference +++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference @@ -1,3 +1,8 @@ +0 +0 2007 2007 2007 +0 +2007 +2007 diff --git a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql index ad50420b6ae3..d3b36cda0d8d 100644 --- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql +++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql @@ -1,4 +1,3 @@ - DROP TABLE IF EXISTS bloom_filter; CREATE TABLE bloom_filter @@ -13,9 +12,19 @@ insert into bloom_filter select number+2000, 'abc,def,zzz' from numbers(8); insert into bloom_filter select number+3000, 'yyy,uuu' from numbers(1024); insert into bloom_filter select number+3000, 'abcdefzzz' from numbers(1024); +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS } +SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS } + +SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz'); +SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz'); + +select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'ABC'); +select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'zZz'); + set max_rows_to_read = 16; SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc'); +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'ABC'); SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'def'); SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz'); diff --git a/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference new file mode 100644 index 000000000000..b4dfe343bbe6 --- /dev/null +++ b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference @@ -0,0 +1,3 @@ +foo +foo +foo diff --git a/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql new file mode 100644 index 000000000000..244f58b6717d --- /dev/null +++ b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql @@ -0,0 +1,17 @@ +drop table if exists t; + +create table t (id UInt32, a Int) engine = MergeTree order by id settings min_bytes_for_wide_part=0; + +insert into t values (1, 0) (2, 1) (3, 0) (4, 0) (5, 0); +alter table t add column s String default 'foo'; +select s from t prewhere a = 1; + +drop table t; + +create table t (id UInt32, 
a Int) engine = MergeTree order by id settings min_bytes_for_wide_part=0; + +insert into t values (1, 1) (2, 1) (3, 0) (4, 0) (5, 0); +alter table t add column s String default 'foo'; +select s from t prewhere a = 1; + +drop table t; diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.reference b/tests/queries/0_stateless/01674_filter_by_uint8.reference index 6b522898280d..435423ba4552 100644 --- a/tests/queries/0_stateless/01674_filter_by_uint8.reference +++ b/tests/queries/0_stateless/01674_filter_by_uint8.reference @@ -2,7 +2,12 @@ 0 255 1 ['foo','bar'] 1 1 -2 ['foo','bar'] 2 1 -3 ['foo','bar'] 3 1 -4 ['foo','bar'] 4 1 -5 ['foo','bar'] 5 1 +2 ['foo','bar'] 2 2 +3 ['foo','bar'] 3 3 +4 ['foo','bar'] 4 4 +5 ['foo','bar'] 5 5 +1 ['foo','bar'] 1 1 +2 ['foo','bar'] 2 2 +3 ['foo','bar'] 3 3 +4 ['foo','bar'] 4 4 +5 ['foo','bar'] 5 5 diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.sql b/tests/queries/0_stateless/01674_filter_by_uint8.sql index 960153d9c5af..0bf11cea59be 100644 --- a/tests/queries/0_stateless/01674_filter_by_uint8.sql +++ b/tests/queries/0_stateless/01674_filter_by_uint8.sql @@ -10,5 +10,6 @@ ENGINE = MergeTree ORDER BY u; INSERT INTO t_filter SELECT toString(number), ['foo', 'bar'], number, toUInt8(number) FROM numbers(1000); SELECT * FROM t_filter WHERE f LIMIT 5; +SELECT * FROM t_filter WHERE f != 0 LIMIT 5; DROP TABLE IF EXISTS t_filter; diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference index 0edbea640652..2455f50b7f2a 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference @@ -32,9 +32,27 @@ 0 0 198401_1_1_1 1 1 198401_1_1_1 999998 999998 198401_1_1_1 +0 +1 +2 +0 foo +1 foo +2 foo SOME GRANULES FILTERED OUT 335872 166463369216 166463369216 34464 1510321840 1510321840 301408 164953047376 164953047376 +100000 +100001 +100002 +100000 foo +100001 foo +100002 foo PREWHERE 301408 164953047376 164953047376 +42 +10042 +20042 +42 foo +10042 foo +20042 foo diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index eb1f01e65f7b..1de6447172d7 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -24,6 +24,8 @@ INSERT INTO t_1 select rowNumberInAllBlocks(), *, '1984-01-01' from t_random_1 l OPTIMIZE TABLE t_1 FINAL; +ALTER TABLE t_1 ADD COLUMN foo String DEFAULT 'foo'; + SELECT COUNT(DISTINCT(_part)) FROM t_1; SELECT min(_part_offset), max(_part_offset) FROM t_1; @@ -37,13 +39,19 @@ SELECT order_0, _part_offset, _part FROM t_1 WHERE order_0 <= 1 OR (order_0 BETW SELECT order_0, _part_offset, computed FROM t_1 ORDER BY order_0, _part_offset, computed LIMIT 3; SELECT order_0, _part_offset, computed FROM t_1 ORDER BY order_0 DESC, _part_offset DESC, computed DESC LIMIT 3; SELECT order_0, _part_offset, _part FROM t_1 WHERE order_0 <= 1 OR order_0 >= 999998 ORDER BY order_0 LIMIT 3; +SELECT _part_offset FROM t_1 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 ORDER BY order_0 LIMIT 3; SELECT 'SOME GRANULES FILTERED OUT'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0 AND _part_offset < 100000; SELECT count(*), sum(_part_offset), sum(order_0) 
from t_1 where granule == 0 AND _part_offset >= 100000; +SELECT _part_offset FROM t_1 where granule == 0 AND _part_offset >= 100000 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000 ORDER BY order_0 LIMIT 3; SELECT 'PREWHERE'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 } SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10 } +SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 67513a1cdff6..c7f8b67e7406 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -103,7 +103,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()); INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_proj WHERE a < 100; -- { serverError BAD_ARGUMENTS } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02366_kql_count.reference b/tests/queries/0_stateless/02366_kql_count.reference new file mode 100644 index 000000000000..9ed4a5d16a54 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_count.reference @@ -0,0 +1,4 @@ +6 +4 +2 +2 diff --git a/tests/queries/0_stateless/02366_kql_count.sql b/tests/queries/0_stateless/02366_kql_count.sql new file mode 100644 index 000000000000..d7706876548a --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_count.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; + +Customers | count; +Customers | where Age< 30 | count; +Customers | where Age< 30 | limit 2 | count; +Customers | where Age< 30 | limit 2 | count | project Count; diff --git a/tests/queries/0_stateless/02366_kql_create_table.reference b/tests/queries/0_stateless/02366_kql_create_table.reference new file mode 100644 index 000000000000..35136b5ff425 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.reference @@ -0,0 +1,4 @@ +-- test create table -- +Theodore +Diaz +Theodore Diaz 28 diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql new file mode 100644 index 000000000000..b266679b06aa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = 
Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); +Select '-- test create table --' ; +Select * from kql(Customers|project FirstName) limit 1;; +DROP TABLE IF EXISTS kql_table1; +CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); +select LastName from kql_table1 limit 1; +DROP TABLE IF EXISTS kql_table2; +CREATE TABLE kql_table2 +( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); +select * from kql_table2 limit 1; +-- select * from kql(Customers | where FirstName !in ("test", "test2")); +DROP TABLE IF EXISTS Customers; +DROP TABLE IF EXISTS kql_table1; +DROP TABLE IF EXISTS kql_table2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_datatype.reference b/tests/queries/0_stateless/02366_kql_datatype.reference new file mode 100644 index 000000000000..e396a4159962 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.reference @@ -0,0 +1,141 @@ +-- bool +true +\N +-- int +123 +\N +-2147483648 +2147483647 +-- long +123 +255 +-1 +\N +-9223372036854775808 +9223372036854775807 +456 +-- real +0.01 +\N +nan +inf +-inf +-- datetime +2015-12-31 23:59:59.900000000 +2015-12-31 00:00:00.000000000 +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.000000000 +2014-11-08 15:55:00.000000000 +2014-11-08 00:00:00.000000000 +\N +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.123456000 +31536000000000000 +1970-05-11 13:45:07.456345700 +-- time +\N +93783000000000 +93783123000000 +-93783123000000 +93783000000000 +93780000000000 +7380000000000 +7384000000000 +7384567890100 +1216984123450000 +45055123000000 +86400000000000 +-86400000000000 +0 +600 +172800000000000 +259200000000000 +-- timespan (time) +\N +172800000000000 +5400000000000 +1800000000000 +10000000000 +100000000 +100000000 +10000 +100 +3 +1120343 +129600000000000 +false +true +864000000000 +864000000000 +1.00:00:00 +2.04:08:16.1234567 +331.08:12:40 +165.16:06:20 +-1.01:01:01.1234567 +864000000000 +-- guid +\N +-- null +1 +\N \N \N \N \N +-- decimal +\N +123.345 +100000 +-- dynamic +\N +1 +86400000000000 +[1,2,3] +[[1],[2],[3]] +['a','b','c'] +-- cast functions +true +1 +-- tobool("false") +false +1 +-- tobool(1) +true +1 +-- tobool(123) +true +1 +-- tobool("abc") +\N +\N +-- todouble() +123.4 +\N +-- toreal() +123.4 +\N +-- toint() +1 +\N +-- tostring() +123 + +-- todatetime() +1 +1 +1 +-- totimespan() +\N +100 +60000000000 +\N +1120343 +1120343 +59400000000000 +-- tolong() +123 +\N +-- todecimal() +123.345 +\N +\N +100000 +0.00001 +123.561 +653.4 diff --git a/tests/queries/0_stateless/02366_kql_datatype.sql b/tests/queries/0_stateless/02366_kql_datatype.sql new file mode 100644 index 000000000000..05a126c9cc65 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.sql @@ -0,0 +1,187 @@ + +DROP TABLE IF EXISTS tb1; +create table tb1 ( +str String +)ENGINE = Memory; +INSERT INTO tb1 VALUES ('123.561') , ('653.4'); + +-- datatable(s:string, i:long) [ +-- '0', 0, +-- '1899', 1899, +-- '1900', 1900, +-- '2261', 2261, +-- '2262', 2262, +-- '10000', 10000 +-- ] + 
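The commented-out datatable block above preserves the original Kusto declaration of the test data; since the test runs on ClickHouse, the rows are materialized into a Memory table and queried after switching dialects. A minimal sketch of the same emulation pattern, with an illustrative table name:

    drop table if exists datatable_sketch;
    create table datatable_sketch (s String, i Int64) engine = Memory;
    insert into datatable_sketch values ('0', 0), ('1899', 1899);
    set dialect = 'kusto';
    datatable_sketch | count;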
+drop table if exists datetime_test; +create table datetime_test(s String, i Int64) engine = Memory; +insert into datetime_test values ('0', 0), ('1899', 1899), ('1900', 1900), ('2261', 2261), ('2262', 2262), ('10000', 10000); + +set dialect = 'kusto'; +print '-- bool'; +print bool(true); +print bool(null); +print bool('false'); -- { clientError BAD_ARGUMENTS } +print '-- int'; +print int(123); +print int(null); +print int(-2147483648); +print int(2147483647); +print int('4'); -- { clientError BAD_ARGUMENTS } +print int(-2147483649); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print int(2147483648); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- long'; +print long(123); +print long(0xff); +print long(-1); +print long(null); +print long(-9223372036854775808); +print long(9223372036854775807); +print 456; +-- print long(-9223372036854775809); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print long(9223372036854775808); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print long('9023'); -- { clientError BAD_ARGUMENTS } +print '-- real'; +print real(0.01); +print real(null); +print real(nan); +print real(+inf); +print real(-inf); +print double('4.2'); -- { clientError BAD_ARGUMENTS } +print '-- datetime'; +print datetime(2015-12-31 23:59:59.9); +print datetime(2015-12-31); +print datetime('2014-05-25T08:20:03.123456'); +print datetime('2014-11-08 15:55:55'); +print datetime('2014-11-08 15:55'); +print datetime('2014-11-08'); +print datetime(null); +print datetime('2014-05-25T08:20:03.123456Z'); +print datetime('2014-11-08 15:55:55.123456Z'); +print datetime('2022') - datetime('2021'); +print datetime('1970-05-11 13:45:07.456345672'); +print '-- time'; +print time(null); +print time(1.2:3:3); +print time(1.2:3:3.123); +print time(-1.2:3:3.123); +print time(001.02:03:03); +print time(001.02:03); +print time(02:03); +print time(02:03:04); +print time(02:03:04.5678901); +print time(24:03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:60:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:60.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:-03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:-04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:04.-5678901); -- { clientError BAD_ARGUMENTS } +print time(1.-02:03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(1.23); -- { clientError BAD_ARGUMENTS } +print time(02:03:04.56789012); -- { clientError BAD_ARGUMENTS } +print time(03:04.56789012); -- { clientError BAD_ARGUMENTS } +print time('14.02:03:04.12345'); +print time('12:30:55.123'); +print time(1d); +print time(-1d); +print time(6nanoseconds); +print time(6tick); +print time(2); +print time(2) + 1d; +print '-- timespan (time)'; +print timespan(null); +print timespan(2d); -- 2 days +print timespan(1.5h); -- 1.5 hour +print timespan(30m); -- 30 minutes +print timespan(10s); -- 10 seconds +print timespan(0.1s); -- 0.1 second +print timespan(100ms); -- 100 millisecond +print timespan(10microsecond); -- 10 microseconds +print timespan(1tick); -- 100 nanoseconds +print timespan(1.5h) / timespan(30m); +print timespan('12.23:12:23') / timespan(1s); +print (timespan(1.5d) / timespan(0.6d)) * timespan(0.6d); +print tobool(timespan(0s)); +print tobool(timespan(1d)); +print todouble(timespan(1d)); +-- print toint(timespan(1d)); -> 711573504 +print tolong(timespan(1d)); +print tostring(timespan(1d)); +print tostring(timespan(2d) + timespan(4h) + timespan(8m) + timespan(16s) + 
timespan(123millis) + timespan(456micros) + timespan(789nanos)); +print tostring((1h + 90d) * 2 + (6h + 32s + 30d + 2m) * 5); +print tostring(((1h + 90d) * 2 + (6h + 32s + 30d + 2m) * 5) / 2); +print tostring(-timespan(1d) - timespan(1h) - timespan(1m) - timespan(1s) - timespan(123456789nanos)); +print todecimal(timespan(1d)); +print 49h + (1h + 1m) * 999999h + 1s; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 1h * 1h; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 2h + 2; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 2h - 2; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- guid' +print guid(74be27de-1e4e-49d9-b579-fe0b331d3642); +print guid(null); +print '-- null'; +print isnull(null); +print bool(null), int(null), long(null), real(null), double(null); +print '-- decimal'; +print decimal(null); +print decimal(123.345); +print decimal(1e5); +print '-- dynamic'; -- no support for mixed types and bags for now +print dynamic(null); +print dynamic(1); +print dynamic(timespan(1d)); +print dynamic([1,2,3]); +print dynamic([[1], [2], [3]]); +print dynamic(['a', "b", 'c']); +print '-- cast functions' +print '--tobool("true")'; -- == true +print tobool('true'); -- == true +print tobool('true') == toboolean('true'); -- == true +print '-- tobool("false")'; -- == false +print tobool('false'); -- == false +print tobool('false') == toboolean('false'); -- == false +print '-- tobool(1)'; -- == true +print tobool(1); -- == true +print tobool(1) == toboolean(1); -- == true +print '-- tobool(123)'; -- == true +print tobool(123); -- == true +print tobool(123) == toboolean(123); -- == true +print '-- tobool("abc")'; -- == null +print tobool('abc'); -- == null +print tobool('abc') == toboolean('abc'); -- == null +print '-- todouble()'; +print todouble('123.4'); +print todouble('abc') == null; +print '-- toreal()'; +print toreal("123.4"); +print toreal('abc') == null; +print '-- toint()'; +print toint("123") == int(123); +print toint('abc'); +print '-- tostring()'; +print tostring(123); +print tostring(null); +print '-- todatetime()'; +print todatetime("2015-12-24") == datetime(2015-12-24); +print isnull(todatetime('abc')); +print todatetime('1970-05-11 13:45:07.456345672') == datetime('1970-05-11 13:45:07.456345672'); +print '-- totimespan()'; +print totimespan(null); +print totimespan(1tick); +print totimespan('0.00:01:00'); +print totimespan('abc'); +print totimespan('12.23:12:23') / totimespan(1s); +print totimespan(strcat('12.', '23', ':12:', '23')) / timespan(1s); +print totimespan(timespan(16:30)); +print '-- tolong()'; +print tolong('123'); +print tolong('abc'); +print '-- todecimal()'; +print todecimal(123.345); +print todecimal(null); +print todecimal('abc'); +print todecimal(1e5); +print todecimal(1e-5); +tb1 | project todecimal(str); +-- print todecimal(4 * 2 + 3); -> 11 diff --git a/tests/queries/0_stateless/02366_kql_distinct.reference b/tests/queries/0_stateless/02366_kql_distinct.reference new file mode 100644 index 000000000000..74035603adfc --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_distinct.reference @@ -0,0 +1,30 @@ +-- distinct * -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +\N why Professional Partial College 38 +-- distinct one column -- +Skilled Manual +Management abcd defg +Professional +-- distinct two column -- +Skilled Manual Bachelors +Management abcd defg Bachelors +Skilled 
+Professional Graduate Degree
+Professional Partial College
+-- distinct with where --
+Skilled Manual Bachelors
+Skilled Manual Graduate Degree
+Professional Graduate Degree
+-- distinct with where, order --
+Skilled Manual Bachelors
+Skilled Manual Graduate Degree
+Professional Graduate Degree
+-- distinct with alias --
+8
+3
+6
+5
diff --git a/tests/queries/0_stateless/02366_kql_distinct.sql b/tests/queries/0_stateless/02366_kql_distinct.sql
new file mode 100644
index 000000000000..04ef94b0e416
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_distinct.sql
@@ -0,0 +1,31 @@
+DROP TABLE IF EXISTS Customers;
+CREATE TABLE Customers
+(
+    FirstName Nullable(String),
+    LastName String,
+    Occupation String,
+    Education String,
+    Age Nullable(UInt8)
+) ENGINE = Memory;
+
+INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38);
+
+set dialect = 'kusto';
+
+print '-- distinct * --';
+Customers | distinct *;
+
+print '-- distinct one column --';
+Customers | distinct Occupation;
+
+print '-- distinct two columns --';
+Customers | distinct Occupation, Education;
+
+print '-- distinct with where --';
+Customers | where Age < 30 | distinct Occupation, Education;
+
+print '-- distinct with where, order --';
+Customers | where Age < 30 | order by Age | distinct Occupation, Education;
+
+print '-- distinct with alias --';
+Customers | project a = (Age % 10) | distinct a;
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02366_kql_extend.reference b/tests/queries/0_stateless/02366_kql_extend.reference
new file mode 100644
index 000000000000..ea841b6fb2ec
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_extend.reference
@@ -0,0 +1,32 @@
+-- extend #1 --
+Aldi Apple 4 2016-09-10 400
+Costco Apple 2 2016-09-11 200
+-- extend #2 --
+Apple 200
+Apple 400
+-- extend #3 --
+Apple cost 480 on average based on 5 samples.
+Snargaluff cost 28080 on average based on 5 samples.
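The '-- extend #3 --' rows above come from chaining extend, summarize and strcat over the Ledger table defined further down. For orientation, a rough ClickHouse-SQL equivalent of that pipeline is sketched below; this is an illustration only, not part of the test files:

    SELECT concat(Fruit, ' cost ', toString(avg(100 * Price)),
                  ' on average based on ', toString(count()), ' samples.') AS Sentence
    FROM Ledger
    GROUP BY Fruit;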
+-- extend #4 -- +1 +-- extend #5 -- +Aldi Apple 4 2016-09-10 Apple was purchased from Aldi for $4 on 2016-09-10T00:00:00.0000000Z 400 +Costco Apple 2 2016-09-11 Apple was purchased from Costco for $2 on 2016-09-11T00:00:00.0000000Z 200 +-- extend #6 -- +Aldi Apple 2016-09-10 400 +Costco Apple 2016-09-11 200 +Aldi Apple 2016-09-10 600 +Costco Snargaluff 2016-09-12 10000 +Aldi Apple 2016-09-12 700 +Aldi Snargaluff 2016-09-11 40000 +Costco Snargaluff 2016-09-12 10400 +Aldi Apple 2016-09-12 500 +Aldi Snargaluff 2016-09-11 60000 +Costco Snargaluff 2016-09-10 20000 +-- extend #7 -- +5 +-- extend #8 -- +-- extend #9 -- +-- extend #10 -- +-- extend #11 -- +5 [2,1] diff --git a/tests/queries/0_stateless/02366_kql_extend.sql b/tests/queries/0_stateless/02366_kql_extend.sql new file mode 100644 index 000000000000..9325a7662405 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_extend.sql @@ -0,0 +1,58 @@ +-- datatable(Supplier:string, Fruit:string, Price: real, Purchase:datetime) +-- [ +-- 'Aldi','Apple',4,'2016-09-10', +-- 'Costco','Apple',2,'2016-09-11', +-- 'Aldi','Apple',6,'2016-09-10', +-- 'Costco','Snargaluff',100,'2016-09-12', +-- 'Aldi','Apple',7,'2016-09-12', +-- 'Aldi','Snargaluff',400,'2016-09-11', +-- 'Costco','Snargaluff',104,'2016-09-12', +-- 'Aldi','Apple',5,'2016-09-12', +-- 'Aldi','Snargaluff',600,'2016-09-11', +-- 'Costco','Snargaluff',200,'2016-09-10', +-- ] + +DROP TABLE IF EXISTS Ledger; +CREATE TABLE Ledger +( + Supplier Nullable(String), + Fruit String , + Price Float64, + Purchase Date +) ENGINE = Memory; +INSERT INTO Ledger VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); + +set dialect = 'kusto'; + +print '-- extend #1 --'; +Ledger | extend PriceInCents = 100 * Price | take 2; + +print '-- extend #2 --'; +Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | take 2; + +print '-- extend #3 --'; +Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; + +print '-- extend #4 --'; +Ledger | extend a = Price | extend b = a | extend c = a, d = b + 500 | extend Pass = bool(b == a and c == a and d == b + 500) | summarize binary_all_and(Pass); + +print '-- extend #5 --'; +Ledger | take 2 | extend strcat(Fruit, ' was purchased from ', Supplier, ' for $', tostring(Price), ' on ', tostring(Purchase)) | extend PriceInCents = 100 * Price; + +print '-- extend #6 --'; +Ledger | extend Price = 100 * Price; + +print '-- extend #7 --'; +print a = 4 | extend a = 5; + +print '-- extend #8 --'; +-- print x = 5 | extend array_sort_desc(range(0, x), range(1, x + 1)) + +print '-- extend #9 --'; +print x = 19 | extend = 4 + ; -- { clientError SYNTAX_ERROR } + +print '-- extend #10 --'; +Ledger | extend PriceInCents = * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), 
' samples.') | project Sentence; -- { clientError SYNTAX_ERROR } + +print '-- extend #11 --'; +print x = 5 | extend ex = array_sort_desc(dynamic([1, 2]), dynamic([3, 4])); diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference new file mode 100644 index 000000000000..6276cd6d8675 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -0,0 +1,7 @@ + -- binary functions +4 7 +1 +1 +1 +7 3 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql new file mode 100644 index 000000000000..824022b564ce --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -0,0 +1,8 @@ +set dialect='kusto'; +print ' -- binary functions'; +print binary_and(4,7), binary_or(4,7); +print binary_shift_left(1, 1) == binary_shift_left(1, 65); +print binary_shift_right(2, 1) == binary_shift_right(2, 65); +print binary_shift_right(binary_shift_left(1, 65), 65) == 1; +print binary_xor(2, 5), bitset_count_ones(42); +print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.reference b/tests/queries/0_stateless/02366_kql_func_datetime.reference new file mode 100644 index 000000000000..d0a36d79e74c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.reference @@ -0,0 +1,90 @@ +-- dayofmonth() +31 +-- dayofweek() +345600000000000 +172800000000000 +345600000000000 +-- dayofyear() +365 +-- getmonth() +10 +-- getyear() +2015 +-- hoursofday() +23 +-- startofday() +2017-01-01 00:00:00.000000000 +2016-12-31 00:00:00.000000000 +2017-01-02 00:00:00.000000000 +-- endofday() +2017-01-01 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2017-01-02 23:59:59.999999900 +-- endofmonth() +2017-01-31 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2017-02-28 23:59:59.999999900 +2022-09-30 23:59:59.999999900 +-- startofweek() +2017-01-01 00:00:00.000000000 +2016-12-25 00:00:00.000000000 +2017-01-08 00:00:00.000000000 +-- endofweek() +2017-01-07 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2017-01-14 23:59:59.999999900 +-- startofyear() +2017-01-01 00:00:00.000000000 +2016-01-01 00:00:00.000000000 +2018-01-01 00:00:00.000000000 +-- endofyear() +2017-12-31 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2018-12-31 23:59:59.999999900 +-- unixtime_seconds_todatetime() +2019-01-01 00:00:00.000000000 +1970-01-02 00:00:00.000000000 +1969-12-31 00:00:00.000000000 +-- unixtime_microseconds_todatetime +2019-01-01 00:00:00.000000000 +-- unixtime_milliseconds_todatetime() +2019-01-01 00:00:00.000000000 +-- unixtime_nanoseconds_todatetime() +2019-01-01 00:00:00.000000000 +-- weekofyear() +52 +-- monthofyear() +12 +-- weekofyear() +52 +-- now() +1 +-- make_datetime() +2017-10-01 12:10:00.000000000 +\N +\N +\N +2017-10-01 12:10:00.000000000 +2017-10-01 12:11:00.123456700 +-- format_datetime +15-12-14 02:03:04.1234500 +17-01-29 [09:00:05] +2017-01-29 [09:00:05] +17-01-29 [09:00:05 AM] +-- format_timespan() +2:3:4.1234500 +29.09:00:05:12 +029.9:00:05 [1234500] +05/5-29:29,029.29_9[12]121234512 09 0 00 +-- make_timespan() +4320000000000 +4350000000000 +131455123000000 +-- ago() +-86400000000000 +-- datetime_diff() +17 2 13 4 29 2 5 10 +-- datetime_part() +2017 4 10 44 30 303 01 02 03 +-- datetime_add() +2018-01-01 00:00:00.000000000 2017-04-01 00:00:00.000000000 2017-02-01 00:00:00.000000000 2017-01-08 00:00:00.000000000 2017-01-02 00:00:00.000000000 2017-01-01 01:00:00.000000000 
2017-01-01 00:01:00.000000000 2017-01-01 00:00:01.000000000 diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.sql b/tests/queries/0_stateless/02366_kql_func_datetime.sql new file mode 100644 index 000000000000..554a3c408a94 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.sql @@ -0,0 +1,98 @@ +set dialect = 'kusto'; + +print '-- dayofmonth()'; +print dayofmonth(datetime(2015-12-31)); +print '-- dayofweek()'; +print dayofweek(datetime(2015-12-31)); +print dayofweek(datetime(2015-12-14 18:54:00)) + 1d; +print dayofweek(datetime(2015-12-18 18:54:00)) - dayofweek(datetime(2015-12-14 18:54:00)); +print '-- dayofyear()'; +print dayofyear(datetime(2015-12-31)); +print '-- getmonth()'; +print getmonth(datetime(2015-10-12)); +print '-- getyear()'; +print getyear(datetime(2015-10-12)); +print '-- hoursofday()'; +print hourofday(datetime(2015-12-31 23:59:59.9)); +print '-- startofday()'; +print startofday(datetime(2017-01-01 10:10:17)); +print startofday(datetime(2017-01-01 10:10:17), -1); +print startofday(datetime(2017-01-01 10:10:17), 1); +print '-- endofday()'; +print endofday(datetime(2017-01-01 10:10:17)); +print endofday(datetime(2017-01-01 10:10:17), -1); +print endofday(datetime(2017-01-01 10:10:17), 1); +print '-- endofmonth()'; +print endofmonth(datetime(2017-01-01 10:10:17)); +print endofmonth(datetime(2017-01-01 10:10:17), -1); +print endofmonth(datetime(2017-01-01 10:10:17), 1); +print endofmonth(datetime(2022-09-23)); +print '-- startofweek()'; +print startofweek(datetime(2017-01-01 10:10:17)); +print startofweek(datetime(2017-01-01 10:10:17), -1); +print startofweek(datetime(2017-01-01 10:10:17), 1); +print '-- endofweek()'; +print endofweek(datetime(2017-01-01 10:10:17)); +print endofweek(datetime(2017-01-01 10:10:17), -1); +print endofweek(datetime(2017-01-01 10:10:17), 1); +print '-- startofyear()'; +print startofyear(datetime(2017-01-01 10:10:17)); +print startofyear(datetime(2017-01-01 10:10:17), -1); +print startofyear(datetime(2017-01-01 10:10:17), 1); +print '-- endofyear()'; +print endofyear(datetime(2017-01-01 10:10:17)); +print endofyear(datetime(2017-01-01 10:10:17), -1); +print endofyear(datetime(2017-01-01 10:10:17), 1); +print '-- unixtime_seconds_todatetime()'; +print unixtime_seconds_todatetime(1546300800); +print unixtime_seconds_todatetime(86400); +print unixtime_seconds_todatetime(-86400); +print '-- unixtime_microseconds_todatetime'; +print unixtime_microseconds_todatetime(1546300800000000); +print '-- unixtime_milliseconds_todatetime()'; +print unixtime_milliseconds_todatetime(1546300800000); +print '-- unixtime_nanoseconds_todatetime()'; +print unixtime_nanoseconds_todatetime(1546300800000000000); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- monthofyear()'; +print monthofyear(datetime(2015-12-31)); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- now()'; +print getyear(now(-2d))>1900; +print '-- make_datetime()'; +print make_datetime(2017,10,01,12,10); +print make_datetime(2300,10,01,12,10); +print make_datetime(2020,14,30,12,10); +print make_datetime(2020,10,35,12,10); +print year_month_day_hour_minute = make_datetime(2017,10,01,12,10); +print year_month_day_hour_minute_second = make_datetime(2017,10,01,12,11,0.1234567); +print '-- format_datetime'; +print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s.fffffff'); +print format_datetime(datetime(2017-01-29 09:00:05),'yy-MM-dd [HH:mm:ss]'); +print format_datetime(datetime(2017-01-29 09:00:05), 
'yyyy-M-dd [H:mm:ss]'); +print format_datetime(datetime(2017-01-29 09:00:05), 'yy-MM-dd [hh:mm:ss tt]'); +print '-- format_timespan()'; +print format_timespan(time('14.02:03:04.12345'), 'h:m:s.fffffff'); +print format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF'); +print format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]'); +print format_timespan(time('29.09:00:05.12345'), 'ss/s-d:dd,ddd.dd_h[ff]FFfffffFF HH m mm'); +print '-- make_timespan()'; +print make_timespan(1,12); +print make_timespan(1,12,30); +print make_timespan(1,12,30,55.123); +print '-- ago()'; +print ago(1d) - now(); +print '-- datetime_diff()'; +print year = datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31)), quarter = datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30)), month = datetime_diff('month',datetime(2017-01-01),datetime(2015-12-30)), week = datetime_diff('week',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), day = datetime_diff('day',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), hour = datetime_diff('hour',datetime(2017-10-31 01:00),datetime(2017-10-30 23:59)), minute = datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59)), second = datetime_diff('second',datetime(2017-10-30 23:00:10.100),datetime(2017-10-30 23:00:00.900)); +-- millisecond = datetime_diff('millisecond',datetime(2017-10-30 23:00:00.200100),datetime(2017-10-30 23:00:00.100900)), +-- microsecond = datetime_diff('microsecond',datetime(2017-10-30 23:00:00.1009001),datetime(2017-10-30 23:00:00.1008009)), +-- nanosecond = datetime_diff('nanosecond',datetime(2017-10-30 23:00:00.0000000),datetime(2017-10-30 23:00:00.0000007)) +print '-- datetime_part()'; +print year = datetime_part("year", datetime(2017-10-30 01:02:03.7654321)),quarter = datetime_part("quarter", datetime(2017-10-30 01:02:03.7654321)),month = datetime_part("month", datetime(2017-10-30 01:02:03.7654321)),weekOfYear = datetime_part("week_of_year", datetime(2017-10-30 01:02:03.7654321)),day = datetime_part("day", datetime(2017-10-30 01:02:03.7654321)),dayOfYear = datetime_part("dayOfYear", datetime(2017-10-30 01:02:03.7654321)),hour = datetime_part("hour", datetime(2017-10-30 01:02:03.7654321)),minute = datetime_part("minute", datetime(2017-10-30 01:02:03.7654321)),second = datetime_part("second", datetime(2017-10-30 01:02:03.7654321)); +-- millisecond = datetime_part("millisecond", dt), +-- microsecond = datetime_part("microsecond", dt), +-- nanosecond = datetime_part("nanosecond", dt) +print '-- datetime_add()'; +print year = datetime_add('year',1,make_datetime(2017,1,1)),quarter = datetime_add('quarter',1,make_datetime(2017,1,1)),month = datetime_add('month',1,make_datetime(2017,1,1)),week = datetime_add('week',1,make_datetime(2017,1,1)),day = datetime_add('day',1,make_datetime(2017,1,1)),hour = datetime_add('hour',1,make_datetime(2017,1,1)),minute = datetime_add('minute',1,make_datetime(2017,1,1)),second = datetime_add('second',1,make_datetime(2017,1,1)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.reference b/tests/queries/0_stateless/02366_kql_func_dynamic.reference new file mode 100644 index 000000000000..2bee07871386 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_dynamic.reference @@ -0,0 +1,189 @@ +-- constant index value +1 c ['A',NULL,'C'] +-- array_length() +4 +3 +-- array_sum() +10 +11 +1 +\N +0 +4 +-- array_index_of() +3 +1 +-- array_iif() +[1,5,3] +[1,5,3] +[1,5,NULL] +[NULL,NULL,NULL] +[1,NULL] +['1','2',NULL,'2',NULL] 
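The identical ['1','2',NULL,'2',NULL] rows in this block are deliberate: three array_iif variants feed it boolean, integer and float condition arrays, and all are expected to coerce conditions the same way: non-zero counts as true, zero as false, and positions past the end of the shorter array come out null. A minimal sketch of that assumed rule, in the same KQL dialect:

    print array_iif(dynamic([1, 0, 7]), dynamic([1, 2, 3]), dynamic([4, 5, 6]));
    -- expected to behave like the boolean form and print [1,5,3]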
+['1','2',NULL,'2',NULL] +['1','2',NULL,'2',NULL] +[1.1,999.99,3.3,999.99,5.5] +[90,3,90,NULL,90] +[1,4,5,8,9] +-- array_concat() +[1,2,3,4,5,6] +-- array_reverse() +[] +[1] +[4,3,2,1] +['example','an','is','this'] +-- array_rotate_left() +[] +[] +[] +[3,4,5,1,2] +[1,2,3,4,5] +[3,4,5,1,2] +[4,5,1,2,3] +[1,2,3,4,5] +[4,5,1,2,3] +-- array_rotate_right() +[] +[] +[] +[4,5,1,2,3] +[1,2,3,4,5] +[4,5,1,2,3] +[3,4,5,1,2] +[1,2,3,4,5] +[3,4,5,1,2] +-- array_shift_left() +[] +[] +[] +[3,4,5,NULL,NULL] +[NULL,NULL,1,2,3] +[3,4,5,-1,-1] +['c','',''] +-- array_shift_right() +[] +[] +[] +[3,4,5,NULL,NULL] +[NULL,NULL,1,2,3] +[3,4,5,-1,-1] +['c','',''] +-- array_slice() +[2,3] +[3,4] +-- array_split() +[[1],[2,3],[4,5]] +[[1,2],[3,4,5]] +[[1],[2,3],[4,5]] +[[1,2,3,4],[],[4,5]] +-- array_sort_asc() +(['a','c','c','d',NULL]) +([1,2,3,4]) +['a','b','c'] +(['p','q','r'],['hello','clickhouse','world']) +([NULL,'a','c','c','d']) +([NULL,'a','c','c','d']) +([NULL,NULL,NULL]) +[1,2,3,NULL,NULL] +['a','e','b','c','d'] +(['George','John','Paul','Ringo']) +(['blue','green','yellow',NULL,NULL]) +([NULL,NULL,'blue','green','yellow']) +-- array_sort_desc() +(['d','c','c','a',NULL]) +([4,3,2,1]) +['c','b','a'] +(['r','q','p'],['world','clickhouse','hello']) +([NULL,'d','c','c','a']) +([NULL,'d','c','c','a']) +([NULL,NULL,NULL]) +[3,2,1,NULL,NULL] +['d','c','b','e','a'] +(['Ringo','Paul','John','George']) +(['yellow','green','blue',NULL,NULL]) +([NULL,NULL,'yellow','green','blue']) +-- jaccard_index() +0.75 +0 +0 +nan +0 +0.75 +0.25 +-- pack_array() +1 2 4 [1,2,4] +['ab','0.0.0.42','4.2'] +-- repeat() +[] +[1,1,1] +['asd','asd','asd'] +[86400000000000,86400000000000,86400000000000] +[true,true,true] +[NULL] +[NULL] +-- set_difference() +[] +[] +[] +[] +[4,5,6] +[4] +[1,3] +[1,2,3] +['d','s'] +['Chewbacca','Han Solo'] +-- set_has_element() +0 +1 +0 +1 +0 +-- set_intersect() +[] +[1,2,3] +[1,2,3] +[] +[5] +[] +['a'] +['Darth Vader'] +-- set_union() +[] +[1,2,3] +[1,2,3,4,5,6] +[1,2,3,4] +[1,2,3,4,5] +[1,2,3] +['a','d','f','s'] +['Chewbacca','Darth Sidious','Darth Vader','Han Solo'] +-- zip() +[] +[[1,2],[3,4],[5,6]] +[['Darth','Vader','has a suit'],['Master','Yoda','doesn\'t have a suit']] +[[1,10],[2,20],[3,NULL]] +[[NULL,1],[NULL,2],[NULL,3]] +-- array_sort in table() +1 (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) +2 (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) +3 (['CA','Eng','Gem','Japan'],[25,11,10,23],[120,0,110,130]) +-- array_sort in table() with condition +1 (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) +2 (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) +3 (['CA','Eng','Gem','Japan'],[25,11,10,23],[120,0,110,130]) +-- array_sort as condition +-- array_sort with single alias +1 ['CA','Eng','FR','US'] +2 ['Eng','FR','Gem','Japan'] +3 ['CA','Eng','Gem','Japan'] +1 ['CA','Eng','FR','US'] +2 ['Eng','FR','Gem','Japan'] +3 ['CA','Eng','Gem','Japan'] +-- array_sort with partial alias +1 ['CA','Eng','FR','US'] [11,20,12,16] +2 ['Eng','FR','Gem','Japan'] [10,33,22,31] +3 ['CA','Eng','Gem','Japan'] [25,11,10,23] +-- array_sort with all alias +1 ['CA','Eng','FR','US'] [11,20,12,16] [100,200,300,500] +2 ['Eng','FR','Gem','Japan'] [10,33,22,31] [210,310,410,510] +3 ['CA','Eng','Gem','Japan'] [25,11,10,23] [120,0,110,130] +[[1,2],[1,2],[1,2],[1,2]] diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.sql b/tests/queries/0_stateless/02366_kql_func_dynamic.sql new file mode 100644 index 000000000000..25c6e16a56ba --- /dev/null +++ 
b/tests/queries/0_stateless/02366_kql_func_dynamic.sql @@ -0,0 +1,197 @@ +DROP TABLE IF EXISTS array_test; +CREATE TABLE array_test (floats Array(Float64), + strings Array(String), + nullable_strings Array(Nullable(String)) + ) ENGINE=Memory; +INSERT INTO array_test VALUES([1.0, 2.5], ['a', 'c'], ['A', NULL, 'C']); +DROP TABLE IF EXISTS visit; +CREATE TABLE visit(pageid UInt8, ip_country Array(Nullable(String)), hit Array(Int64),duration Array(Int64)) ENGINE = Memory; +INSERT INTO visit VALUES (1,['CA', 'US','FR','Eng'], [11,16,12,20],[100,500,300,200]); +INSERT INTO visit VALUES (2,['Japan', 'Gem','FR','Eng'], [31,22,33,10],[510,410,310,210]); +INSERT INTO visit VALUES (3,['CA', 'Gem','Japan','Eng'], [25,10,23,11],[120,110,130,000]); +--INSERT INTO visit VALUES (4,['CA', 'Gem',null,'Eng'], [5,10,3,2],[220,320,310,150]); +--INSERT INTO visit VALUES (5,['FR', null,'US','Eng'], [16,12,23,10],[210,250,110,260]); +set dialect = 'kusto'; +print '-- constant index value'; +array_test | project floats[0], strings[1], nullable_strings; +print '-- array_length()'; +print array_length(dynamic(['John', 'Denver', 'Bob', 'Marley'])); +print array_length(dynamic([1, 2, 3])); +print array_length(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print array_length('a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print array_length(dynamic(42)); -> NULL +-- print array_length(dynamic('a')); -> NULL +print '-- array_sum()'; +print array_sum(dynamic([2, 5, 3])); +print array_sum(dynamic([2.5, 5.5, 3])); +print array_sum(dynamic([true, false, null])); +print array_sum(dynamic(['Alice', 'Bob'])); +print array_sum(dynamic([null, null, null])); +print array_sum(repeat(1, 4)); +print '-- array_index_of()'; +print array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley'); +print array_index_of(dynamic([1, 2, 3]), 2); +print '-- array_iif()'; +print array_iif(dynamic([true,false,true]), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([1,0,1]), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([true,false,true]), dynamic([1,2]), dynamic([4,5,6])); +print array_iif(dynamic(['a','b','c']), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([true,null]), dynamic([1, 2]), repeat(4, 2)); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([10, 0, 5, 0, -4]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([2.2, 0, 4.4, 0, 66.7]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1.1, 2.2, 3.3, 4.4, 5.5]), 999.99); +print t = array_iif(dynamic([true, false, true, false, true]), 90, dynamic([1, 3])); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1, 3, 5, 7, 9]), dynamic([2, 4, 6, 8, 10])); +print '-- array_concat()'; +print array_concat(dynamic([1,2,3]),dynamic([4,5,6])); +print '-- array_reverse()'; +print array_reverse(dynamic([])); +print array_reverse(dynamic([1])); +print array_reverse(dynamic([1,2,3,4])); +print array_reverse(dynamic(["this", "is", "an", "example"])); +print '-- array_rotate_left()'; +print array_rotate_left(dynamic([]), 0); +print array_rotate_left(dynamic([]), 500); +print array_rotate_left(dynamic([]), -500); +print array_rotate_left(dynamic([1,2,3,4,5]), 2); +print array_rotate_left(dynamic([1,2,3,4,5]), 5); +print array_rotate_left(dynamic([1,2,3,4,5]), 7); +print array_rotate_left(dynamic([1,2,3,4,5]), -2); +print array_rotate_left(dynamic([1,2,3,4,5]), -5); +print 
array_rotate_left(dynamic([1,2,3,4,5]), -7);
+print '-- array_rotate_right()';
+print array_rotate_right(dynamic([]), 0);
+print array_rotate_right(dynamic([]), 500);
+print array_rotate_right(dynamic([]), -500);
+print array_rotate_right(dynamic([1,2,3,4,5]), 2);
+print array_rotate_right(dynamic([1,2,3,4,5]), 5);
+print array_rotate_right(dynamic([1,2,3,4,5]), 7);
+print array_rotate_right(dynamic([1,2,3,4,5]), -2);
+print array_rotate_right(dynamic([1,2,3,4,5]), -5);
+print array_rotate_right(dynamic([1,2,3,4,5]), -7);
+print '-- array_shift_left()';
+print array_shift_left(dynamic([]), 0);
+print array_shift_left(dynamic([]), 555);
+print array_shift_left(dynamic([]), -555);
+print array_shift_left(dynamic([1,2,3,4,5]), 2);
+print array_shift_left(dynamic([1,2,3,4,5]), -2);
+print array_shift_left(dynamic([1,2,3,4,5]), 2, -1);
+print array_shift_left(dynamic(['a', 'b', 'c']), 2);
+print '-- array_shift_right()';
+print array_shift_right(dynamic([]), 0);
+print array_shift_right(dynamic([]), 555);
+print array_shift_right(dynamic([]), -555);
+print array_shift_right(dynamic([1,2,3,4,5]), -2);
+print array_shift_right(dynamic([1,2,3,4,5]), 2);
+print array_shift_right(dynamic([1,2,3,4,5]), -2, -1);
+print array_shift_right(dynamic(['a', 'b', 'c']), -2);
+print '-- array_slice()';
+print array_slice(dynamic([1,2,3]), 1, 2);
+print array_slice(dynamic([1,2,3,4,5]), -3, -2);
+print '-- array_split()';
+print array_split(dynamic([1,2,3,4,5]), dynamic([1,-2]));
+print array_split(dynamic([1,2,3,4,5]), 2);
+print array_split(dynamic([1,2,3,4,5]), dynamic([1,3]));
+print array_split(dynamic([1,2,3,4,5]), dynamic([-1,-2]));
+print '-- array_sort_asc()';
+print array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c']));
+print array_sort_asc(dynamic([4, 1, 3, 2]));
+print array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0];
+print array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']));
+print array_sort_asc(dynamic(['d', null, 'a', 'c', 'c']), false);
+print array_sort_asc(dynamic(['d', null, 'a', 'c', 'c']), 1 > 2);
+print array_sort_asc(dynamic([null, null, null]), false);
+print array_sort_asc(dynamic([2, 1, null, 3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0];
+print array_sort_asc(dynamic(['1','3','4','5','2']), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3];
+print array_sort_asc(split("John,Paul,George,Ringo", ","));
+print array_sort_asc(dynamic([null,"blue","yellow","green",null]));
+print array_sort_asc(dynamic([null,"blue","yellow","green",null]), false);
+print '-- array_sort_desc()';
+print array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c']));
+print array_sort_desc(dynamic([4, 1, 3, 2]));
+print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0];
+print array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']));
+print array_sort_desc(dynamic(['d', null, 'a', 'c', 'c']), false);
+print array_sort_desc(dynamic(['d', null, 'a', 'c', 'c']), 1 > 2);
+print array_sort_desc(dynamic([null, null, null]), false);
+print array_sort_desc(dynamic([2, 1, null, 3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0];
+print array_sort_desc(dynamic(['1','3','4','5','2']), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3];
+print array_sort_desc(split("John,Paul,George,Ringo", ","));
+print array_sort_desc(dynamic([null,"blue","yellow","green",null]));
+print
array_sort_desc(dynamic([null,"blue","yellow","green",null]), false); +print '-- jaccard_index()'; +print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])); +print jaccard_index(dynamic([1, 2, 3]), dynamic([])); +print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])); +print jaccard_index(dynamic([]), dynamic([])); +print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])); +print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])); +print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); +print '-- pack_array()'; +print pack_array(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print x = 1 | extend y = x * 2 | extend z = y * 2 | extend pack_array(x,y,z); +print pack_array(strcat('a', 'b'), format_ipv4(42), tostring(4.2)); +print '-- repeat()'; +print repeat(1, 0); +print repeat(1, 3); +print repeat("asd", 3); +print repeat(timespan(1d), 3); +print repeat(true, 3); +print repeat(1, -3); +print repeat(6.7,-4); +print '-- set_difference()'; +print set_difference(dynamic([]), dynamic([])); +print set_difference(dynamic([]), dynamic([9])); +print set_difference(dynamic([]), dynamic(["asd"])); +print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])); +print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print set_difference(dynamic([4]), dynamic([1, 2, 3])); +print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[0]; +print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[0]; +print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; +print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; +print '-- set_has_element()'; +print set_has_element(dynamic([]), 9); +print set_has_element(dynamic(["this", "is", "an", "example"]), "example"); +print set_has_element(dynamic(["this", "is", "an", "example"]), "examplee"); +print set_has_element(dynamic([1, 2, 3]), 2); +print set_has_element(dynamic([1, 2, 3, 4.2]), 4); +print '-- set_intersect()'; +print set_intersect(dynamic([]), dynamic([])); +print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print set_intersect(dynamic([4]), dynamic([1, 2, 3])); +print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])); +print set_intersect(dynamic([1, 2, 3]), dynamic([])); +print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])); +print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); +print '-- set_union()'; +print set_union(dynamic([]), dynamic([])); +print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[0]; +print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[0]; +print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; +print array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; +print '-- zip()'; +print zip(dynamic([]), dynamic([])); 
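One convention in the set_difference/set_intersect/set_union cases above deserves a note: the element order produced by the set operations is not guaranteed, so the tests wrap each result in array_sort_asc(...) and index it with [0]. Since array_sort_asc returns its sorted arrays as a tuple, [0] extracts the first (and here only) array. A minimal sketch of the assumed pattern:

    print array_sort_asc(set_union(dynamic([3, 1]), dynamic([2])))[0];
    -- expected to print [1,2,3] regardless of the order set_union emits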
+print zip(dynamic([1,3,5]), dynamic([2,4,6])); +print zip(dynamic(['Darth','Master']), dynamic(['Vader','Yoda']), dynamic(['has a suit','doesn\'t have a suit'])); +print zip(dynamic([1,2,3]), dynamic([10,20])); +print zip(dynamic([]), dynamic([1,2,3])); +print '-- array_sort in table()'; +visit | project pageid, array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print '-- array_sort in table() with condition'; +visit | project pageid, array_sort_asc(ip_country, hit, duration, pageid > 4) | order by pageid asc; +print '-- array_sort as condition'; +visit | where isnull(array_sort_asc(ip_country, hit, duration)[2][0]); +print '-- array_sort with single alias'; +visit | project pageid, a = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +visit | project pageid, (a) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print '-- array_sort with partial alias'; +visit | project pageid, (a,b) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print '-- array_sort with all alias'; +visit | project pageid, (a,b,c) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print zip(repeat(1,4), repeat(2,4)); diff --git a/tests/queries/0_stateless/02366_kql_func_general.reference b/tests/queries/0_stateless/02366_kql_func_general.reference new file mode 100644 index 000000000000..2ed45e3eb1fd --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_general.reference @@ -0,0 +1,33 @@ +-- case +Theodore Diaz Skilled Manual Bachelors 28 C +Stephanie Cox Management Bachelors 33 D +Peter Nara Skilled Manual Graduate Degree 26 C +Latoya Shen Professional Graduate Degree 25 C +Joshua Lee Professional Partial College 26 C +Edward Hernandez Skilled Manual High School 36 D +Dalton Wood Professional Partial College 42 D +Christine Nara Skilled Manual Partial College 33 D +Cameron Rodriguez Professional Partial College 28 C +Angel Stewart Professional Partial College 46 D +-- iff +Theodore Diaz Skilled Manual Bachelors 28 bigger +Stephanie Cox Management Bachelors 33 bigger +Peter Nara Skilled Manual Graduate Degree 26 bigger +Latoya Shen Professional Graduate Degree 25 bigger +Joshua Lee Professional Partial College 26 bigger +Edward Hernandez Skilled Manual High School 36 bigger +Dalton Wood Professional Partial College 42 bigger +Christine Nara Skilled Manual Partial College 33 bigger +Cameron Rodriguez Professional Partial College 28 bigger +Angel Stewart Professional Partial College 46 bigger +-- iif +Theodore Diaz Skilled Manual Bachelors 28 bigger +Stephanie Cox Management Bachelors 33 bigger +Peter Nara Skilled Manual Graduate Degree 26 bigger +Latoya Shen Professional Graduate Degree 25 bigger +Joshua Lee Professional Partial College 26 bigger +Edward Hernandez Skilled Manual High School 36 bigger +Dalton Wood Professional Partial College 42 bigger +Christine Nara Skilled Manual Partial College 33 bigger +Cameron Rodriguez Professional Partial College 28 bigger +Angel Stewart Professional Partial College 46 bigger diff --git a/tests/queries/0_stateless/02366_kql_func_general.sql b/tests/queries/0_stateless/02366_kql_func_general.sql new file mode 100644 index 000000000000..3296150eea4d --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_general.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled 
Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46);
+
+set dialect='kusto';
+print '-- case';
+Customers | extend t = case(Age <= 10, "A", Age <= 20, "B", Age <= 30, "C", "D");
+print '-- iff';
+Customers | extend t = iff(Age <= 10, "smaller", "bigger");
+print '-- iif';
+Customers | extend t = iif(Age <= 10, "smaller", "bigger");
diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference
new file mode 100644
index 000000000000..fdba4622c9a9
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_func_ip.reference
@@ -0,0 +1,123 @@
+-- ipv4_is_private(\'127.0.0.1\')
+0
+-- ipv4_is_private(\'10.1.2.3\')
+1
+-- ipv4_is_private(\'192.168.1.1/24\')
+1
+ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))
+1
+-- ipv4_is_private(\'abc\')
+\N
+-- ipv4_netmask_suffix(\'192.168.1.1/24\')
+24
+-- ipv4_netmask_suffix(\'192.168.1.1\')
+32
+-- ipv4_netmask_suffix(\'127.0.0.1/16\')
+16
+-- ipv4_netmask_suffix(\'abc\')
+\N
+ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))
+16
+-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')
+1
+-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')
+1
+-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')
+0
+-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')
+0
+-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')
+\N
+-- parse_ipv6(127.0.0.1)
+0000:0000:0000:0000:0000:ffff:7f00:0001
+-- parse_ipv6(fe80::85d:e82c:9446:7994)
+fe80:0000:0000:0000:085d:e82c:9446:7994
+-- parse_ipv4(\'127.0.0.1\')
+2130706433
+-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')
+1
+-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\'))
+-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432
+1
+-- parse_ipv4_mask(\'abc\', 31)
+\N
+-- parse_ipv4_mask(\'192.1.168.2\', 1000)
+\N
+-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)
+1
+-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')
+1
+-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')
+0
+-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')
+1
+-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)
+1
+-- ipv4_is_match(\'abc\', \'def\', 24)
+\N
+-- ipv4_compare()
+0
+-1
+1
+0
+0
+0
+0
+0
+0
+0
+0
+-- format_ipv4()
+192.168.1.0
+192.168.1.1
+192.168.1.0
+192.168.1.0
+1
+1
+127.0.0.0
+-- format_ipv4_mask()
+192.168.1.0/24
+192.168.1.0/24
+192.168.1.0/24
+192.168.1.1/32
+192.168.1.0/24
+1
+1
+127.0.0.0/24
+-- parse_ipv6_mask()
+0000:0000:0000:0000:0000:0000:0000:0000
+fe80:0000:0000:0000:085d:e82c:9446:7900
+0000:0000:0000:0000:0000:ffff:c0a8:ff00
+0000:0000:0000:0000:0000:ffff:c0a8:ff00
+0000:0000:0000:0000:0000:ffff:ffff:ffff
+fe80:0000:0000:0000:085d:e82c:9446:7994
+fe80:0000:0000:0000:085d:e82c:9446:7900
+0000:0000:0000:0000:0000:ffff:c0a8:ffff
+0000:0000:0000:0000:0000:ffff:c0a8:ff00
+-- ipv6_is_match()
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql
new file mode 100644
index 000000000000..8123bd6a3d11
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_func_ip.sql
@@ -0,0 +1,129 @@
+set dialect='kusto';
+print '-- ipv4_is_private(\'127.0.0.1\')';
+print ipv4_is_private('127.0.0.1');
+print '-- ipv4_is_private(\'10.1.2.3\')';
+print ipv4_is_private('10.1.2.3');
+print '-- ipv4_is_private(\'192.168.1.1/24\')';
+print ipv4_is_private('192.168.1.1/24');
+print 'ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))';
+print ipv4_is_private(strcat('192.','168.','1.','1','/24'));
+print '-- ipv4_is_private(\'abc\')';
+print ipv4_is_private('abc'); -- == null
+
+print '-- ipv4_netmask_suffix(\'192.168.1.1/24\')';
+print ipv4_netmask_suffix('192.168.1.1/24'); -- == 24
+print '-- ipv4_netmask_suffix(\'192.168.1.1\')';
+print ipv4_netmask_suffix('192.168.1.1'); -- == 32
+print '-- ipv4_netmask_suffix(\'127.0.0.1/16\')';
+print ipv4_netmask_suffix('127.0.0.1/16'); -- == 16
+print '-- ipv4_netmask_suffix(\'abc\')';
+print ipv4_netmask_suffix('abc'); -- == null
+print 'ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))';
+print ipv4_netmask_suffix(strcat('127.', '0.', '0.1/16')); -- == 16
+
+print '-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')';
+print ipv4_is_in_range('127.0.0.1', '127.0.0.1'); -- == true
+print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')';
+print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true
+print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')';
+print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false
+print '-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')';
+print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false
+print '-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')'; -- == null
+print ipv4_is_in_range('abc', '127.0.0.1');
+
+print '-- parse_ipv6(127.0.0.1)';
+print parse_ipv6('127.0.0.1');
+print '-- parse_ipv6(fe80::85d:e82c:9446:7994)';
+print parse_ipv6('fe80::85d:e82c:9446:7994');
+print '-- parse_ipv4(\'127.0.0.1\')';
+print parse_ipv4('127.0.0.1');
+print '-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')';
+print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2');
+print '-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\'))';
+print parse_ipv4(arrayStringConcat(['127', '0', '0', '1'], '.')); -- { clientError UNKNOWN_FUNCTION }
+
+print '-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432';
+print parse_ipv4_mask('127.0.0.1', 24) == 2130706432;
+print '-- parse_ipv4_mask(\'abc\', 31)';
+print parse_ipv4_mask('abc', 31);
+print '-- parse_ipv4_mask(\'192.1.168.2\', 1000)';
+print parse_ipv4_mask('192.1.168.2', 1000);
+print '-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)';
+print parse_ipv4_mask('192.1.168.2', 31) == parse_ipv4_mask('192.1.168.3', 31);
+print '-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')';
+print ipv4_is_match('127.0.0.1', '127.0.0.1');
+print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')';
+print ipv4_is_match('192.168.1.1', '192.168.1.255');
+print '-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')';
+print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24');
+print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)';
+print ipv4_is_match('192.168.1.1', '192.168.1.255', 24);
+print '-- ipv4_is_match(\'abc\', \'def\', 24)';
+print ipv4_is_match('abc', 'def', 24);
+print '-- ipv4_compare()';
+print ipv4_compare('127.0.0.1', '127.0.0.1');
+print ipv4_compare('192.168.1.1', '192.168.1.255');
+print ipv4_compare('192.168.1.255', '192.168.1.1');
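The ipv4_compare calls that follow exercise prefix handling: an inline /NN suffix on either argument, or the optional third argument, restricts the comparison to the leading NN bits, which is why most of the expected values in the reference block above are 0. A small sketch of the assumed rule:

    print ipv4_compare('192.168.1.1', '192.168.1.255');     -- full 32-bit compare, expected -1
    print ipv4_compare('192.168.1.1', '192.168.1.255', 24); -- only the /24 prefix compared, expected 0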
+print ipv4_compare('192.168.1.1/24', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print ipv4_compare('192.168.1.1/24', '192.168.1.255'); +print ipv4_compare('192.168.1.1', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1/30', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.0', 31); +print ipv4_compare('192.168.1.1/24', '192.168.1.255', 31); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print '-- format_ipv4()'; +print format_ipv4('192.168.1.255', 24); +print format_ipv4('192.168.1.1', 32); +print format_ipv4('192.168.1.1/24', 32); +print format_ipv4(3232236031, 24); +print format_ipv4('192.168.1.1/24', -1) == ''; +print format_ipv4('abc', 24) == ''; +print format_ipv4(strcat('127.0', '.0.', '1', '/32'), 12 + 12); +print '-- format_ipv4_mask()'; +print format_ipv4_mask('192.168.1.255', 24); +print format_ipv4_mask(3232236031, 24); +print format_ipv4_mask('192.168.1.1', 24); +print format_ipv4_mask('192.168.1.1', 32); +print format_ipv4_mask('192.168.1.1/24', 32); +print format_ipv4_mask('192.168.1.1/24', -1) == ''; +print format_ipv4_mask('abc', 24) == ''; +print format_ipv4_mask(strcat('127.0', '.0.', '1', '/32'), 12 + 12); +print '-- parse_ipv6_mask()'; +print parse_ipv6_mask("127.0.0.1", 24); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 120); +print parse_ipv6_mask("192.168.255.255", 120); +print parse_ipv6_mask("192.168.255.255/24", 124); +print parse_ipv6_mask("255.255.255.255", 128); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 128); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994/120", 124); +print parse_ipv6_mask("::192.168.255.255", 128); +print parse_ipv6_mask("::192.168.255.255/24", 128); +print '-- ipv6_is_match()'; +print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false; +print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true; +print ipv6_is_match('192.168.1.1', '192.168.1.1'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '192.168.1.255'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7994'); -- // Equal IPs +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '::ffff:c0a8:0101'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.0', 31); -- // 31 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', 
'192.168.1.255', 31); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255', 24); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7998', 120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998', 127); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255', 120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24', 127); -- // 120 bit IP6-prefix is used for comparison \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_math.reference b/tests/queries/0_stateless/02366_kql_func_math.reference new file mode 100644 index 000000000000..92f283abcb6e --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_math.reference @@ -0,0 +1,4 @@ +-- isnan -- +1 +0 +0 diff --git a/tests/queries/0_stateless/02366_kql_func_math.sql b/tests/queries/0_stateless/02366_kql_func_math.sql new file mode 100644 index 000000000000..4e83622eb6b8 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_math.sql @@ -0,0 +1,7 @@ +set dialect = 'kusto'; +print '-- isnan --'; +print isnan(double(nan)); +print isnan(4.2); +print isnan(4); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print isnan(real(+inf)); +print isnan(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.reference b/tests/queries/0_stateless/02366_kql_func_scalar.reference new file mode 100644 index 000000000000..b63db6efb5e6 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_scalar.reference @@ -0,0 +1,27 @@ +-- bin() +4 +3 +1970-05-11 00:00:00.000000000 +1209600000000000 +1970-05-11 13:45:07.345000000 +2022-09-26 10:13:23.982000000 +1970-05-11 13:45:07.345623000 +2022-09-26 10:13:23.987232000 +1970-05-11 13:45:07.456336000 +1970-05-11 13:45:07.456345700 +2022-09-26 10:13:23.987234100 +2022-09-26 10:13:23.987234100 +\N +26 1 +28 2 +25 1 +38 1 +33 1 +-- bin_at() +4.5 +-43200000000000 +2017-05-14 12:00:00.000000000 +2017-05-14 00:00:00.000000000 +2018-02-26 15:14:00.000000000 5 +2018-02-24 15:14:00.000000000 3 +2018-02-23 15:14:00.000000000 4 diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.sql b/tests/queries/0_stateless/02366_kql_func_scalar.sql new file mode 100644 index 000000000000..aba7d624f98c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_scalar.sql @@ -0,0 +1,48 @@ +-- datatable (Date:datetime, Num:int) [ +-- '2018-02-24T15:14:00', 3, +-- '2018-02-23T16:14:00', 4, +-- '2018-02-26T15:14:00', 5 +-- ] | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; + +DROP TABLE IF EXISTS Bin_at_test; +CREATE TABLE Bin_at_test +( + `Date` DateTime64(9, 'UTC'), + Num Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO Bin_at_test VALUES ('2018-02-24T15:14:00', 3), ('2018-02-23T16:14:00', 4), ('2018-02-26T15:14:00', 5); + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO 
Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; +print '-- bin()'; +print bin(4.5, 1); +print floor(4.5, 3); +print bin(datetime(1970-05-11 13:45:07), 1d); +print bin(16d, 7d); +print bin(datetime(1970-05-11 13:45:07.345623), 1ms); +print bin(datetime(2022-09-26 10:13:23.987234), 6ms); +print bin(datetime(1970-05-11 13:45:07.345623), 1microsecond); +print bin(datetime(2022-09-26 10:13:23.987234), 6microseconds); +print bin(datetime(1970-05-11 13:45:07.456345672), 16microseconds); +print bin(datetime('1970-05-11 13:45:07.456345672'), 1tick); +print bin(datetime(2022-09-26 10:13:23.987234128), 100nanosecond); +print bin(datetime(2022-09-26 10:13:23.987234128), 1tick); +print bin(datetime(2022-09-26 10:13:23.987234128), 99nanosecond); +Customers | summarize count() by bin(Age, Age); + +print '-- bin_at()'; +print bin_at(6.5, 2.5, 7); +print bin_at(1h, 1d, 12h); +print bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0)); +print bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0)); +Bin_at_test | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference new file mode 100644 index 000000000000..2415a39f2646 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -0,0 +1,420 @@ +-- test String Functions -- +-- Customers |where Education contains \'degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers |where Education contains \'Degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'Degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName endswith \'RE\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where ! 
FirstName endswith \'RE\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +--Customers | where FirstName endswith_cs \'re\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where FirstName !endswith_cs \'re\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation == \'Skilled Manual\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation != \'Skilled Manual\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'Skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'Skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hasprefix_cs \'Ab\' + +-- Customers | where Occupation !hasprefix_cs \'Ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hasprefix_cs \'ab\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hasprefix_cs \'ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'Ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hassuffix \'Ent\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers |where Education in (\'Bachelors\',\'High School\') +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Education !in (\'Bachelors\',\'High School\') +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName matches regex \'P.*r\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName startswith \'pet\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where 
FirstName !startswith \'pet\' +Latoya Shen Professional Graduate Degree 25 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName startswith_cs \'pet\' + +-- Customers | where FirstName !startswith_cs \'pet\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isempty(LastName) +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnotempty(LastName) +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +\N why Professional Partial College 38 + +-- Customers | where isnotnull(FirstName) +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnull(FirstName) +\N why Professional Partial College 38 + +-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1 +https://www.test.com/hello word + +-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1 +https%3A%2F%2Fwww.test.com%2Fhello%20word + +-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2)) + y +Lat en +Pet ra +The az +Ste x +App + +-- Customers | project name = strcat(FirstName, \' \', LastName) + why +Latoya Shen +Peter Nara +Theodore Diaz +Stephanie Cox +Apple + +-- Customers | project FirstName, strlen(FirstName) +\N \N +Latoya 6 +Peter 5 +Theodore 8 +Stephanie 9 +Apple 5 + +-- Customers | project strrep(FirstName,2,\'_\') +_ +Latoya_Latoya +Peter_Peter +Theodore_Theodore +Stephanie_Stephanie +Apple_Apple + +--print from_str = strrep("ABC", 2) +ABCABC +--print from_int = strrep(123,3,".") +123.123.123 +--print from_time = strrep(3s,2," ") +00:00:03 00:00:03 + +-- Customers | project toupper(FirstName) +\N +LATOYA +PETER +THEODORE +STEPHANIE +APPLE + +-- Customers | project tolower(FirstName) +\N +latoya +peter +theodore +stephanie +apple + + +-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 + +-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) +3 +3 +1 + +-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +PINEAPPLE ice cream is 20 +PINEAPPLE +20 + +20 +\N +\N +\N +\N +\N +45.6 +45.6 +alert + +-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet +[['T','h','e'],['p','ric','e'],['P','INEAPPL','E'],['i','c','e'],['c','rea','m']] + +-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) + + +John +iPhone +\N +26 
+26 +26 +26 +\N + +-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) +['aa','bb'] +['bbb'] +[''] +['a','','b'] +['aa','cc'] +['aabbcc'] +['aaa','bbb','ccc'] +[NULL] + +-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now. +1-2-Ab + +-- base64_encode_fromguid() +8jMxriJurkmwahbmqbIS6w== +-- base64_decode_toarray() +[] +[75,117,115,116,111] +-- base64_decode_toguid() +10e99626-bc2b-4c75-bb3e-fe606de25700 +1 +-- base64_encode_tostring + +S3VzdG8x +-- base64_decode_tostring + +Kusto1 +-- parse_url() +{"Scheme":"scheme","Host":"","Port":"0","Path":"/this/is/a/path","Username":"username","Password":"password","Query Parameters":{"k1":"v1","k2":"v2"},"Fragment":"fragment"} +-- parse_url() +{"Scheme":"","Host":"","Port":"","Path":"","Username":"","Password":"","Query Parameters":{},"Fragment":""} +-- parse_urlquery() +{"Query Parameters":{"k1":"v1","k2":"v2","k3":"v3"}} +-- strcat -- +a1235.00:00:00 +a111.01:00:00 +-- strcmp() +0 1 -1 1 +-- substring() +CD +-- translate() +kusto xxx +-- trim() +https://www.ibm.com +Te st1 + asd +asd +sd +-- trim_start() +www.ibm.com +Te st1// $ +asdw + +asd +-- trim_end() +https +- Te st1 +wasd + +asd +-- trim, trim_start, trim_end all at once +--https://bing.com-- -- https://bing.com-- --https://bing.com https://bing.com +-- replace_regex +Number was: 1 +-- has_any_index() +0 1 -1 -1 +-- parse_version() +1000000020000000300000040 +1000000020000000000000000 +1000000020000000000000000 +\N +\N +\N +\N +\N +\N +1000000020000000300000004 +1000000020000000000000000 +1000000020000000300000000 +1000000000000000000000000 +-- parse_json() +[1,2,3] +[{"a":123.5,"b":"{\\"c\\":456}"}] +-- parse_command_line() +[NULL] +[NULL] +-- reverse() +321 +43.321 + +dsa +][ +]3,2,1[ +]\'redaV\',\'htraD\'[ +Z0000000.00:00:21T51-01-7102 +00:00:30 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +-- parse_csv() +[''] +['aaa'] +['aa','b','cc'] +['record1','a','b','c'] +-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction) +9 +2 +2 +2 +-1 +2 +4 +2 +9 +-1 +-1 +-1 +9 +2 +-- has -- +1 +0 +1 +0 +0 +1 +1 +0 +1 +1 +0 + +asdf +asdf.ghkj +asdf.qwer + +asdf.qwer + +asdf.qwer +qwer +-- !has -- +asdfghkj +qwer +qwerqwer + +asdf +asdf.ghkj +asdfghkj +qwer +qwerqwer +-- has_all -- +asdf.qwer +-- has_any -- +asdf +asdf.ghkj +asdf.qwer +qwer diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql new file mode 100644 index 000000000000..f16e1d37d175 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -0,0 +1,392 @@ +-- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- '', 'why', 'Professional', 'Partial College', 38 +-- ] + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled 
Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +-- datatable (Version:string) [ +-- '1.2.3.4', +-- '1.2', +-- '1.2.3', +-- '1' +-- ] + +DROP TABLE IF EXISTS Versions; +CREATE TABLE Versions +( + Version String +) ENGINE = Memory; +INSERT INTO Versions VALUES ('1.2.3.4'),('1.2'),('1.2.3'),('1'); + +-- datatable (Text:string) [ +-- 'asdf', +-- 'asdf.ghkj', +-- 'asdf.qwer', +-- 'asdfghkj', +-- 'qwer', +-- 'qwerqwer' +-- ] + +drop table if exists StringTest; +create table StringTest +( + Text String +) engine = Memory; + +insert into StringTest values ('asdf'), ('asdf.ghkj'), ('asdf.qwer'), ('asdfghkj'), ('qwer'), ('qwerqwer'); + + +set dialect='kusto'; +print '-- test String Functions --'; + +print '-- Customers |where Education contains \'degree\''; +Customers |where Education contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'degree\''; +Customers |where Education !contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education contains \'Degree\''; +Customers |where Education contains 'Degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'Degree\''; +Customers |where Education !contains 'Degree' | order by LastName; +print ''; +print '-- Customers | where FirstName endswith \'RE\''; +Customers | where FirstName endswith 'RE' | order by LastName; +print ''; +print '-- Customers | where ! FirstName endswith \'RE\''; +Customers | where FirstName ! endswith 'RE' | order by LastName; +print ''; +print '--Customers | where FirstName endswith_cs \'re\''; +Customers | where FirstName endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where FirstName !endswith_cs \'re\''; +Customers | where FirstName !endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where Occupation == \'Skilled Manual\''; +Customers | where Occupation == 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation != \'Skilled Manual\''; +Customers | where Occupation != 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'skilled\''; +Customers | where Occupation has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation !has \'skilled\''; +Customers | where Occupation !has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'Skilled\''; +Customers | where Occupation has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation !has \'Skilled\''; +Customers | where Occupation !has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'Ab\''; +Customers | where Occupation hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'Ab\''; +Customers | where Occupation !hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'ab\''; +Customers | where Occupation hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'ab\''; +Customers | where Occupation !hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'Ent\''; +Customers | where Occupation hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation !hassuffix \'Ent\''; +Customers | where 
Occupation !hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers |where Education in (\'Bachelors\',\'High School\')'; +Customers |where Education in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where Education !in (\'Bachelors\',\'High School\')'; +Customers | where Education !in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where FirstName matches regex \'P.*r\''; +Customers | where FirstName matches regex 'P.*r'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith \'pet\''; +Customers | where FirstName startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith \'pet\''; +Customers | where FirstName !startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith_cs \'pet\''; +Customers | where FirstName startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith_cs \'pet\''; +Customers | where FirstName !startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where isempty(LastName)'; +Customers | where isempty(LastName); +print ''; +print '-- Customers | where isnotempty(LastName)'; +Customers | where isnotempty(LastName); +print ''; +print '-- Customers | where isnotnull(FirstName)'; +Customers | where isnotnull(FirstName)| order by LastName; +print ''; +print '-- Customers | where isnull(FirstName)'; +Customers | where isnull(FirstName)| order by LastName; +print ''; +print '-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1'; +Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1; +print ''; +print '-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1'; +Customers | project url_encode('https://www.test.com/hello word') | take 1; +print ''; +print '-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2))'; +Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName; +print ''; +print '-- Customers | project name = strcat(FirstName, \' \', LastName)'; +Customers | project name = strcat(FirstName, ' ', LastName)| order by LastName; +print ''; +print '-- Customers | project FirstName, strlen(FirstName)'; +Customers | project FirstName, strlen(FirstName)| order by LastName; +print ''; +print '-- Customers | project strrep(FirstName,2,\'_\')'; +Customers | project strrep(FirstName,2,'_')| order by LastName; +print ''; +print '--print from_str = strrep("ABC", 2)'; +print from_str = strrep('ABC', 2); +print '--print from_int = strrep(123,3,".")'; +print from_int = strrep(123, 3, '.'); +print '--print from_time = strrep(3s,2," ")'; +print from_time = strrep(3s, 2, ' '); +print ''; +print '-- Customers | project toupper(FirstName)'; +Customers | project toupper(FirstName)| order by LastName; +print ''; +print '-- Customers | project tolower(FirstName)'; +Customers | project tolower(FirstName)| order by LastName; +print ''; +-- Customer | where LastName in~ ("diaz", "cox") +print ''; +print '-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: 
subquery not supported yet'; +Customers | where Occupation has_all ('manual', 'skilled') | order by LastName; +print ''; +print '-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet'; +Customers | where Occupation has_any ('Skilled', 'abcd'); +print ''; +print '-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction)'; +Customers | project countof('The cat sat on the mat', 'at') | take 1; +Customers | project countof('The cat sat on the mat', 'at', 'normal') | take 1; +Customers | project countof('The cat sat on the mat', '\\s.he', 'regex') | take 1; +print ''; +print '-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction)'; +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(bool)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(date)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(guid)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(int)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(long)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(real)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(decimal)); +print extract(".*Action=(\\w+)",1, "dstPostNATPort=80 proto=tcp Action=alert"); +print ''; +print '-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet'; +Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20') | take 1; +print ''; +print '-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction)'; +print extract_json('', ''); -- { serverError BAD_ARGUMENTS } +print extract_json('a', ''); -- { serverError BAD_ARGUMENTS } +print extract_json('$.firstName', ''); +print extract_json('$.phoneNumbers[0].type', ''); +print extractjson('$.firstName', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); +print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(string)); +print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); +print 
extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(long)); +-- print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(bool)); -> true +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(double)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(guid)); +-- print extract_json('$.phoneNumbers', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(dynamic)); we won't be able to handle this particular case for a while, because it should return a dictionary +print ''; +print '-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction)'; +Customers | project split('aa_bb', '_') | take 1; +Customers | project split('aaa_bbb_ccc', '_', 1) | take 1; +Customers | project split('', '_') | take 1; +Customers | project split('a__b', '_') | take 1; +Customers | project split('aabbcc', 'bb') | take 1; +Customers | project split('aabbcc', '') | take 1; +Customers | project split('aaa_bbb_ccc', '_', -1) | take 1; +Customers | project split('aaa_bbb_ccc', '_', 10) | take 1; +print ''; +print '-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now.'; +Customers | project strcat_delim('-', '1', '2', strcat('A','b')) | take 1; +-- Customers | project strcat_delim('-', '1', '2', 'A' , 1s); +print ''; +print '-- base64_encode_fromguid()'; +-- print base64_encode_fromguid(guid(null)); +print base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')); +print base64_encode_fromguid(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print base64_encode_fromguid("abcd1231"); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- base64_decode_toarray()'; +print base64_decode_toarray(''); +print base64_decode_toarray('S3VzdG8='); +print '-- base64_decode_toguid()'; +print base64_decode_toguid("JpbpECu8dUy7Pv5gbeJXAA=="); +print base64_decode_toguid(base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'))) == guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'); +print '-- base64_encode_tostring'; +print base64_encode_tostring(''); +print base64_encode_tostring('Kusto1'); +print '-- base64_decode_tostring'; +print base64_decode_tostring(''); +print 
base64_decode_tostring('S3VzdG8x'); +print '-- parse_url()'; +print parse_url('scheme://username:password@host:1234/this/is/a/path?k1=v1&k2=v2#fragment'); +print '-- parse_url()'; +print parse_url(''); +print '-- parse_urlquery()'; +print parse_urlquery('k1=v1&k2=v2&k3=v3'); +print '-- strcat --'; +print strcat('a', 1, 2, 3, timespan(5d)); +print strcat('a', null, 9 + 2, 1h + 1d); +print '-- strcmp()'; +print strcmp('ABC','ABC'), strcmp('abc','ABC'), strcmp('ABC','abc'), strcmp('abcde','abc'); +print '-- substring()'; +print substring("ABCD", -2, 2); +print '-- translate()'; +print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); +print '-- trim()'; +print trim("--", "--https://www.ibm.com--"); +print trim("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim("", " asd "); +print trim("a$", "asd"); +print trim("^a", "asd"); +print '-- trim_start()'; +print trim_start("https://", "https://www.ibm.com"); +print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim_start("asd$", "asdw"); +print trim_start("asd$", "asd"); +print trim_start("d$", "asd"); +print '-- trim_end()'; +print trim_end("://www.ibm.com", "https://www.ibm.com"); +print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim_end("^asd", "wasd"); +print trim_end("^asd", "asd"); +print trim_end("^a", "asd"); +print '-- trim, trim_start, trim_end all at once'; +print str = "--https://bing.com--", pattern = '--' | extend start = trim_start(pattern, str), end = trim_end(pattern, str), both = trim(pattern, str); +print '-- replace_regex'; +print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); +print '-- has_any_index()'; +print has_any_index('this is an example', dynamic(['this', 'example'])), has_any_index("this is an example", dynamic(['not', 'example'])), has_any_index("this is an example", dynamic(['not', 'found'])), has_any_index("this is an example", dynamic([])); +print '-- parse_version()'; +print parse_version(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print parse_version(''); -> NULL +print parse_version('1.2.3.40'); +print parse_version('1.2'); +print parse_version(strcat('1.', '2')); +print parse_version('1.2.4.5.6'); +print parse_version('moo'); +print parse_version('moo.boo.foo'); +print parse_version(strcat_delim('.', 'moo', 'boo', 'foo')); +print parse_version(''); +print parse_version('....'); +Versions | project parse_version(Version); +print '-- parse_json()'; +print parse_json(dynamic([1, 2, 3])); +print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}'); +print '-- parse_command_line()'; +print parse_command_line(55, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print parse_command_line((52 + 3) * 4 % 2, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print parse_command_line('', 'windows'); +print parse_command_line(strrep(' ', 6), 'windows'); +-- print parse_command_line('echo \"hello world!\" print$?', 'windows'); -> ["echo","hello world!","print$?"] +-- print parse_command_line("yolo swag 'asd bcd' \"moo moo \"", 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] +-- print parse_command_line(strcat_delim(' ', "yolo", "swag", "\'asd bcd\'", "\"moo moo \""), 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] +print '-- reverse()'; +print reverse(123); +print reverse(123.34); +print reverse(''); +print reverse("asd"); +print reverse(dynamic([])); +print reverse(dynamic([1, 2, 3])); +print reverse(dynamic(['Darth', "Vader"])); +print reverse(datetime(2017-10-15 12:00)); +print 
reverse(timespan(3h)); +Customers | where Education contains 'degree' | order by reverse(FirstName); +print '-- parse_csv()'; +print parse_csv(''); +print parse_csv(65); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print parse_csv('aaa'); +print result=parse_csv('aa,b,cc'); +print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z'); +-- print result=parse_csv('aa,"b,b,b",cc,"Escaping quotes: ""Title""","line1\nline2"'); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] +-- print parse_csv(strcat(strcat_delim(',', 'aa', '"b,b,b"', 'cc', '"Escaping quotes: ""Title"""', '"line1\nline2"'), '\r\n', strcat_delim(',', 'asd', 'qcf'))); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] +print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction)'; + +Customers | project indexof('abcdefgabcdefg', 'cde', 1, 10, 2) | take 1; +print indexof('abcdefg','cde'); +print idx2 = indexof('abcdefg','cde',0,3); +print idx3 = indexof('abcdefg','cde',1,2); +print idx4 = indexof('abcdefg','cde',3,4); +print idx5 = indexof('abcdefg','cde',-5); +print idx6 = indexof(1234567,5,1,4); +print idx7 = indexof('abcdefg','cde',2,-1); +print idx8 = indexof('abcdefgabcdefg', 'cde', 1, 10, 2); +print idx9 = indexof('abcdefgabcdefg', 'cde', 1, -1, 3); +print indexof('abcdefgabcdefg','cde', -1); +print indexof('abcdefgabcdefg','cde', -4); +print indexof('abcdefgabcdefg','cde', -5); +print indexof('abcdefgabcdefg','cde', -105); + +print '-- has --'; +print 'svchost.exe1' has ''; +print 'svchost.exe1' has 'svchost.exe'; +print 'svchost.exe' has 'svchost.exe'; +print 'svchost.exe' has 'svchost.exe1'; +print '' has 'svchost.exe1'; +print '' has ''; +print '.' has ''; +print '.' has ','; +print '.' has '.'; +print '.ex.e' has 'ex'; +print '.ex.e' has 'exe'; +print ''; +StringTest | where Text has 'asdf'; +print ''; +StringTest | where Text has 'asdf.qwer'; +print ''; +StringTest | where Text has 'qwer'; + +print '-- !has --'; +StringTest | where Text !has 'asdf'; +print ''; +StringTest | where Text !has 'asdf.qwer'; + +print '-- has_all --'; +StringTest | where Text has_all ('asdf', 'qwer'); + +print '-- has_any --'; +StringTest | where Text has_any ('asdf', 'qwer'); diff --git a/tests/queries/0_stateless/02366_kql_lookup_join.reference b/tests/queries/0_stateless/02366_kql_lookup_join.reference new file mode 100644 index 000000000000..f4c338821d91 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_lookup_join.reference @@ -0,0 +1,74 @@ +-- lookup 1 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 2 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 3 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer +5 Tim Cook +-- lookup 4 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 5 -- +1 Bill Gates billg billg +2 Bill Clinton billc billc +3 Bill Clinton billc billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- Default join -- +c 4 30 +c 4 20 +b 3 10 +-- Default join 2-- +c 4 30 +c 4 20 +b 3 10 +-- Inner-join -- +c 4 30 +c 4 20 +b 3 10 +b 2 10 +-- Left outer-join -- +c 4 30 +c 4 20 +b 3 10 +b 2 10 +a 1 0 +-- Right outer-join -- +d 0 40 +c 4 30 +c 4 20 +b 3 10 +b 2 10 +-- Full outer-join -- +d 0 40 +c 4 30 +c 4 20 +b 3 10 +b 2 10 +a 1 0 +-- Left anti-join -- +a 1 0 +-- Right anti-join -- +d 0 40 +-- Left semi-join -- 
+c 4 30
+b 3 10
+b 2 10
+-- Right semi-join --
+c 4 30
+c 4 20
+b 3 10
diff --git a/tests/queries/0_stateless/02366_kql_lookup_join.sql b/tests/queries/0_stateless/02366_kql_lookup_join.sql
new file mode 100644
index 000000000000..e12bc6b2422a
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_lookup_join.sql
@@ -0,0 +1,62 @@
+DROP TABLE IF EXISTS FactTable;
+CREATE TABLE FactTable (Row String, Personal String, Family String) ENGINE = Memory;
+INSERT INTO FactTable VALUES ('1', 'Bill', 'Gates');
+INSERT INTO FactTable VALUES ('2', 'Bill', 'Clinton');
+INSERT INTO FactTable VALUES ('3', 'Bill', 'Clinton');
+INSERT INTO FactTable VALUES ('4', 'Steve', 'Ballmer');
+INSERT INTO FactTable VALUES ('5', 'Tim', 'Cook');
+
+DROP TABLE IF EXISTS DimTable;
+CREATE TABLE DimTable (Personal String, Family String, Alias String) ENGINE = Memory;
+INSERT INTO DimTable VALUES ('Bill', 'Gates', 'billg');
+INSERT INTO DimTable VALUES ('Bill', 'Clinton', 'billc');
+INSERT INTO DimTable VALUES ('Steve', 'Ballmer', 'steveb');
+INSERT INTO DimTable VALUES ('Tim', 'Cook', 'timc');
+
+DROP TABLE IF EXISTS X;
+CREATE TABLE X (Key String, Value1 Int64) ENGINE = Memory;
+INSERT INTO X VALUES ('a',1);
+INSERT INTO X VALUES ('b',2);
+INSERT INTO X VALUES ('b',3);
+INSERT INTO X VALUES ('c',4);
+
+DROP TABLE IF EXISTS Y;
+CREATE TABLE Y (Key String, Value2 Int64) ENGINE = Memory;
+INSERT INTO Y VALUES ('b',10);
+INSERT INTO Y VALUES ('c',20);
+INSERT INTO Y VALUES ('c',30);
+INSERT INTO Y VALUES ('d',40);
+
+set dialect='kusto';
+
+print '-- lookup 1 --';
+FactTable | lookup kind=leftouter DimTable on Personal, Family | order by Row asc;
+print '-- lookup 2 --';
+FactTable | lookup kind=inner DimTable on Personal, Family | order by Row asc;
+print '-- lookup 3 --';
+FactTable | lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family | order by Row asc;
+print '-- lookup 4 --';
+FactTable | project Row, Personal, Family | lookup kind=leftouter DimTable on Personal, Family | order by Row asc;
+print '-- lookup 5 --';
+FactTable | project Row, Personal, Family | lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family | lookup kind=inner DimTable on Personal, Family | order by Row asc;
+
+print '-- Default join --';
+X | order by Key, Value1 | join ( Y | order by Key, Value2 ) on $left.Key == $right.Key | order by Key, Value1, Value2;
+print '-- Default join 2--';
+X | order by Key, Value1 | join kind=innerunique ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2;
+print '-- Inner-join --';
+X | order by Key, Value1 | join kind=inner ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2;
+print '-- Left outer-join --';
+X | order by Key, Value1 | join kind=leftouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2;
+print '-- Right outer-join --';
+X | order by Key, Value1 | join kind=rightouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2;
+print '-- Full outer-join --';
+X | order by Key, Value1 | join kind=fullouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2;
+print '-- Left anti-join --';
+X | order by Key, Value1 | join kind=leftanti ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2;
+print '-- Right anti-join --';
+X | order by Key, Value1 | join kind=rightanti ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2;
+print '-- Left semi-join --';
+X | order by Key, Value1 | join kind=leftsemi ( Y | order by Key, Value2 ) on Key | order by Key,
Value1, Value2; +print '-- Right semi-join --'; +X | order by Key, Value1 | join kind=rightsemi ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_makeseries.reference b/tests/queries/0_stateless/02366_kql_makeseries.reference new file mode 100644 index 000000000000..dc25ee4beeb1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_makeseries.reference @@ -0,0 +1,61 @@ +-- from to +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,2,0] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,500,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- from +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,2] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- to +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [2,0] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [500,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- without from/to +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- without by +['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [70,334,54] +-- without aggregation alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +-- assign group alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +-- 3d step +Costco Snargaluff ['2016-09-10 00:00:00.000000000'] [134.66666666666666] +Costco Apple ['2016-09-10 00:00:00.000000000'] [2] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000'] [5.5] +-- numeric column +-- from to +Costco Snargaluff [10,11,12,13,14] [200,0,102,0,0] +Aldi Snargaluff [10,11,12,13,14] [0,500,0,0,0] +Aldi Apple [10,11,12,13,14] [5,0,6,0,0] +Costco Apple [10,11,12,13,14] 
[0,2,0,0,0]
+-- from
+Costco Snargaluff [10,11,12] [200,0,102]
+Aldi Snargaluff [10,11] [0,500]
+Aldi Apple [10,11,12] [5,0,6]
+Costco Apple [10,11] [0,2]
+-- to
+Costco Snargaluff [8,12,16] [200,102,0]
+Aldi Snargaluff [8,12,16] [500,0,0]
+Aldi Apple [8,12,16] [5,6,0]
+Costco Apple [8,12,16] [2,0,0]
+-- without from/to
+Costco Snargaluff [10,12] [200,102]
+Aldi Snargaluff [10] [500]
+Aldi Apple [10,12] [5,6]
+Costco Apple [10] [2]
+-- without by
+[10,12] [202,54]
+['2017-01-01 00:00:00.000000000','2017-01-02 00:00:00.000000000','2017-01-03 00:00:00.000000000','2017-01-04 00:00:00.000000000','2017-01-05 00:00:00.000000000','2017-01-06 00:00:00.000000000','2017-01-07 00:00:00.000000000','2017-01-08 00:00:00.000000000','2017-01-09 00:00:00.000000000'] [4,3,5,0,10.5,4,3,8,7]
diff --git a/tests/queries/0_stateless/02366_kql_makeseries.sql b/tests/queries/0_stateless/02366_kql_makeseries.sql
new file mode 100644
index 000000000000..3926d91e644d
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_makeseries.sql
@@ -0,0 +1,71 @@
+-- Azure Data Explorer Test Data
+-- let make_series_test_table = datatable (Supplier:string, Fruit:string, Price: real, Purchase:datetime)
+-- [
+-- 'Aldi','Apple',4,'2016-09-10',
+-- 'Costco','Apple',2,'2016-09-11',
+-- 'Aldi','Apple',6,'2016-09-10',
+-- 'Costco','Snargaluff',100,'2016-09-12',
+-- 'Aldi','Apple',7,'2016-09-12',
+-- 'Aldi','Snargaluff',400,'2016-09-11',
+-- 'Costco','Snargaluff',104,'2016-09-12',
+-- 'Aldi','Apple',5,'2016-09-12',
+-- 'Aldi','Snargaluff',600,'2016-09-11',
+-- 'Costco','Snargaluff',200,'2016-09-10',
+-- ];
+DROP TABLE IF EXISTS make_series_test_table;
+CREATE TABLE make_series_test_table
+(
+    Supplier Nullable(String),
+    Fruit String,
+    Price Float64,
+    Purchase Date
+) ENGINE = Memory;
+INSERT INTO make_series_test_table VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10');
+DROP TABLE IF EXISTS make_series_test_table2;
+CREATE TABLE make_series_test_table2
+(
+    Supplier Nullable(String),
+    Fruit String,
+    Price Int32,
+    Purchase Int32
+) ENGINE = Memory;
+INSERT INTO make_series_test_table2 VALUES ('Aldi','Apple',4,10),('Costco','Apple',2,11),('Aldi','Apple',6,10),('Costco','Snargaluff',100,12),('Aldi','Apple',7,12),('Aldi','Snargaluff',400,11),('Costco','Snargaluff',104,12),('Aldi','Apple',5,12),('Aldi','Snargaluff',600,11),('Costco','Snargaluff',200,10);
+DROP TABLE IF EXISTS make_series_test_table3;
+CREATE TABLE make_series_test_table3
+(
+    timestamp datetime,
+    metric Float64
+) ENGINE = Memory;
+INSERT INTO make_series_test_table3 VALUES (parseDateTimeBestEffort('2016-12-31T06:00'), 50), (parseDateTimeBestEffort('2017-01-01'), 4), (parseDateTimeBestEffort('2017-01-02'), 3), (parseDateTimeBestEffort('2017-01-03'), 4), (parseDateTimeBestEffort('2017-01-03T03:00'), 6), (parseDateTimeBestEffort('2017-01-05'), 8), (parseDateTimeBestEffort('2017-01-05T13:40'), 13), (parseDateTimeBestEffort('2017-01-06'), 4), (parseDateTimeBestEffort('2017-01-07'), 3), (parseDateTimeBestEffort('2017-01-08'), 8), (parseDateTimeBestEffort('2017-01-08T21:00'), 8), (parseDateTimeBestEffort('2017-01-09'), 2), (parseDateTimeBestEffort('2017-01-09T12:00'), 11), (parseDateTimeBestEffort('2017-01-10T05:00'), 5);
+
+set dialect = 'kusto';
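+
+-- A note on make-series semantics (paraphrasing the Kusto docs): the `on` axis is split into
+-- fixed-width bins from `from` (inclusive) up to `to` (exclusive); each `by` group yields one
+-- row holding two parallel arrays, the bin starts and the aggregated values, and bins with no
+-- input rows are filled with `default` (e.g. the zeros in the reference output above).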
+print '-- from to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- from'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- without from/to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- without by'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d; +print '-- without aggregation alias'; +make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier, Fruit; +print '-- assign group alias'; +make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier_Name = Supplier, Fruit; +print '-- 3d step'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 3d by Supplier, Fruit | order by Supplier, Fruit; + +print '-- numeric column'; +print '-- from to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit; +print '-- from'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 step 1.0 by Supplier, Fruit; +print '-- to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase to 18 step 4.0 by Supplier, Fruit; +print '-- without from/to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0 by Supplier, Fruit; +print '-- without by'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; + +make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d; diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.reference b/tests/queries/0_stateless/02366_kql_mvexpand.reference new file mode 100644 index 000000000000..25be070eb0b7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_mvexpand.reference @@ -0,0 +1,65 @@ +-- mv-expand -- +-- mv_expand_test_table | mv-expand c -- +1 ['Salmon','Steak','Chicken'] 1 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 2 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 3 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 4 [5,6,7,8] +-- mv_expand_test_table | mv-expand c, d -- +1 ['Salmon','Steak','Chicken'] 1 5 +1 ['Salmon','Steak','Chicken'] 2 6 +1 ['Salmon','Steak','Chicken'] 3 7 +1 ['Salmon','Steak','Chicken'] 4 8 +-- mv_expand_test_table | mv-expand b | mv-expand c -- +1 Salmon 1 [5,6,7,8] +1 Salmon 2 [5,6,7,8] +1 Salmon 3 [5,6,7,8] +1 Salmon 4 [5,6,7,8] +1 Steak 1 [5,6,7,8] +1 Steak 2 [5,6,7,8] +1 Steak 3 [5,6,7,8] +1 Steak 4 [5,6,7,8] +1 Chicken 1 [5,6,7,8] +1 Chicken 2 [5,6,7,8] +1 Chicken 3 [5,6,7,8] +1 Chicken 4 [5,6,7,8] +-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d -- +0 1 Salmon 1 5 +1 1 Steak 2 6 +2 1 Chicken 3 7 +3 1 4 8 +-- mv_expand_test_table | mv-expand array_concat(c,d) -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 +1 
['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 +-- mv_expand_test_table | mv-expand x = c, y = d -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 8 +-- mv_expand_test_table | mv-expand xy = array_concat(c, d) -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 +-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy -- +1 1 +2 1 +-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) -- +0 1 ['Salmon','Steak','Chicken'] 1 true +1 1 ['Salmon','Steak','Chicken'] 2 true +2 1 ['Salmon','Steak','Chicken'] 3 true +3 1 ['Salmon','Steak','Chicken'] 4 true +-- mv_expand_test_table | mv-expand c to typeof(bool) -- +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql new file mode 100644 index 000000000000..e77986096463 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_mvexpand.sql @@ -0,0 +1,35 @@ +-- datatable(a: int, b: dynamic, c: dynamic, d: dynamic) [ +-- 1, dynamic(['Salmon', 'Steak', 'Chicken']), dynamic([1, 2, 3, 4]), dynamic([5, 6, 7, 8]) +-- ] + +DROP TABLE IF EXISTS mv_expand_test_table; +CREATE TABLE mv_expand_test_table +( + a UInt8, + b Array(String), + c Array(Int8), + d Array(Int8) +) ENGINE = Memory; +INSERT INTO mv_expand_test_table VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]); +set dialect='kusto'; +print '-- mv-expand --'; +print '-- mv_expand_test_table | mv-expand c --'; +mv_expand_test_table | mv-expand c; +print '-- mv_expand_test_table | mv-expand c, d --'; +mv_expand_test_table | mv-expand c, d; +print '-- mv_expand_test_table | mv-expand b | mv-expand c --'; +mv_expand_test_table | mv-expand b | mv-expand c; +print '-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d --'; +mv_expand_test_table | mv-expand with_itemindex=index b, c, d; +print '-- mv_expand_test_table | mv-expand array_concat(c,d) --'; +mv_expand_test_table | mv-expand array_concat(c,d); +print '-- mv_expand_test_table | mv-expand x = c, y = d --'; +mv_expand_test_table | mv-expand x = c, y = d; +print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) --'; +mv_expand_test_table | mv-expand xy = array_concat(c, d); +print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy --'; +mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy; +print '-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) --'; +mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool); +print '-- 
mv_expand_test_table | mv-expand c to typeof(bool) --'; +mv_expand_test_table | mv-expand c to typeof(bool); diff --git a/tests/queries/0_stateless/02366_kql_native_bin.reference b/tests/queries/0_stateless/02366_kql_native_bin.reference new file mode 100644 index 000000000000..901fc2c687db --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_native_bin.reference @@ -0,0 +1,33 @@ +-- kql_bin -- +-- Numbers -- +4.5 +4 +3 +4.5 +\N +nan +-- Intervals -- +0 Nullable(IntervalWeek) +2000000000 +2000000000 Nullable(IntervalNanosecond) +\N +-- DateTime64 -- +2022-11-08 12:34:56.789012300 +2022-11-08 12:34:56.789012000 +2022-11-08 12:34:56.789000000 +2022-11-08 12:34:56.000000000 +2022-11-08 12:34:56.000000000 +2022-11-08 12:34:00.000000000 +2022-11-08 12:34:00.000000000 +\N +-- Date -- +2022-11-06 00:00:00.000000000 +-- Date32 -- +2022-11-06 00:00:00.000000000 +-- DateTime -- +2022-11-08 12:34:00.000000000 +-- kql_bin_at -- +4.5 +-43200000000000 +2017-05-14 12:00:00.000000000 +2017-05-14 00:00:00.000000000 diff --git a/tests/queries/0_stateless/02366_kql_native_bin.sql b/tests/queries/0_stateless/02366_kql_native_bin.sql new file mode 100644 index 000000000000..0b82dbcd03e7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_native_bin.sql @@ -0,0 +1,47 @@ +select '-- kql_bin --'; +select '-- Numbers --'; +select kql_bin(4.5, 1.5); +select kql_bin(4.5, 2); +select kql_bin(4, 3); +select kql_bin(5, 1.5); +select kql_bin(5, 0); +select kql_bin(4.5, 0); + +select kql_bin(5, toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select kql_bin(5, toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Intervals --'; +select kql_bin(toIntervalWeek(1), toIntervalWeek(2)) as result, toTypeName(result); +select kql_bin(toIntervalNanosecond(2500000000), toIntervalNanosecond(1000000000)); +select kql_bin(toIntervalNanosecond(2500000000), 1) as result, toTypeName(result); +select kql_bin(toIntervalNanosecond(2500000000), toIntervalNanosecond(0)); + +select kql_bin(toIntervalWeek(2), toIntervalHour(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select kql_bin(toIntervalWeek(2), toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- DateTime64 --'; +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(100)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), 1); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(60000000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalMinute(1)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalMinute(0)); + +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Date --'; +select kql_bin(toDate('2022-11-07'), toIntervalNanosecond(172800000000000)); + +select '-- Date32 --'; +select kql_bin(toDate32('2022-11-07'), toIntervalNanosecond(172800000000000)); + +select '-- DateTime --'; +select kql_bin(toDateTime('2022-11-08 12:34:56', 'UTC'), 
toIntervalNanosecond(60000000000)); + +select '-- kql_bin_at --'; +select kql_bin_at(6.5, 2.5, 7); +select kql_bin_at(toIntervalNanosecond(3600000000000), toIntervalNanosecond(86400000000000), toIntervalNanosecond(43200000000000)); +select kql_bin_at(toDateTime64('2017-05-15 10:20:00.123', 5, 'UTC'), toIntervalNanosecond(86400000000000), toDateTime('1970-01-01 12:00:00', 'UTC')); +select kql_bin_at(toDate('2017-05-17'), toIntervalNanosecond(604800000000000), toDate32('2017-06-04')); diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.reference b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference new file mode 100644 index 000000000000..8952286b86b9 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference @@ -0,0 +1,105 @@ +-- #1 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 +-- #2 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #3 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #4 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #5 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #6 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #7 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #8 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #9 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #10 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #11 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #12 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #13 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #14 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #15 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual 
Bachelors 28 +-- #16 -- +1 +-- #17 -- +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 +-- #18 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #19 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #20 -- +0 +-- #21 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +-- #22 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #23 -- +Stephanie Cox Management abcd defg Bachelors 33 +-- #24 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #25 -- +Peter Nara Skilled Manual Graduate Degree 26 diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.sql b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql new file mode 100644 index 000000000000..e837c108add3 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); +Select '-- #1 --' ; +select * from kql(Customers | where FirstName !in ('Peter', 'Latoya')); +Select '-- #2 --' ; +select * from kql(Customers | where FirstName !in ("test", "test2")); +Select '-- #3 --' ; +select * from kql(Customers | where FirstName !contains 'Pet'); +Select '-- #4 --' ; +select * from kql(Customers | where FirstName !contains_cs 'Pet'); +Select '-- #5 --' ; +select * from kql(Customers | where FirstName !endswith 'ter'); +Select '-- #6 --' ; +select * from kql(Customers | where FirstName !endswith_cs 'ter'); +Select '-- #7 --' ; +select * from kql(Customers | where FirstName != 'Peter'); +Select '-- #8 --' ; +select * from kql(Customers | where FirstName !has 'Peter'); +Select '-- #9 --' ; +select * from kql(Customers | where FirstName !has_cs 'peter'); +Select '-- #10 --' ; +select * from kql(Customers | where FirstName !hasprefix 'Peter'); +Select '-- #11 --' ; +select * from kql(Customers | where FirstName !hasprefix_cs 'Peter'); +Select '-- #12 --' ; +select * from kql(Customers | where FirstName !hassuffix 'Peter'); +Select '-- #13 --' ; +select * from kql(Customers | where FirstName !hassuffix_cs 'Peter'); +Select '-- #14 --' ; +select * from kql(Customers | where FirstName !startswith 'Peter'); +Select '-- #15 --' ; +select * from kql(Customers | where FirstName !startswith_cs 'Peter'); +Select '-- #16 --' ; +select * from kql(print t = 'a' in~ ('A', 'b', 'c')); +Select '-- #17 --' ; +select * from kql(Customers | where FirstName in~ ('peter', 'apple')); +Select '-- #18 --' ; +select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter'))); +Select '-- #19 --' ; +select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where Age < 30))); +Select '-- #20 --' ; +select * from kql(print t = 'a' !in~ ('A', 'b', 'c')); +Select '-- #21 
--' ;
+select * from kql(Customers | where FirstName !in~ ('peter', 'apple'));
+Select '-- #22 --' ;
+select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName == 'Peter')));
+Select '-- #23 --' ;
+select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where Age < 30)));
+Select '-- #24 --' ;
+select * from kql(Customers | where FirstName =~ 'peter' and LastName =~ 'naRA');
+Select '-- #25 --' ;
+select * from kql(Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA');
+
+DROP TABLE IF EXISTS Customers;
diff --git a/tests/queries/0_stateless/02366_kql_range.reference b/tests/queries/0_stateless/02366_kql_range.reference
new file mode 100644
index 000000000000..e31e9fcc5c71
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_range.reference
@@ -0,0 +1,56 @@
+-- range function int, int, int --
+[1,3,5,7,9]
+-- range function int, int --
+[1,2,3,4,5,6,7,8,9,10]
+-- range function float, float, float --
+[1.2,3.4000000000000004,5.6000000000000005,7.800000000000001,10]
+-- range function positive float, float, int --
+[1.2,3.2,5.2,7.2,9.2]
+-- range function positive float, int, float --
+[1.2,3.4000000000000004,5.6000000000000005,7.800000000000001,10]
+-- range function positive integer, int, float --
+[1,3.2,5.4,7.6000000000000005,9.8]
+-- range function positive integer, float, float --
+[1,3.2,5.4,7.6000000000000005,9.8]
+-- range function positive float, int, int --
+[1.2,3.2,5.2,7.2,9.2]
+-- range function positive int, int, negative int --
+[12,10,8,6,4]
+-- range function positive float, int, negative float --
+[12.8,10.5,8.2,5.8999999999999995,3.5999999999999996]
+-- range function datetime, datetime, timespan --
+['2001-01-01 00:00:00.000000000','2001-01-01 05:00:00.000000000','2001-01-01 10:00:00.000000000','2001-01-01 15:00:00.000000000','2001-01-01 20:00:00.000000000']
+-- range function datetime, datetime, negative timespan --
+['2001-01-03 00:00:00.000000000','2001-01-02 19:00:00.000000000','2001-01-02 14:00:00.000000000','2001-01-02 09:00:00.000000000','2001-01-02 04:00:00.000000000']
+-- range function datetime, datetime --
+['2001-01-01 00:00:00.000000000','2001-01-01 01:00:00.000000000','2001-01-01 02:00:00.000000000','2001-01-01 03:00:00.000000000','2001-01-01 04:00:00.000000000','2001-01-01 05:00:00.000000000','2001-01-01 06:00:00.000000000','2001-01-01 07:00:00.000000000','2001-01-01 08:00:00.000000000','2001-01-01 09:00:00.000000000','2001-01-01 10:00:00.000000000','2001-01-01 11:00:00.000000000','2001-01-01 12:00:00.000000000','2001-01-01 13:00:00.000000000','2001-01-01 14:00:00.000000000','2001-01-01 15:00:00.000000000','2001-01-01 16:00:00.000000000','2001-01-01 17:00:00.000000000','2001-01-01 18:00:00.000000000','2001-01-01 19:00:00.000000000','2001-01-01 20:00:00.000000000','2001-01-01 21:00:00.000000000','2001-01-01 22:00:00.000000000','2001-01-01 23:00:00.000000000','2001-01-02 00:00:00.000000000']
+-- range function timespan, timespan, timespan --
+[3600000000000,10800000000000,18000000000000]
+-- range function timespan, timespan --
+[3600000000000,7200000000000,10800000000000,14400000000000,18000000000000]
+-- range function timespan, timespan, negative timespan --
+[39600000000000,32400000000000,25200000000000,18000000000000]
+-- range function float timespan, timespan, timespan --
+[5400000000000,12600000000000]
+-- range function endofday, endofday, timespan --
+['2017-01-01 23:59:59.999999900','2017-01-02 23:59:59.999999900','2017-01-03 23:59:59.999999900']
+-- range operator int, int, int --
+20
+21
+22
+23
+24
+25
+-- range operator float, float, float --
+20.5
+22
+23.5
+25
+-- range operator datetime, datetime, timespan --
+2023-01-01 00:00:00.000000000
+2023-01-02 00:00:00.000000000
+2023-01-03 00:00:00.000000000
+2023-01-04 00:00:00.000000000
+2023-01-05 00:00:00.000000000
+2023-01-06 00:00:00.000000000
+2023-01-07 00:00:00.000000000
diff --git a/tests/queries/0_stateless/02366_kql_range.sql b/tests/queries/0_stateless/02366_kql_range.sql
new file mode 100644
index 000000000000..109db6395020
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_range.sql
@@ -0,0 +1,64 @@
+set dialect = 'kusto';
+
+print '-- range function int, int, int --';
+print range(1, 10, 2);
+
+print '-- range function int, int --';
+print range(1, 10);
+
+print '-- range function float, float, float --';
+print range(1.2, 10.3, 2.2);
+
+print '-- range function positive float, float, int --';
+print range(1.2, 10.3, 2);
+
+print '-- range function positive float, int, float --';
+print range(1.2, 10, 2.2);
+
+print '-- range function positive integer, int, float --';
+print range(1, 10, 2.2);
+
+print '-- range function positive integer, float, float --';
+print range(1, 10.5, 2.2);
+
+print '-- range function positive float, int, int --';
+print range(1.2, 10, 2);
+
+print '-- range function positive int, int, negative int --';
+print range(12, 3, -2);
+
+print '-- range function positive float, int, negative float --';
+print range(12.8, 3, -2.3);
+
+print '-- range function datetime, datetime, timespan --';
+print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h);
+
+print '-- range function datetime, datetime, negative timespan --';
+print range(datetime('2001-01-03'), datetime('2001-01-02'), -5h);
+
+print '-- range function datetime, datetime --';
+print range(datetime('2001-01-01'), datetime('2001-01-02'));
+
+print '-- range function timespan, timespan, timespan --';
+print range(1h, 5h, 2h);
+
+print '-- range function timespan, timespan --';
+print range(1h, 5h);
+
+print '-- range function timespan, timespan, negative timespan --';
+print range(11h, 5h, -2h);
+
+print '-- range function float timespan, timespan, timespan --';
+print range(1.5h, 5h, 2h);
+
+print '-- range function endofday, endofday, timespan --';
+print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d);
+
+print '-- range operator int, int, int --';
+range Age from 20 to 25 step 1;
+
+print '-- range operator float, float, float --';
+range temp from 20.5 to 25.5 step 1.5;
+
+print '-- range operator datetime, datetime, timespan --';
+range FirstWeek from datetime('2023-01-01') to datetime('2023-01-07') step 1d;
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02366_kql_subquery.reference b/tests/queries/0_stateless/02366_kql_subquery.reference
new file mode 100644
index 000000000000..51fd985276b8
--- /dev/null
+++ b/tests/queries/0_stateless/02366_kql_subquery.reference
@@ -0,0 +1,5 @@
+-- support subquery for in operator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery needs to be wrapped with brackets inside brackets); TODO: case-insensitive not supported yet
+Latoya Shen Professional Graduate Degree 25
+Peter Nara Skilled Manual Graduate Degree 26
+Theodore Diaz Skilled Manual Bachelors 28
+Apple Skilled Manual Bachelors 28
diff --git a/tests/queries/0_stateless/02366_kql_subquery.sql b/tests/queries/0_stateless/02366_kql_subquery.sql
new file mode 100644
index 000000000000..68aa58de26c6
--- /dev/null
b/tests/queries/0_stateless/02366_kql_subquery.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect='kusto'; + +print '-- support subquery for in operator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery needs to be wrapped with brackets inside brackets); TODO: case-insensitive not supported yet'; +Customers | where Age in ((Customers|project Age|where Age < 30)) | order by LastName; + diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference new file mode 100644 index 000000000000..2dc99107caea --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -0,0 +1,108 @@ +-- test summarize -- +12 25 46 32.416666666666664 389 +Skilled Manual 5 26 36 30.2 151 +Professional 6 25 46 34.166666666666664 205 +Management abcd defg 1 33 33 33 33 +Skilled Manual 0 +Professional 2 +Management abcd defg 0 +Skilled Manual 36 +Professional 38 +Management abcd defg 33 +Skilled Manual 26 +Professional 25 +Management abcd defg 33 +Skilled Manual 30.2 +Professional 29.25 +Management abcd defg 33 +Skilled Manual 151 +Professional 117 +Management abcd defg 33 +4 +2 +40 2 +30 4 +20 6 +Skilled Manual 5 +Professional 6 +Management abcd defg 1 +-- make_list() -- +Skilled Manual ['Bachelors','Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College','Partial College','Partial College','Partial College','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Bachelors','Graduate Degree'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +-- make_list_if() -- +Skilled Manual ['Edward','Christine'] +Professional ['Dalton','Angel'] +Management abcd defg ['Stephanie'] +Skilled Manual ['Edward'] +Professional ['Dalton'] +Management abcd defg ['Stephanie'] +-- make_set() -- +Skilled Manual ['Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Graduate Degree','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +-- make_set_if() -- +Skilled Manual ['Partial College','High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +-- stdev() -- +6.855102059227432 +-- stdevif() -- +7.557189365836421 +-- binary_all_and -- +42 +-- binary_all_or -- +46 +-- binary_all_xor -- +4 +43.8 +[25.549999999999997,30.5,43.8] +30.5 +35 +[25,35,45] +-- Summarize following sort -- +Skilled Manual 5 +Professional 6 +Management abcd defg 1 +-- summarize with bin -- +0 1 +245000 2 +1970-01-01 00:00:00.000000000 1 +1970-01-01 00:04:05.000000000 2 +0 1 +245000000000 2 +2015-10-12 00:00:00.000000000 +2016-10-12 00:00:00.000000000 +-- make_list_with_nulls --
+['Theodore','Stephanie','Peter','Latoya','Joshua','Edward','Dalton','Christine','Cameron','Angel','Apple',NULL] +Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] +Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] +Management abcd defg ['Stephanie'] +Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] [28,26,36,33,28] +Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] [25,26,42,28,46,38] +Management abcd defg ['Stephanie'] [33] +-- count_distinct -- +4 +-- count_distinctif -- +3 +-- format_datetime -- +70-01-01 1 +70-01-03 2 +-- take_any -- +Theodore +Theodore Diaz +Cameron Rodriguez Cameron Rodriguez +Christine Nara Christine Nara +TheodoreDiaz +-- take_anyif -- +Theodore +Theodore 11 diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql new file mode 100644 index 000000000000..9a970ea87f00 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -0,0 +1,126 @@ +-- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, +-- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, +-- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, +-- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, +-- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, +-- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- dynamic(null), 'why', 'Professional', 'Partial College', 38 +-- ] + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Joshua','Lee','Professional','Partial College',26),('Edward','Hernandez','Skilled Manual','High School',36),('Dalton','Wood','Professional','Partial College',42),('Christine','Nara','Skilled Manual','Partial College',33),('Cameron','Rodriguez','Professional','Partial College',28),('Angel','Stewart','Professional','Partial College',46),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +-- datatable (LogEntry:string, Created:long) [ +-- 'Darth Vader has entered the room.', 546, +-- 'Rambo is suspiciously looking at Darth Vader.', 245234, +-- 'Darth Sidious electrocutes both using Force Lightning.', 245554 +-- ] + +drop table if exists EventLog; +create table EventLog +( + LogEntry String, + Created Int64 +) ENGINE = Memory; + +insert into EventLog values ('Darth Vader has entered the room.', 546), ('Rambo is suspiciously looking at Darth Vader.', 245234), ('Darth Sidious electrocutes both using Force Lightning.', 245554); + +drop table if exists Dates; +create table Dates +( + EventTime DateTime('UTC'), +) ENGINE = Memory; + +insert into Dates values ('2015-10-12'), ('2016-10-12'); + +select '-- test summarize --'; +set dialect='kusto'; +Customers | summarize count(), min(Age), max(Age),
avg(Age), sum(Age); +Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age) by Occupation; +Customers | summarize countif(Age>40) by Occupation; +Customers | summarize MyMax = maxif(Age, Age<40) by Occupation; +Customers | summarize MyMin = minif(Age, Age<40) by Occupation; +Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation; +Customers | summarize MySum = sumif(Age, Age<40) by Occupation; +Customers | summarize dcount(Education); +Customers | summarize dcountif(Education, Occupation=='Professional'); +Customers | summarize count_ = count() by bin(Age, 10) | order by count_ asc; +Customers | summarize job_count = count() by Occupation | where job_count > 0; +Customers | summarize 'Edu Count'=count() by Education | sort by 'Edu Count' desc; -- { clientError 62 } + +print '-- make_list() --'; +Customers | summarize f_list = make_list(Education) by Occupation; +Customers | summarize f_list = make_list(Education, 2) by Occupation; +print '-- make_list_if() --'; +Customers | summarize f_list = make_list_if(FirstName, Age>30) by Occupation; +Customers | summarize f_list = make_list_if(FirstName, Age>30, 1) by Occupation; +print '-- make_set() --'; +Customers | summarize f_list = make_set(Education) by Occupation; +Customers | summarize f_list = make_set(Education, 2) by Occupation; +print '-- make_set_if() --'; +Customers | summarize f_list = make_set_if(Education, Age>30) by Occupation; +Customers | summarize f_list = make_set_if(Education, Age>30, 1) by Occupation; +print '-- stdev() --'; +Customers | project Age | summarize stdev(Age); +print '-- stdevif() --'; +Customers | project Age | summarize stdevif(Age, Age%2==0); +print '-- binary_all_and --'; +Customers | project Age | where Age > 40 | summarize binary_all_and(Age); +print '-- binary_all_or --'; +Customers | project Age | where Age > 40 | summarize binary_all_or(Age); +print '-- binary_all_xor --'; +Customers | project Age | where Age > 40 | summarize binary_all_xor(Age); + +Customers | project Age | summarize percentile(Age, 95); +Customers | project Age | summarize percentiles(Age, 5, 50, 95); +Customers | project Age | summarize percentiles(Age, 5, 50, 95)[1]; +Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilew(AgeBucket, w, 75); +Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilesw(AgeBucket, w, 50, 75, 99.9); + +print '-- Summarize following sort --'; +Customers | sort by FirstName | summarize count() by Occupation; + +print '-- summarize with bin --'; +EventLog | summarize count=count() by bin(Created, 1000); +EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s); +EventLog | summarize count=count() by time_label=bin(Created / 1000 * 1s, 1s); +Dates | project bin(EventTime, 1m); +print '-- make_list_with_nulls --'; +Customers | summarize t = make_list_with_nulls(FirstName); +Customers | summarize f_list = make_list_with_nulls(FirstName) by Occupation; +Customers | summarize f_list = make_list_with_nulls(FirstName), a_list = make_list_with_nulls(Age) by Occupation; +print '-- count_distinct --'; +Customers | summarize count_distinct(Education); +print '-- count_distinctif --'; +Customers | summarize count_distinctif(Education, Age > 30); + +print '-- format_datetime --'; +EventLog | summarize count() by dt = format_datetime(bin(unixtime_seconds_todatetime(Created), 1d), 'yy-MM-dd') | order by dt asc; + +print '-- take_any --'; +Customers | summarize take_any(FirstName); +Customers | summarize 
take_any(FirstName), take_any(LastName); +Customers | where FirstName startswith 'C' | summarize take_any(FirstName, LastName) by FirstName, LastName; +Customers | summarize take_any(strcat(FirstName,LastName)); +print '-- take_anyif --'; +Customers | summarize take_anyif(FirstName, LastName has 'Diaz'); +Customers | summarize take_anyif(FirstName, LastName has 'Diaz'), dcount(FirstName); + +-- TODO: +-- arg_max() +-- arg_min() diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference new file mode 100644 index 000000000000..e70c02ce34fa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.reference @@ -0,0 +1,139 @@ +-- test Query only has table name: -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Joshua Lee Professional Partial College 26 +Edward Hernandez Skilled Manual High School 36 +Dalton Wood Professional Partial College 42 +Christine Nara Skilled Manual Partial College 33 +Cameron Rodriguez Professional Partial College 28 +Angel Stewart Professional Partial College 46 +-- Query has Column Selection -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Edward Hernandez Skilled Manual +Dalton Wood Professional +Christine Nara Skilled Manual +Cameron Rodriguez Professional +Angel Stewart Professional +-- Query has limit -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with bigger value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with smaller value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +-- Query has second Column selection -- +Theodore Diaz +Stephanie Cox +Peter Nara +-- Query has second Column selection with extra column -- +-- Query with desc sort -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Professional +Professional +Management +-- Query with asc sort -- +Management +Professional +Professional +Skilled Manual +Skilled Manual +-- Query with sort (without keyword asc desc) -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Professional +Professional +Management +-- Query with sort 2 Columns with different direction -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Peter Nara Skilled Manual +Theodore Diaz Skilled Manual +-- Query with second sort -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Peter Nara Skilled Manual +Theodore Diaz Skilled Manual +-- Test String Equals (==) -- +Theodore Diaz Skilled Manual +Peter Nara Skilled Manual +Edward Hernandez Skilled Manual +Christine Nara Skilled Manual +-- Test String Not equals (!=) -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Dalton Wood Professional +Cameron Rodriguez Professional +Angel Stewart Professional +-- Test Filter using a list (in) -- +Theodore Diaz Skilled Manual Bachelors +Stephanie Cox Management Bachelors +Edward Hernandez Skilled Manual High School +-- Test Filter 
using a list (!in) -- +Peter Nara Skilled Manual Graduate Degree +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (contains_cs) -- +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (startswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (endswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +-- Test Filter using numerical equal (==) -- +Peter Nara Skilled Manual Graduate Degree 26 +Joshua Lee Professional Partial College 26 +-- Test Filter using numerical great and less (> , <) -- +Stephanie Cox Management Bachelors 33 +Edward Hernandez Skilled Manual High School 36 +Christine Nara Skilled Manual Partial College 33 +-- Test Filter using multi where -- +Dalton Wood Professional Partial College 42 +Angel Stewart Professional Partial College 46 +-- Complex query with unknown function -- +-- Missing column in front of startsWith -- diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql new file mode 100644 index 000000000000..f73c4c09ccaa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -0,0 +1,88 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); + +set dialect='kusto'; +print '-- test Query only has table name: --'; +Customers; + +print '-- Query has Column Selection --'; +Customers | project FirstName,LastName,Occupation; + +print '-- Query has limit --'; +Customers | project FirstName,LastName,Occupation | take 5; +Customers | project FirstName,LastName,Occupation | limit 5; + +print '-- Query has second limit with bigger value --'; +Customers | project FirstName,LastName,Occupation | take 5 | take 7; + +print '-- Query has second limit with smaller value --'; +Customers | project FirstName,LastName,Occupation | take 5 | take 3; + +print '-- Query has second Column selection --'; +Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName; + +print '-- Query has second Column selection with extra column --'; +Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education;-- { serverError 47 } 
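+-- Once project has narrowed the pipeline to FirstName,LastName, later stages cannot reference the dropped Education column, hence the expected error above. +-- A working variant (left commented, as a sketch only, so the reference output stays unchanged): +-- Customers | project FirstName,LastName,Occupation | take 3 | project FirstName;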
+ +print '-- Query with desc sort --'; +Customers | project FirstName | take 5 | sort by FirstName desc; +Customers | project Occupation | take 5 | order by Occupation desc; + +print '-- Query with asc sort --'; +Customers | project Occupation | take 5 | sort by Occupation asc; + +print '-- Query with sort (without keyword asc desc) --'; +Customers | project FirstName | take 5 | sort by FirstName; +Customers | project Occupation | take 5 | order by Occupation; + +print '-- Query with sort 2 Columns with different direction --'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; + +print '-- Query with second sort --'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; + +print '-- Test String Equals (==) --'; +Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; + +print '-- Test String Not equals (!=) --'; +Customers | project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; + +print '-- Test Filter using a list (in) --'; +Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); + +print '-- Test Filter using a list (!in) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); + +print '-- Test Filter using common string operations (contains_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; + +print '-- Test Filter using common string operations (startswith_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; + +print '-- Test Filter using common string operations (endswith_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; + +print '-- Test Filter using numerical equal (==) --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; + +print '-- Test Filter using numerical great and less (> , <) --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; + +print '-- Test Filter using multi where --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; + +print '-- Complex query with unknown function --'; +hits | where CounterID == 62 and EventDate >= '2013-07-14' and EventDate <= '2013-07-15' and IsRefresh == 0 and DontCountHits == 0 | summarize count() by d=bin(poopoo(EventTime), 1m) | order by d | limit 10; -- { clientError UNKNOWN_FUNCTION } + +print '-- Missing column in front of startsWith --'; +StormEvents | where startswith "W" | summarize Count=count() by State; -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02366_kql_top_hitters.reference b/tests/queries/0_stateless/02366_kql_top_hitters.reference new file mode 100644 index 000000000000..72e1e27c88b1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_top_hitters.reference @@ -0,0 +1,38 @@ +--top 1-- +Angel Stewart Professional Partial College 46 100 +Dalton Wood Professional Partial College 42 70 +\N why Professional Partial College 38 120 +--top 2-- +Angel Stewart Professional Partial College 46 100 +Dalton Wood Professional Partial College 42 70 +\N why Professional Partial College 38 120 +--top 3-- +Peter Nara Skilled Manual Graduate Degree 26 30 +Latoya Shen Professional Graduate 
Degree 25 40 +Joshua Lee Professional Partial College 26 50 +--top 4-- +\N why Professional Partial College 38 120 +Theodore Diaz Skilled Manual Bachelors 28 10 +Stephanie Cox Management Bachelors 31 20 +--top 5-- +Theodore Diaz Skilled Manual Bachelors 28 10 +Stephanie Cox Management Bachelors 31 20 +Peter Nara Skilled Manual Graduate Degree 26 30 +--top 6-- +Dalton Wood Professional Partial College 42 70 +Angel Stewart Professional Partial College 46 100 +--top hitters 1-- +28 210 +38 120 +--top hitters 2-- +28 3 +26 2 +--top hitters 3-- +38 1 +28 1 +--top hitters 4-- +38 120 +--top hitters 5-- +38 120 +--top hitters 6-- +28 3 diff --git a/tests/queries/0_stateless/02366_kql_top_hitters.sql b/tests/queries/0_stateless/02366_kql_top_hitters.sql new file mode 100644 index 000000000000..60ac6a537f6f --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_top_hitters.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8), + extra Int16 +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28,10),('Stephanie','Cox','Management','Bachelors',31,20),('Peter','Nara','Skilled Manual','Graduate Degree',26,30),('Latoya','Shen','Professional','Graduate Degree',25,40),('Joshua','Lee','Professional','Partial College',26,50),('Edward','Hernandez','Skilled Manual','High School',36,60),('Dalton','Wood','Professional','Partial College',42,70),('Christine','Nara','Skilled Manual','Partial College',33,80),('Cameron','Rodriguez','Professional','Partial College',28,90),('Angel','Stewart','Professional','Partial College',46,100),('Apple','B','Skilled Manual','Bachelors',28,110),(NULL,'why','Professional','Partial College',38,120); + +set dialect = 'kusto'; +print '--top 1--'; +Customers | top 3 by Age; +print '--top 2--'; +Customers | top 3 by Age desc; +print '--top 3--'; +Customers | top 3 by Age asc | order by FirstName; +print '--top 4--'; +Customers | top 3 by FirstName desc nulls first; +print '--top 5--'; +Customers | top 3 by FirstName desc nulls last; +print '--top 6--'; +Customers | top 3 by Age | top 2 by FirstName; +print '--top hitters 1--'; +Customers | top-hitters a = 2 of Age by extra; +print '--top hitters 2--'; +Customers | top-hitters 2 of Age; +print '--top hitters 3--'; +Customers | top-hitters 2 of Age by extra | top-hitters 2 of Age | order by Age; +print '--top hitters 4--'; +Customers | top-hitters 2 of Age by extra | where Age > 30; +print '--top hitters 5--'; +Customers | top-hitters 2 of Age by extra | where approximate_sum_extra < 200; +print '--top hitters 6--'; +Customers | top-hitters 2 of Age | where approximate_count_Age > 2; + diff --git a/tests/queries/0_stateless/02366_kql_topnested.reference b/tests/queries/0_stateless/02366_kql_topnested.reference new file mode 100644 index 000000000000..6a3e4f0ae124 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_topnested.reference @@ -0,0 +1,108 @@ +-- top nested 1 layer-- +QC 125 +ON 140 +MA 145 +--top nested 2 layers-- +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 Steven 38 +MA 145 Robert 42 +--top nested 3 layers-- +QC 125 Steven 41 03/30/1996 23 +QC 125 Steven 41 03/29/1996 13 +QC 125 Joseph 33 03/30/1996 20 +QC 125 Joseph 33 03/29/1996 10 +ON 140 Steven 64 03/31/1996 30 +ON 140 Steven 64 03/30/1996 22 +ON 140 Joseph 34 03/30/1996 18 +ON 140 Joseph 34 03/29/1996 14 +MA 145 Steven 38 03/30/1996 24 +MA 145 Steven 
38 03/29/1996 14 +MA 145 Robert 42 03/31/1996 25 +MA 145 Robert 42 03/30/1996 17 +--top nested 1 layer with others-- +all other region 55 +QC 125 +ON 140 +MA 145 +--top nested 2 layers with 2 others-- +all other region 55 all other person 55 +QC 125 all other person 51 +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 all other person 42 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 all other person 65 +MA 145 Steven 38 +MA 145 Robert 42 +--top nested 2 layers with 1st others-- +all other region 55 \N \N +QC 125 Steven 41 +QC 125 Joseph 33 +QC 125 \N \N +ON 140 Steven 64 +ON 140 Joseph 34 +ON 140 \N \N +MA 145 Steven 38 +MA 145 Robert 42 +MA 145 \N \N +--top nested 2 layers with 2nd others-- +QC 125 all other person 51 +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 all other person 42 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 all other person 65 +MA 145 Steven 38 +MA 145 Robert 42 +\N \N all other person 55 +--top nested 3 layers with 3 others-- +all other region 55 all other person 55 all other date 55 +QC 125 all other person 51 all other date 51 +QC 125 Steven 41 all other date 5 +QC 125 Steven 41 03/30/1996 23 +QC 125 Steven 41 03/29/1996 13 +QC 125 Joseph 33 all other date 3 +QC 125 Joseph 33 03/30/1996 20 +QC 125 Joseph 33 03/29/1996 10 +ON 140 all other person 42 all other date 42 +ON 140 Steven 64 all other date 12 +ON 140 Steven 64 03/31/1996 30 +ON 140 Steven 64 03/30/1996 22 +ON 140 Joseph 34 all other date 2 +ON 140 Joseph 34 03/30/1996 18 +ON 140 Joseph 34 03/29/1996 14 +MA 145 all other person 65 all other date 65 +MA 145 Steven 38 all other date 0 +MA 145 Steven 38 03/30/1996 24 +MA 145 Steven 38 03/29/1996 14 +MA 145 Robert 42 all other date 0 +MA 145 Robert 42 03/31/1996 25 +MA 145 Robert 42 03/30/1996 17 +--top nested use expression as aggregation-- +QC 255 +ON 285 +MA 295 +--top nested use expression as top n-- +QC 125 +ON 140 +MA 145 +--top nested use expression as others-- +all other region 55 +QC 125 +ON 140 +MA 145 +--top nested use expression as column-- +Q 125 +O 140 +M 145 +B 55 +--top nested without top n-- +QC 125 +ON 140 +MA 145 +BC 55 diff --git a/tests/queries/0_stateless/02366_kql_topnested.sql b/tests/queries/0_stateless/02366_kql_topnested.sql new file mode 100644 index 000000000000..11b9d7e37338 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_topnested.sql @@ -0,0 +1,76 @@ +DROP TABLE IF EXISTS sales; +CREATE TABLE sales +(salesdate String,salesperson String,region String,amount UInt32) ENGINE = Memory; + +INSERT INTO sales VALUES ( '12/31/1995','Robert','ON',1); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','ON',2); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','QC',3); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','MA',4); +INSERT INTO sales VALUES ( '12/31/1995','Steven','QC',5); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON',6); +INSERT INTO sales VALUES ( '03/29/1996','Robert','QC',7); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON',8); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','BC',9); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','QC',10); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','MA',11); +INSERT INTO sales VALUES ( '03/29/1996','Steven','ON',12); +INSERT INTO sales VALUES ( '03/29/1996','Steven','QC',13); +INSERT INTO sales VALUES ( '03/29/1996','Steven','MA',14); +INSERT INTO sales VALUES ( '03/30/1996','Robert','ON',15); +INSERT INTO sales VALUES ( '03/30/1996','Robert','QC',16); +INSERT INTO sales VALUES ( '03/30/1996','Robert','MA',17); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','ON',18);
+INSERT INTO sales VALUES ( '03/30/1996','Joseph','BC',19); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','QC',20); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','MA',21); +INSERT INTO sales VALUES ( '03/30/1996','Steven','ON',22); +INSERT INTO sales VALUES ( '03/30/1996','Steven','QC',23); +INSERT INTO sales VALUES ( '03/30/1996','Steven','MA',24); +INSERT INTO sales VALUES ( '03/31/1996','Robert','MA',25); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','ON',26); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','BC',27); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','QC',28); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','MA',29); +INSERT INTO sales VALUES ( '03/31/1996','Steven','ON',30); + + +set dialect = 'kusto'; + +print '-- top nested 1 layer--'; +sales | top-nested 3 of region by sum(amount)|order by region; + +print '--top nested 2 layers--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount)|order by region, salesperson; + +print '--top nested 3 layers--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount), top-nested 2 of salesdate by sum(amount)|order by region, salesperson, salesdate; + +print '--top nested 1 layer with others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount)|order by region; + +print '--top nested 2 layers with 2 others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)|order by region, salesperson; + +print '--top nested 2 layers with 1st others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount)|order by region, salesperson; + +print '--top nested 2 layers with 2nd others--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)|order by region, salesperson; + +print '--top nested 3 layers with 3 others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount), top-nested 2 of salesdate with others = 'all other date' by sum(amount)|order by region, salesperson, salesdate; + +print '--top nested use expression as aggregation--'; +sales | top-nested 3 of region by sum(amount)*2 + 5|order by region; + +print '--top nested use expression as top n--'; +sales | top-nested strlen('abc') of region by sum(amount)|order by region; + +print '--top nested use expression as others--'; +sales | top-nested 3 of region with others = strcat("all other"," region") by sum(amount)|order by region; + +print '--top nested use expression as column--'; +sales | top-nested of substring(region,0,1) by sum(amount)|order by Column1; + +print '--top nested without top n--'; +sales | top-nested of region by sum(amount)|order by region; \ No newline at end of file diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 341800206804..c036d57bcfc8 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -377,8 +377,6 @@ hasAny hasColumnInTable hasSubstr hasThreadFuzzer -hasToken -hasTokenCaseInsensitive hashid hex hiveHash diff --git
a/tests/queries/0_stateless/02455_dateTime64Diff.reference b/tests/queries/0_stateless/02455_dateTime64Diff.reference new file mode 100644 index 000000000000..db9adebf1b3a --- /dev/null +++ b/tests/queries/0_stateless/02455_dateTime64Diff.reference @@ -0,0 +1,5 @@ +-- dateTime64Diff -- +28200397123456789 +-28200397123456789 +-- DateTime64 arithmetic -- +28200397123456789 diff --git a/tests/queries/0_stateless/02455_dateTime64Diff.sql b/tests/queries/0_stateless/02455_dateTime64Diff.sql new file mode 100644 index 000000000000..06e13df465f1 --- /dev/null +++ b/tests/queries/0_stateless/02455_dateTime64Diff.sql @@ -0,0 +1,11 @@ +-- dateTime64Diff +select '-- dateTime64Diff --'; +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDateTime64('2022-01-01', 0)); +select dateTime64Diff(toDateTime64('2022-01-01', 0), toDateTime64('2022-11-23 09:26:37.123456789', 9)); +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDate('2022-01-01')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDate32('2022-01-01')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDateTime('2022-01-01 01:02:03')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- DateTime64 - DateTime64 +select '-- DateTime64 arithmetic --'; +select toDateTime64('2022-11-23 09:26:37.123456789', 9) - toDateTime64('2022-01-01', 0); diff --git a/tests/queries/0_stateless/02455_interval.reference b/tests/queries/0_stateless/02455_interval.reference new file mode 100644 index 000000000000..83c880e8aca9 --- /dev/null +++ b/tests/queries/0_stateless/02455_interval.reference @@ -0,0 +1,21 @@ +-- Interval -- +1000 IntervalNanosecond +-- Unary Operations -- +-1000 IntervalNanosecond +-1 Int8 +-- Binary Operations -- +-1000 Nullable(IntervalNanosecond) +3000 Nullable(IntervalNanosecond) +2.5 Nullable(Float64) +500 Nullable(IntervalNanosecond) +\N +inf +2500 IntervalNanosecond +2 Nullable(Int64) +0 Nullable(Int64) +1000 IntervalNanosecond +0 Nullable(IntervalNanosecond) +-- Conversion -- +1000 +1000 +1000 diff --git a/tests/queries/0_stateless/02455_interval.sql b/tests/queries/0_stateless/02455_interval.sql new file mode 100644 index 000000000000..9cc110106417 --- /dev/null +++ b/tests/queries/0_stateless/02455_interval.sql @@ -0,0 +1,51 @@ +select '-- Interval --'; +select toIntervalNanosecond(1000) as i, toTypeName(i); + +select '-- Unary Operations --'; +select -toIntervalNanosecond(1000) as i, toTypeName(i); + +select sign(toIntervalNanosecond(-1000)) as i, toTypeName(i); + +select abs(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitCount(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitNot(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitSwapLastTwo(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitWrapperFunc(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select intExp2(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select intExp10(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select roundAge(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select 
roundDuration(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select roundToExp2(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Binary Operations --'; +select (toIntervalNanosecond(1000) - toIntervalNanosecond(2000)) as i, toTypeName(i); +select (toIntervalNanosecond(1000) + toIntervalNanosecond(2000)) as i, toTypeName(i); +select (toIntervalNanosecond(2500) / toIntervalNanosecond(1000)) as i, toTypeName(i); +select (toIntervalNanosecond(2500) % toIntervalNanosecond(1000)) as i, toTypeName(i); +select toIntervalNanosecond(1000) / 0; +select toIntervalNanosecond(1000) / toIntervalNanosecond(0); + +select (toIntervalNanosecond(2500) * toIntervalNanosecond(1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select greatest(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select intDiv(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select intDivOrZero(toIntervalNanosecond(2500), toIntervalNanosecond(0)) as i, toTypeName(i); +select least(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select moduloOrZero(toIntervalNanosecond(2500), toIntervalNanosecond(0)) as i, toTypeName(i); + +select bitAnd(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitBoolMaskAnd(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitBoolMaskOr(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitHammingDistance(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitOr(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitRotateLeft(toIntervalNanosecond(1000), 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitRotateRight(toIntervalNanosecond(1000), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitShiftLeft(toIntervalNanosecond(1000), 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitShiftRight(toIntervalNanosecond(1000), 4); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitTest(toIntervalNanosecond(1000), 5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitXor(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Conversion --'; +select toFloat64(toIntervalNanosecond(1000)); +select toInt64(toIntervalNanosecond(1000)); +select toString(toIntervalNanosecond(1000)); diff --git a/tests/queries/0_stateless/02460_prewhere_row_level_policy.reference b/tests/queries/0_stateless/02460_prewhere_row_level_policy.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql b/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql new file mode 100644 index 000000000000..fc98fa773b4e --- /dev/null +++ b/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/40956#issuecomment-1262096612 +DROP TABLE IF EXISTS row_level_policy_prewhere; +DROP ROW POLICY IF EXISTS row_level_policy_prewhere_policy0 ON row_level_policy_prewhere; + +CREATE TABLE row_level_policy_prewhere (x Int16, y String) ENGINE = MergeTree ORDER BY x; +INSERT INTO row_level_policy_prewhere(y, x) VALUES 
('A',1), ('B',2), ('C',3); +CREATE ROW POLICY row_level_policy_prewhere_policy0 ON row_level_policy_prewhere FOR SELECT USING x >= 0 TO default; +SELECT * FROM row_level_policy_prewhere PREWHERE y = 'foo'; +DROP TABLE row_level_policy_prewhere; diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 new file mode 100644 index 000000000000..ca7b300e00e5 --- /dev/null +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 @@ -0,0 +1,29 @@ +{% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] -%} +-- { echoOn } + +SELECT count() FROM url_na_log; +130000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +110000 +-- Delete more than half of the rows (60K) from the range 2022-08-10 .. 2022-08-20 +-- There should be 50K rows remaining in this range +DELETE FROM url_na_log WHERE SiteId = 209 AND DateVisit >= '2022-08-13' AND DateVisit <= '2022-08-18'; +SELECT count() FROM url_na_log; +70000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +50000 +-- Hide more than half of the remaining rows (30K) from the range 2022-08-10 .. 2022-08-20 using row policy +-- Now this range should have 20K rows left +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING DateVisit < '2022-08-11' or DateVisit > '2022-08-19' TO default; +SELECT count() FROM url_na_log; +40000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +20000 +DROP ROW POLICY url_na_log_policy0 ON url_na_log; +{% endfor -%} diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 new file mode 100644 index 000000000000..e1ec348e6ac2 --- /dev/null +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 @@ -0,0 +1,59 @@ +{% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] %} + +DROP TABLE IF EXISTS url_na_log; + +CREATE TABLE url_na_log(SiteId UInt32, DateVisit Date, PRIMARY KEY (SiteId)) +ENGINE = MergeTree() +ORDER BY (SiteId, DateVisit) +SETTINGS index_granularity = {{ index_granularity }}, min_bytes_for_wide_part = 0; + +-- Insert some data to have 110K rows in the range 2022-08-10 ..
2022-08-20 and some more rows before and after that range +insert into url_na_log select 209, '2022-08-09' from numbers(10000); +insert into url_na_log select 209, '2022-08-10' from numbers(10000); +insert into url_na_log select 209, '2022-08-11' from numbers(10000); +insert into url_na_log select 209, '2022-08-12' from numbers(10000); +insert into url_na_log select 209, '2022-08-13' from numbers(10000); +insert into url_na_log select 209, '2022-08-14' from numbers(10000); +insert into url_na_log select 209, '2022-08-15' from numbers(10000); +insert into url_na_log select 209, '2022-08-16' from numbers(10000); +insert into url_na_log select 209, '2022-08-17' from numbers(10000); +insert into url_na_log select 209, '2022-08-18' from numbers(10000); +insert into url_na_log select 209, '2022-08-19' from numbers(10000); +insert into url_na_log select 209, '2022-08-20' from numbers(10000); +insert into url_na_log select 209, '2022-08-21' from numbers(10000); + + +SET mutations_sync=2; +SET allow_experimental_lightweight_delete=1; + +OPTIMIZE TABLE url_na_log FINAL; + +-- { echoOn } + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + + +-- Delete more than half of the rows (60K) from the range 2022-08-10 .. 2022-08-20 +-- There should be 50K rows remaining in this range +DELETE FROM url_na_log WHERE SiteId = 209 AND DateVisit >= '2022-08-13' AND DateVisit <= '2022-08-18'; + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + + +-- Hide more than half of the remaining rows (30K) from the range 2022-08-10 ..
2022-08-20 using row policy +-- Now this range should have 20K rows left +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING DateVisit < '2022-08-11' or DateVisit > '2022-08-19' TO default; + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + +DROP ROW POLICY url_na_log_policy0 ON url_na_log; + +-- { echoOff } + +{% endfor %} diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python new file mode 100644 index 000000000000..a12656f636b4 --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +import requests +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + + +class Tester: + ''' + - Creates test table + - Deletes the specified range of rows + - Masks another range using row-level policy + - Runs some read queries and checks that the results are as expected + ''' + def __init__(self, session, url, index_granularity, total_rows): + self.session = session + self.url = url + self.index_granularity = index_granularity + self.total_rows = total_rows + self.reported_errors = set() + self.repro_queries = [] + + def report_error(self): + print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + exit(1) + + def query(self, query_text, include_in_repro_steps = True, expected_data = None): + self.repro_queries.append(query_text) + resp = self.session.post(self.url, data=query_text) + if resp.status_code != 200: + # Group similar errors + error = resp.text[0:40] + if error not in self.reported_errors: + self.reported_errors.add(error) + print('Code:', resp.status_code) + print('Result:', resp.text) + self.report_error() + + result = resp.text + # Check that the result is as expected + if ((expected_data is not None) and (int(result) != len(expected_data))): + print('Expected {} rows, got {}'.format(len(expected_data), result)) + print('Expected data:' + str(expected_data)) + self.report_error() + + if not include_in_repro_steps: + self.repro_queries.pop() + + + def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + all_data_after_delete = all_data[ + ~((all_data.a == 0) & + (all_data.b > delete_range_start) & + (all_data.b <= delete_range_end))] + all_data_after_row_policy = all_data_after_delete[ + (all_data_after_delete.b <= row_level_policy_range_start) | + (all_data_after_delete.b > row_level_policy_range_end)] + + for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value + self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy) + + delta = 10 + for query_range_start in [0, delta]: + for query_range_end in [self.total_rows - delta]: #, self.total_rows]: + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + (all_data_after_row_policy.b > query_range_start) & + (all_data_after_row_policy.b <= query_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + expected = all_data_after_row_policy[
+ (all_data_after_row_policy.a == 0) & + (all_data_after_row_policy.c > query_range_start) & + (all_data_after_row_policy.c <= query_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + ((all_data_after_row_policy.c <= query_range_start) | + (all_data_after_row_policy.c > query_range_end))] + self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + + def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + self.repro_queries = [] + + self.query(''' + CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a)) + ENGINE = MergeTree() ORDER BY (a, b) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + + self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + + client = ClickHouseClient() + all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;") + + self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + + # After all data has been written add a column with default value + self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;') + + self.check_data(all_data, -100, -100, -100, -100) + + self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format( + delete_range_start, delete_range_end)) + + self.check_data(all_data, delete_range_start, delete_range_end, -100, -100) + + self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format( + row_level_policy_range_start, row_level_policy_range_end)) + + self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + + self.query('DROP POLICY policy_tab_02473 ON tab_02473;') + + self.query('DROP TABLE tab_02473;') + + + +def main(): + # Set mutations to synchronous mode and enable lightweight DELETEs + url = os.environ['CLICKHOUSE_URL'] + '&mutations_sync=2&allow_experimental_lightweight_delete=1&max_threads=1' + + default_index_granularity = 10 + total_rows = 8 * default_index_granularity + step = default_index_granularity + session = requests.Session() + for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: + tester = Tester(session, url, index_granularity, total_rows) + # Test combinations of ranges of various sizes masked by lightweight DELETEs + # along with ranges of various sizes masked by row-level policies + for delete_range_start in range(0, total_rows, 3 * step): + for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step): + for row_level_policy_range_start in range(0, total_rows, 3 * step): + for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step): + tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + + +if __name__ == "__main__": + main() + diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.reference b/tests/queries/0_stateless/02473_multistep_prewhere.reference new file mode 100644 index 000000000000..e69de29bb2d1
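For reference, a minimal standalone sketch of one statement sequence the driver above generates (illustrative range values only; the script itself sweeps many delete/policy range combinations and passes mutations_sync and allow_experimental_lightweight_delete as URL parameters rather than SET statements):

SET mutations_sync = 2, allow_experimental_lightweight_delete = 1;
CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a)) ENGINE = MergeTree() ORDER BY (a, b) SETTINGS min_bytes_for_wide_part = 0, index_granularity = 10;
INSERT INTO tab_02473 SELECT 0, number+1, number+1 FROM numbers(80);
OPTIMIZE TABLE tab_02473 FINAL;
ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1; -- column with a default value, added after the data is written
DELETE FROM tab_02473 WHERE a = 0 AND b > 0 AND b <= 30; -- lightweight delete masks one range
CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= 30 OR b > 70 TO default; -- row policy masks another
SELECT sum(d) FROM tab_02473 PREWHERE b > 10 AND b <= 70 WHERE a == 0; -- multistep PREWHERE read over the masked table
DROP POLICY policy_tab_02473 ON tab_02473;
DROP TABLE tab_02473;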
diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.sh b/tests/queries/0_stateless/02473_multistep_prewhere.sh new file mode 100755 index 000000000000..bbb411b0a32a --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_prewhere.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02473_multistep_prewhere.python + diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference new file mode 100644 index 000000000000..bb8ce4a83962 --- /dev/null +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference @@ -0,0 +1,76 @@ +-- { echoOn } +CREATE TABLE test_filter(a Int32, b Int32, c Int32) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 3; +INSERT INTO test_filter SELECT number, number+1, (number/2 + 1) % 2 FROM numbers(15); +SELECT _part_offset, intDiv(_part_offset, 3) as granule, * FROM test_filter ORDER BY _part_offset; +0 0 0 1 1 +1 0 1 2 1 +2 0 2 3 0 +3 1 3 4 0 +4 1 4 5 1 +5 1 5 6 1 +6 2 6 7 0 +7 2 7 8 0 +8 2 8 9 1 +9 3 9 10 1 +10 3 10 11 0 +11 3 11 12 0 +12 4 12 13 1 +13 4 13 14 1 +14 4 14 15 0 +-- Check that division by zero occurs on some rows +SELECT intDiv(b, c) FROM test_filter; -- { serverError ILLEGAL_DIVISION } +-- Filter out those rows using WHERE or PREWHERE +SELECT intDiv(b, c) FROM test_filter WHERE c != 0; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; +1 +5 +9 +13 +SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; +-- Delete all rows where division by zero could occur +DELETE FROM test_filter WHERE c = 0; +-- Test that now division by zero doesn't occur without explicit condition +SELECT intDiv(b, c) FROM test_filter; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT * FROM test_filter PREWHERE intDiv(b, c) > 0; +0 1 1 +1 2 1 +4 5 1 +5 6 1 +8 9 1 +9 10 1 +12 13 1 +13 14 1 +SELECT * FROM test_filter PREWHERE b != 0 WHERE intDiv(b, c) > 0; +0 1 1 +1 2 1 +4 5 1 +5 6 1 +8 9 1 +9 10 1 +12 13 1 +13 14 1 diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql new file mode 100644 index 000000000000..94ffb1b87302 --- /dev/null +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS test_filter; + +-- { echoOn } +CREATE TABLE test_filter(a Int32, b Int32, c Int32) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 3; + +INSERT INTO test_filter SELECT number, number+1, (number/2 + 1) % 2 FROM numbers(15); + +SELECT _part_offset, intDiv(_part_offset, 3) as granule, * FROM test_filter ORDER BY _part_offset; + +-- Check that division by zero occurs on some rows +SELECT intDiv(b, c) FROM test_filter; -- { serverError ILLEGAL_DIVISION } +-- Filter out those rows using WHERE or PREWHERE +SELECT intDiv(b, c) FROM test_filter WHERE c != 0; +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; + + +SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; + +-- Delete all rows where division by zero could occur +DELETE FROM test_filter 
WHERE c = 0; +-- Test that now division by zero doesn't occur without explicit condition +SELECT intDiv(b, c) FROM test_filter; +SELECT * FROM test_filter PREWHERE intDiv(b, c) > 0; +SELECT * FROM test_filter PREWHERE b != 0 WHERE intDiv(b, c) > 0; + +-- { echoOff } +DROP TABLE test_filter; diff --git a/tests/queries/0_stateless/02503_bad_compatibility_setting.reference b/tests/queries/0_stateless/02503_bad_compatibility_setting.reference new file mode 100644 index 000000000000..5b7d2a449a01 --- /dev/null +++ b/tests/queries/0_stateless/02503_bad_compatibility_setting.reference @@ -0,0 +1 @@ + 0 diff --git a/tests/queries/0_stateless/02503_bad_compatibility_setting.sql b/tests/queries/0_stateless/02503_bad_compatibility_setting.sql new file mode 100644 index 000000000000..178c6a875318 --- /dev/null +++ b/tests/queries/0_stateless/02503_bad_compatibility_setting.sql @@ -0,0 +1,3 @@ +set compatibility='a.a'; -- { serverError BAD_ARGUMENTS } +select value, changed from system.settings where name = 'compatibility' + diff --git a/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference b/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference new file mode 100644 index 000000000000..8baef1b4abc4 --- /dev/null +++ b/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference @@ -0,0 +1 @@ +abc diff --git a/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql b/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql new file mode 100644 index 000000000000..6756e3815867 --- /dev/null +++ b/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql @@ -0,0 +1,2 @@ +SELECT substr(toLowCardinality('abc'), 1 in 1) AS x GROUP BY x; + diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference new file mode 100644 index 000000000000..85adb1850d4d --- /dev/null +++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference @@ -0,0 +1,110 @@ +-- { echoOn } +SELECT * FROM table_02513; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143011 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%11; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%11; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%11 AND n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 WHERE n%143011 AND n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql new file mode 100644 index 
diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference
new file mode 100644
index 000000000000..85adb1850d4d
--- /dev/null
+++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference
@@ -0,0 +1,110 @@
+-- { echoOn }
+SELECT * FROM table_02513;
+143001
+143002
+143003
+143004
+143005
+143006
+143007
+143008
+143009
+143011
+143012
+143013
+143014
+143015
+143016
+143017
+143018
+143019
+SELECT * FROM table_02513 WHERE n%11;
+143001
+143002
+143003
+143004
+143005
+143006
+143007
+143008
+143009
+143012
+143013
+143014
+143015
+143016
+143017
+143018
+143019
+SELECT * FROM table_02513 PREWHERE n%11;
+143001
+143002
+143003
+143004
+143005
+143006
+143007
+143008
+143009
+143012
+143013
+143014
+143015
+143016
+143017
+143018
+143019
+SELECT * FROM table_02513 WHERE n%11 AND n%13;
+143001
+143002
+143003
+143004
+143005
+143006
+143007
+143008
+143009
+143012
+143014
+143015
+143016
+143017
+143018
+143019
+SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13;
+143001
+143002
+143003
+143004
+143005
+143006
+143007
+143008
+143009
+143012
+143014
+143015
+143016
+143017
+143018
+143019
+SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
+SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
+SELECT * FROM table_02513 WHERE n%143011 AND n%13;
+143001
+143002
+143003
+143004
+143005
+143006
+143007
+143008
+143009
+143012
+143014
+143015
+143016
+143017
+143018
+143019
+SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql
new file mode 100644
index 000000000000..771893ce6746
--- /dev/null
+++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql
@@ -0,0 +1,26 @@
+DROP TABLE IF EXISTS table_02513;
+
+CREATE TABLE table_02513 (n UInt64) ENGINE=MergeTree() ORDER BY tuple() SETTINGS index_granularity=100;
+
+INSERT INTO table_02513 SELECT number+11*13*1000 FROM numbers(20);
+
+SET allow_experimental_lightweight_delete=1;
+SET mutations_sync=2;
+SET max_threads=1;
+
+DELETE FROM table_02513 WHERE n%10=0;
+
+-- { echoOn }
+SELECT * FROM table_02513;
+SELECT * FROM table_02513 WHERE n%11;
+SELECT * FROM table_02513 PREWHERE n%11;
+SELECT * FROM table_02513 WHERE n%11 AND n%13;
+SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13;
+
+SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
+SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
+SELECT * FROM table_02513 WHERE n%143011 AND n%13;
+SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
+-- { echoOff }
+
+DROP TABLE table_02513;
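A note on the rejected queries above: a WHERE/PREWHERE filter column must be UInt8 (optionally Nullable). n % 11 fits in UInt8 and is accepted as-is, while n % 143011 yields a wider integer type and fails with ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER unless it is folded into a UInt8 expression, which is exactly what the AND does in WHERE n%143011 AND n%13. The usual explicit workaround, sketched here for illustration (not part of the test):

-- Comparing against zero produces a UInt8 column, always a legal filter type.
SELECT * FROM table_02513 WHERE (n % 143011) != 0;
SELECT * FROM table_02513 PREWHERE (n % 143011) != 0 WHERE (n % 13) != 0;
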
diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.reference b/tests/queries/0_stateless/02518_delete_on_materialized_view.reference
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.sql b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql
new file mode 100644
index 000000000000..73abca4ea53c
--- /dev/null
+++ b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql
@@ -0,0 +1,14 @@
+DROP TABLE IF EXISTS kek;
+DROP TABLE IF EXISTS kekv;
+
+CREATE TABLE kek (a UInt32) ENGINE = MergeTree ORDER BY a;
+CREATE MATERIALIZED VIEW kekv ENGINE = MergeTree ORDER BY tuple() AS SELECT * FROM kek;
+
+INSERT INTO kek VALUES (1);
+DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS }
+
+SET allow_experimental_lightweight_delete=1;
+DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS }
+
+DROP TABLE IF EXISTS kek;
+DROP TABLE IF EXISTS kekv;
diff --git a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference
new file mode 100644
index 000000000000..3b40d9048cd5
--- /dev/null
+++ b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference
@@ -0,0 +1,25 @@
+-- { echoOn }
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+Rows in parts 100000
+SELECT 'Count', count() FROM lwd_test_02521;
+Count 100000
+DELETE FROM lwd_test_02521 WHERE id < 25000;
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+Rows in parts 100000
+SELECT 'Count', count() FROM lwd_test_02521;
+Count 50000
+ALTER TABLE lwd_test_02521 MODIFY TTL event_time + INTERVAL 1 MONTH SETTINGS mutations_sync = 1;
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+Rows in parts 50000
+SELECT 'Count', count() FROM lwd_test_02521;
+Count 25000
+ALTER TABLE lwd_test_02521 DELETE WHERE id >= 40000 SETTINGS mutations_sync = 1;
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+Rows in parts 40000
+SELECT 'Count', count() FROM lwd_test_02521;
+Count 15000
+OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1;
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+Rows in parts 15000
+SELECT 'Count', count() FROM lwd_test_02521;
+Count 15000
diff --git a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql
new file mode 100644
index 000000000000..cf2fe452e4bc
--- /dev/null
+++ b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql
@@ -0,0 +1,46 @@
+DROP TABLE IF EXISTS lwd_test_02521;
+
+CREATE TABLE lwd_test_02521 (id UInt64, value String, event_time DateTime)
+ENGINE MergeTree()
+ORDER BY id
+SETTINGS min_bytes_for_wide_part = 0;
+
+INSERT INTO lwd_test_02521 SELECT number, randomString(10), now() - INTERVAL 2 MONTH FROM numbers(50000);
+INSERT INTO lwd_test_02521 SELECT number, randomString(10), now() FROM numbers(50000);
+
+OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1;
+
+SET mutations_sync=1;
+SET allow_experimental_lightweight_delete = 1;
+
+-- { echoOn }
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+SELECT 'Count', count() FROM lwd_test_02521;
+
+
+DELETE FROM lwd_test_02521 WHERE id < 25000;
+
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+SELECT 'Count', count() FROM lwd_test_02521;
+
+
+ALTER TABLE lwd_test_02521 MODIFY TTL event_time + INTERVAL 1 MONTH SETTINGS mutations_sync = 1;
+
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+SELECT 'Count', count() FROM lwd_test_02521;
+
+
+ALTER TABLE lwd_test_02521 DELETE WHERE id >= 40000 SETTINGS mutations_sync = 1;
+
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+SELECT 'Count', count() FROM lwd_test_02521;
+
+
+OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1;
+
+SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active;
+SELECT 'Count', count() FROM lwd_test_02521;
+
+-- { echoOff }
+
+DROP TABLE lwd_test_02521;
\ No newline at end of file
diff --git a/utils/check-style/check-submodules b/utils/check-style/check-submodules
new file mode 100755
index 000000000000..815e6c13c0f2
--- /dev/null
+++ b/utils/check-style/check-submodules
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+# The script checks that every submodule defined in $GIT_ROOT/.gitmodules exists in $GIT_ROOT/contrib and is in a healthy state
+
+set -e
+
+GIT_ROOT=$(git rev-parse --show-cdup)
+GIT_ROOT=${GIT_ROOT:-.}
+
+cd "$GIT_ROOT"
+
+# `git config --null` prints "key\nvalue\0"; the sed strips each key, leaving
+# the NUL-separated submodule paths. Check that each path exists as a directory.
+git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \
+  xargs -P100 -0 --no-run-if-empty -I{} bash -c 'if ! test -d {}; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1
+
+
+# And check that each submodule is in a healthy state
+git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \
+  xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q {} 2>&1
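Stepping back to the 02521 test above, its row counts encode the lightweight-delete contract end to end: DELETE FROM only masks rows (count() drops while sum(rows) in system.parts does not), whereas TTL materialization, ALTER TABLE ... DELETE, and OPTIMIZE ... FINAL rewrite parts and drop the masked rows physically, which is when the two counters converge. A condensed sketch of that contrast (hypothetical table name lwd_demo, not part of this PR):

SET allow_experimental_lightweight_delete = 1, mutations_sync = 2;
CREATE TABLE lwd_demo (id UInt64) ENGINE = MergeTree ORDER BY id;
INSERT INTO lwd_demo SELECT number FROM numbers(100);
DELETE FROM lwd_demo WHERE id < 50;    -- lightweight: rows masked, parts still hold 100 rows
SELECT count() FROM lwd_demo;          -- 50
OPTIMIZE TABLE lwd_demo FINAL;         -- the rewrite physically drops the masked rows
SELECT sum(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_demo' AND active;  -- 50
DROP TABLE lwd_demo;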