
Commit 9377d17

Merge branch 'main' of https://github.com/datafuselabs/databend into refactor/const
2 parents: 1eda119 + 4e1e864

349 files changed: +8321 additions, -4522 deletions


.github/actions/benchmark_cloud/action.yml

Lines changed: 24 additions & 14 deletions
@@ -22,6 +22,14 @@ inputs:
   size:
     description: "Small/Medium/Large"
     required: true
+  cache_size:
+    description: "Warehouse cache size"
+    required: false
+    default: "0"
+  tries:
+    description: "Number of attempts per query (1-3)"
+    required: false
+    default: ""
   version:
     description: "Databend version"
     required: true
@@ -38,37 +46,39 @@ inputs:
 runs:
   using: "composite"
   steps:
-    - name: Install script dependencies
-      shell: bash
-      run: |
-        sudo apt-get update -yq
-        sudo apt-get install -yq python3
-
     - name: Prepare
-      working-directory: benchmark/clickbench
+      working-directory: benchmark
       shell: bash
       id: prepare
       env:
         BENDSQL_DSN: "databend://${{ inputs.cloud_user }}:${{ inputs.cloud_password }}@${{ inputs.cloud_gateway }}:443"
       run: |
         if [[ "${{ inputs.dataset }}" == "load" ]]; then
           echo "database=load_test_${{ inputs.run_id }}" >> $GITHUB_OUTPUT
-          echo "tries=1" >> $GITHUB_OUTPUT
         else
           database="${{ inputs.database }}"
           if [[ -z "$database" ]]; then
-            database="clickbench"
+            database="benchmark"
           fi
           echo "database=$database" >> $GITHUB_OUTPUT
-          echo "tries=3" >> $GITHUB_OUTPUT
         fi
+        tries="${{ inputs.tries }}"
+        if [[ -z "$tries" ]]; then
+          if [[ "${{ inputs.dataset }}" == "load" ]]; then
+            tries=1
+          else
+            tries=3
+          fi
+        fi
+        echo "tries=$tries" >> $GITHUB_OUTPUT

     - name: Run Benchmark
-      working-directory: benchmark/clickbench
+      working-directory: benchmark
       env:
         BENCHMARK_ID: ${{ inputs.run_id }}
         BENCHMARK_DATASET: ${{ inputs.dataset }}
         BENCHMARK_SIZE: ${{ inputs.size }}
+        BENCHMARK_CACHE_SIZE: ${{ inputs.cache_size }}
         BENCHMARK_VERSION: ${{ inputs.version }}
         BENCHMARK_DATABASE: ${{ steps.prepare.outputs.database }}
         BENCHMARK_TRIES: ${{ steps.prepare.outputs.tries }}
@@ -86,10 +96,10 @@ runs:
     - name: Upload artifact
       uses: actions/upload-artifact@v4
       with:
-        name: benchmark-${{ inputs.dataset }}-${{ inputs.size }}
+        name: benchmark-${{ inputs.dataset }}-${{ inputs.size }}-cache-${{ inputs.cache_size }}
         path: |
-          benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}.json
-          benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-*.ndjson
+          benchmark/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}.json
+          benchmark/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}-*.ndjson

     - name: Remove warehouse
       if: always()
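
A caller wires the new knobs through the action's `with:` block; when `tries` is left empty, the Prepare step above falls back to 1 for the `load` dataset and 3 for everything else. A minimal sketch of a calling step (the step name, chosen dataset, and secret names are illustrative assumptions, not taken from this commit):

    - name: Benchmark tpch1000 with warm cache   # hypothetical caller step
      uses: ./.github/actions/benchmark_cloud
      with:
        run_id: ${{ github.run_id }}
        dataset: tpch1000
        size: Large
        cache_size: "300"   # new input; defaults to "0" (no warehouse cache)
        tries: ""           # new input; empty means "derive from dataset"
        version: ${{ inputs.version }}
        cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
        cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
        cloud_gateway: ${{ secrets.BENCHMARK_CLOUD_GATEWAY }}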

.github/actions/benchmark_local/action.yml

Lines changed: 3 additions & 9 deletions
@@ -20,14 +20,8 @@ inputs:
 runs:
   using: "composite"
   steps:
-    - name: Install script dependencies
-      shell: bash
-      run: |
-        sudo apt-get update -yq
-        sudo apt-get install -yq python3
-
     - name: Run Benchmark
-      working-directory: benchmark/clickbench
+      working-directory: benchmark
       env:
         BENCHMARK_ID: ${{ inputs.run_id }}
         BENCHMARK_DATASET: ${{ inputs.dataset }}
@@ -42,7 +36,7 @@ runs:
         name: benchmark_local

     - name: Prepare Metadata
-      working-directory: benchmark/clickbench
+      working-directory: benchmark
       shell: bash
       run: |
         case ${{ inputs.source }} in
@@ -63,4 +57,4 @@ runs:
       uses: actions/upload-artifact@v4
       with:
         name: benchmark-${{ inputs.dataset }}-local
-        path: benchmark/clickbench/result-${{ inputs.dataset }}-local.json
+        path: benchmark/result-${{ inputs.dataset }}-local.json

.github/actions/publish_debug_symbols/action.yml

Lines changed: 5 additions & 9 deletions
@@ -32,17 +32,13 @@ runs:
       shell: bash
       run: |
         publish_name="databend-debug-${{ inputs.category }}-${{ inputs.version }}-${{ inputs.target }}.tar.gz"
-        tar -C distro/bin -czvf ${publish_name} databend-query.debug
-        echo "name=$publish_name" >> $GITHUB_OUTPUT
+        symbol_name="databend-query-${{ inputs.category }}-${{ inputs.version }}-${{ inputs.target }}.debug"
+        mv "distro/bin/databend-query.debug" "distro/bin/${symbol_name}"
+        tar -C distro/bin -czvf "${publish_name}" "${symbol_name}"
+        rm "distro/bin/${symbol_name}"
+        echo "name=${publish_name}" >> $GITHUB_OUTPUT

     - name: Update debug symbols to github
       shell: bash
       run: |
         gh release upload ${{ inputs.version }} ${{ steps.prepare.outputs.name }} --clobber
-
-    - name: Sync debug symbols to R2
-      shell: bash
-      continue-on-error: true
-      if: inputs.category == 'default'
-      run: |
-        aws s3 cp ${{ steps.prepare.outputs.name }} s3://repo/databend/${{ inputs.version }}/${{ steps.prepare.outputs.name }} --no-progress --checksum-algorithm=CRC32
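With the symbol file now renamed to carry the category, version, and target, archives for several builds can sit side by side after extraction. A hypothetical consumer session (the file names below are illustrative, not produced by this commit):

    # download the release asset, unpack it, and point gdb at the renamed symbol file
    tar -xzf databend-debug-default-v1.2.700-x86_64-unknown-linux-gnu.tar.gz
    gdb --exec=./databend-query \
        --symbols=./databend-query-default-v1.2.700-x86_64-unknown-linux-gnu.debug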

.github/workflows/cloud.yml

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ jobs:
           ref: "refs/pull/${{ github.event.number }}/merge"
       - name: Build Release
         uses: ./.github/actions/build_linux
-        timeout-minutes: 60
+        timeout-minutes: 120
         env:
           DATABEND_ENTERPRISE_LICENSE_PUBLIC_KEY: ${{ secrets.DATABEND_ENTERPRISE_LICENSE_PUBLIC_KEY }}
           DATABEND_TELEMETRY_ENDPOINT: ${{ secrets.DATABEND_TELEMETRY_ENDPOINT}}

.github/workflows/release.yml

Lines changed: 0 additions & 4 deletions
@@ -313,10 +313,6 @@ jobs:
         uses: ./.github/actions/publish_debug_symbols
         env:
           GH_TOKEN: ${{ github.token }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: auto
-          AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT_URL }}
         with:
           version: ${{ needs.create_release.outputs.version }}
           target: ${{ matrix.target }}

.github/workflows/reuse.benchmark.yml

Lines changed: 23 additions & 22 deletions
@@ -115,6 +115,8 @@ jobs:
           source: ${{ inputs.source }}
           source_id: ${{ inputs.source_id }}
           size: Small
+          cache_size: "0"
+          tries: 1
           version: ${{ inputs.version }}
           cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
           cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
@@ -137,11 +139,12 @@ jobs:
     strategy:
       matrix:
         include:
-          - { dataset: hits, size: Small, database: hits, timeout: 10 }
-          - { dataset: hits, size: Large, database: hits, timeout: 10 }
-          - { dataset: tpch100, size: Small, database: tpch_100, timeout: 20 }
-          - { dataset: tpch100, size: Large, database: tpch_100, timeout: 20 }
-          - { dataset: tpch1000, size: Large, database: tpch_1000, timeout: 60 }
+          - { dataset: hits, size: Small, database: hits, timeout: 10, cache_size: 0, tries: 3 }
+          - { dataset: hits, size: Large, database: hits, timeout: 10, cache_size: 0, tries: 3 }
+          - { dataset: tpch100, size: Small, database: tpch_100, timeout: 20, cache_size: 0, tries: 3 }
+          - { dataset: tpch100, size: Large, database: tpch_100, timeout: 20, cache_size: 0, tries: 3 }
+          - { dataset: tpch1000, size: Large, database: tpch_1000, timeout: 60, cache_size: 0, tries: 1 }
+          - { dataset: tpch1000, size: Large, database: tpch_1000, timeout: 60, cache_size: 300, tries: 1 }
     fail-fast: true
     max-parallel: 1
     steps:
@@ -165,6 +168,8 @@ jobs:
           source: ${{ inputs.source }}
           source_id: ${{ inputs.source_id }}
           size: ${{ matrix.size }}
+          cache_size: ${{ matrix.cache_size }}
+          tries: ${{ matrix.tries }}
           version: ${{ inputs.version }}
           cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
           cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
@@ -184,17 +189,14 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - name: Install Dependencies
-        run: |
-          sudo apt-get update -yq
-          sudo apt-get install -yq python3-jinja2
+      - uses: astral-sh/setup-uv@v5
       - uses: actions/download-artifact@v4
         with:
-          path: benchmark/clickbench/results
+          path: benchmark/results
           pattern: benchmark-*
           merge-multiple: true
       - name: Get Report Prefix
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
         run: |
           shopt -s nullglob
           for result in results/*.json; do
@@ -215,16 +217,17 @@ jobs:
           AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: auto
           AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT_URL }}
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
         run: |
           echo -e "## ClickBench Report\n" > /tmp/body
           shopt -s nullglob
+          uv sync
           for p in results/*; do
             [ -d "$p" ] || continue
             dataset=$(basename $p)
             aws s3 sync results/$dataset/ ${REPORT_S3_PREFIX}/ --include "*.json" --no-progress --checksum-algorithm=CRC32
             aws s3 sync "s3://benchmark/clickbench/release/${dataset}/latest/" ./results/${dataset}/ --exclude "*" --include "*.json" || true
-            ./update_results.py --dataset $dataset --pr ${{ inputs.source_id }}
+            uv run update_results.py --dataset $dataset --pr ${{ inputs.source_id }}
             aws s3 cp ./results/${dataset}.html ${REPORT_S3_PREFIX}/${dataset}.html --no-progress --checksum-algorithm=CRC32
             echo "* **${dataset}**: https://benchmark.databend.com/clickbench/pr/${{ inputs.source_id }}/${{ inputs.run_id }}/${dataset}.html" >> /tmp/body
           done
@@ -249,19 +252,17 @@ jobs:
         # - "internal"
     steps:
       - uses: actions/checkout@v4
-      - name: Install Dependencies
-        run: |
-          sudo apt-get update -yq
-          sudo apt-get install -yq python3-jinja2
+      - uses: astral-sh/setup-uv@v5
       - uses: actions/download-artifact@v4
         with:
-          path: benchmark/clickbench/results
+          path: benchmark/results
           pattern: benchmark-${{ matrix.dataset }}-*
           merge-multiple: true
       - name: Prepare results directory
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
        run: |
           shopt -s nullglob
+          uv sync
           for result in results/*.json; do
             dataset=$(echo $result | sed -E 's/.*result-(\w+)-.*\.json/\1/')
             mkdir -p results/${dataset}/
@@ -274,7 +275,7 @@ jobs:
             mv $ndjson ndjsons/${dataset}/$(basename $ndjson)
           done
       - name: Generate report and upload to R2
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
@@ -283,7 +284,7 @@ jobs:
         run: |
           aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date --date='-1 month' -u +%Y)/$(date --date='-1 month' -u +%m)/ ./results/${{ matrix.dataset }}/
           aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date -u +%Y)/$(date -u +%m)/ ./results/${{ matrix.dataset }}/
-          ./update_results.py --dataset ${{ matrix.dataset }} --release ${{ inputs.source_id }}
+          uv run update_results.py --dataset ${{ matrix.dataset }} --release ${{ inputs.source_id }}

           RESULT_PREFIX="s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date -u +%Y)/$(date -u +%m)/$(date -u +%Y-%m-%d)/${{ inputs.source_id }}"
           LATEST_PREFIX="s3://benchmark/clickbench/release/${{ matrix.dataset }}/latest/latest"
@@ -294,7 +295,7 @@ jobs:

           aws s3 cp ./results/${{ matrix.dataset }}.html s3://benchmark/clickbench/release/${{ matrix.dataset }}.html --no-progress --checksum-algorithm=CRC32
       - name: Upload NDJSON archives to R2
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
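
The report jobs now rely on uv (installed via astral-sh/setup-uv@v5) instead of apt-installed python3-jinja2, so the benchmark directory is expected to carry a project file that `uv sync` can resolve. Roughly the same flow can be reproduced locally; a sketch, assuming a pyproject.toml in `benchmark/` that declares the report script's dependencies (e.g. jinja2) and an illustrative PR number:

    cd benchmark
    uv sync                                              # create the project venv from pyproject.toml
    uv run update_results.py --dataset hits --pr 12345   # render the report the same way the workflow does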

README.md

Lines changed: 8 additions & 7 deletions
@@ -1,6 +1,6 @@
 <h1 align="center">Databend</h1>
-<h3 align="center">Unified Multimodal Database for Any Data at Any Scale.</h3>
-<p align="center">A <strong>next-generation</strong> cloud-native warehouse built in <strong>Rust</strong>. Open-source, Snowflake-compatible, and unifying BI, AI, Search, Geo, and Stream.</p>
+<h3 align="center">The All-in-One Cloud Data Warehouse for Analytics & AI</h3>
+<p align="center">Built in <strong>Rust</strong> for blazing fast, cost-efficient analytics.<br> Open-source, <strong>Snowflake-compatible</strong>, and designed to unify BI, Search, and AI on object storage.</p>

 <div align="center">

@@ -24,12 +24,13 @@

 ## 💡 Why Databend?

-Databend is an open-source **unified multimodal database** built in Rust. It empowers **Analytics**, **AI**, **Search**, and **Geo** workloads on a single platform directly from object storage.
+Databend is an open-source, **All-in-One multimodal database** built in Rust. It seamlessly unifies **Analytics**, **AI**, **Search**, and **Geo** workloads into a single platform, enabling high-performance processing directly on top of object storage.

-- **Unified Engine**: One optimizer and runtime for all data types (Structured, Semi-structured, Vector).
-- **Native Pipelines**: Built-in **Stream** and **Task** for automated data cleaning and transformation.
-- **Cloud Native**: Stateless compute nodes over object storage (S3, GCS, Azure) with full ACID support.
-- **High Performance**: Vectorized execution and Zero-Copy processing.
+| | |
+| :--- | :--- |
+| **📊 BI & Analytics**<br>Supercharge your analytics with a high-performance, vectorized SQL query engine. | **✨ Vector Search**<br>Power AI and RAG applications with built-in, high-speed vector similarity search. |
+| **📄 JSON Search**<br>Seamlessly query and analyze semi-structured data with powerful JSON optimization. | **🌍 Geo Search**<br>Efficiently store, index, and query geospatial data for location intelligence. |
+| **🔄 ETL Pipeline**<br>Streamline data ingestion and transformation with built-in Streams and Tasks. | **🌿 Branching**<br>Create isolated Copy-on-Write branches instantly for dev, test, or experiments. |

 ![Databend Architecture](https://github.com/user-attachments/assets/288dea8d-0243-4c45-8d18-d4d402b08075)

Lines changed: 4 additions & 5 deletions
@@ -1,24 +1,23 @@
 # Benchmark Directory

-This directory contains subdirectories dedicated to various performance tests, 
+This directory contains subdirectories dedicated to various performance tests,

 specifically for TPCH tests, Hits tests, and internal query performance tests. Below is a brief overview of each subdirectory:

 ## 1. tpch

-This subdirectory includes performance evaluation tools and scripts related to TPCH tests. 
+This subdirectory includes performance evaluation tools and scripts related to TPCH tests.

 TPCH tests are designed to simulate complex query scenarios to assess the system's performance when handling large datasets. In this directory, you can find testing scripts, configuration files, and documentation for test results.

 ## 2. hits

-Hits tests focus on specific queries or operations for performance testing. 
+Hits tests focus on specific queries or operations for performance testing.

 In this subdirectory, you'll find scripts for Hits tests, sample queries, and performance analysis tools.

 ## 3. internal

-The internal subdirectory contains testing tools and scripts dedicated to ensuring the performance of internal queries. 
+The internal subdirectory contains testing tools and scripts dedicated to ensuring the performance of internal queries.

 These tests may be conducted to ensure the system performs well when handling internal queries specific.
-
