Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 2 additions & 23 deletions .github/scripts/build-slo-image.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,12 @@ Usage:
--context <path> \
--tag <docker-tag> \
--src-path <sdk-path> \
--job-name <job-name> \
--ref <git-ref> \
--fallback-image <docker-tag>

Options:
--context Docker build context directory (e.g. $GITHUB_WORKSPACE/current).
--tag Docker image tag to build (e.g. ydb-app-current).
--src-path Value for Docker build arg SRC_PATH (e.g. native/table).
--job-name Value for Docker build arg JOB_NAME (e.g. native-table).
--ref Value for Docker build arg REF (e.g. branch name / sha).
--fallback-image Image tag to return if initial Docker image build fails
EOF
}
Expand All @@ -31,9 +27,7 @@ die() {
context_dir=""
dockerfile="tests/slo/Dockerfile"
tag=""
ref=""
src_path=""
job_name=""
fallback_image=""

while [[ $# -gt 0 ]]; do
Expand All @@ -46,22 +40,10 @@ while [[ $# -gt 0 ]]; do
tag="${2:-}"
shift 2
;;
--ref)
ref="${2:-}"
shift 2
;;
--src-path)
src_path="${2:-}"
shift 2
;;
--job-name)
job_name="${2:-}"
shift 2
;;
--fallback-image)
fallback_image="${2:-}"
shift 2
;;
-h|--help)
usage
exit 0
Expand All @@ -72,7 +54,7 @@ while [[ $# -gt 0 ]]; do
esac
done

if [[ -z "$context_dir" || -z "$tag" || -z "$src_path" || -z "$job_name" || -z "$ref" ]]; then
if [[ -z "$context_dir" || -z "$tag" || -z "$src_path" ]]; then
usage
die "Incomplete argument set"
fi
Expand All @@ -84,17 +66,14 @@ context_dir="$(cd "$context_dir" && pwd)"

echo "Building SLO image..."
echo " TAG: $tag"
echo " REF: $ref"
echo " SRC_PATH: $src_path"
echo " JOB_NAME: $job_name"

(
set +e
cd "$context_dir"
docker build -t "$tag" \
--platform linux/amd64 \
--build-arg "SRC_PATH=$src_path" \
--build-arg "JOB_NAME=$job_name" \
--build-arg "REF=$ref" \
-f "$dockerfile" .
exit_code=$?
echo "Docker build exit code: $exit_code"
Expand Down
208 changes: 43 additions & 165 deletions .github/workflows/slo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,59 +41,26 @@ jobs:
fail-fast: false
matrix:
sdk:
- id: database_sql_table
name: database-sql-table
path: ./database/sql/table
label: database/sql/table
run_extra_args: ''
create_extra_args: ''
- id: database_sql_query
name: database-sql-query
path: ./database/sql/query
label: database/sql/query
run_extra_args: ''
create_extra_args: ''
- id: native_query
name: native-query
path: ./native/query
label: native/query
run_extra_args: ''
create_extra_args: ''
- id: native_table
name: native-table
path: ./native/table
label: native/table
run_extra_args: ''
create_extra_args: ''
- id: native_table_over_query_service
name: native-table-over-query-service
path: ./native/table/over/query/service
label: native/table/over/query/service
run_extra_args: ''
create_extra_args: ''
- id: native_bulk_upsert
name: native-bulk-upsert
path: ./native/bulk-upsert
label: native/bulk-upsert
run_extra_args: '-batch-size=10'
create_extra_args: ''
- id: native_node_hints
name: native-node-hints
path: ./native/node_hints
label: native/node_hints
slo_workload_read_max_rps: 100
slo_workload_write_max_rps: 100
run_extra_args: '-batch-size=10'
create_extra_args: '-min-partitions-count 10'
- id: session_node_hints
name: session-node-hints
path: ./native/session_node_hints
label: native/session_node_hints
slo_workload_read_max_rps: 1000
slo_workload_write_max_rps: 1
run_extra_args: '-batch-size=10'
create_extra_args: '-min-partitions-count 10'

- name: database-sql-table
path: database/sql/table
- name: database-sql-query
path: database/sql/query
- name: native-query
path: native/query
- name: native-table
path: native/table
- name: native-table-over-query-service
path: native/table-over-query-service
- name: native-bulk-upsert
path: native/bulk-upsert
run_extra_args: "-batch-size=10"
- name: native-table-node-hints
path: native/table-node-hints
run_extra_args: "-batch-size=10"
create_extra_args: "-min-partitions-count 10"
- name: native-query-node-hints
path: native/query-node-hints
create_extra_args: "-min-partitions-count 10"

concurrency:
group: slo-${{ github.ref }}-${{ matrix.sdk.name }}
Expand Down Expand Up @@ -176,133 +143,44 @@ jobs:
bash "$SCRIPT" \
--context "$GITHUB_WORKSPACE/current" \
--tag "ydb-app-current" \
--ref "${CURRENT_REF}" \
--src-path "${{ matrix.sdk.label }}" \
--job-name "${{ matrix.sdk.name }}"
--src-path "${{ matrix.sdk.path }}"

bash "$SCRIPT" \
--context "$GITHUB_WORKSPACE/baseline" \
--tag "ydb-app-baseline" \
--ref "${{ steps.baseline.outputs.ref }}" \
--src-path "${{ matrix.sdk.label }}" \
--job-name "${{ matrix.sdk.name }}" \
--fallback-image "ydb-app-current"
--src-path "${{ matrix.sdk.path }}"

- name: Initialize YDB SLO
id: ydb_slo
uses: ydb-platform/ydb-slo-action/init@13c687b7d4b2879da79dd12932dee0ed2b65dd1c
- name: Run SLO Tests
uses: ydb-platform/ydb-slo-action/init@v2
timeout-minutes: 30
with:
github_issue: ${{ github.event.inputs.github_issue }}
github_token: ${{ secrets.GITHUB_TOKEN }}
workload_name: ${{ matrix.sdk.name }}
workload_duration: ${{ inputs.slo_workload_duration_seconds || '600' }}
workload_current_ref: ${{ github.head_ref || github.ref_name }}
workload_current_image: ydb-app-current
workload_baseline_ref: ${{ steps.baseline.outputs.ref }}
# to disable chaos testing for node hint workloads
disable_compose_profiles: "${{ (matrix.sdk.id == 'native_node_hints' || matrix.sdk.id == 'session_node_hints') && 'chaos' || '' }}"
workload_baseline_image: ydb-app-current
disable_compose_profiles: "${{ (matrix.sdk.name == 'native-table-node-hints' || matrix.sdk.name == 'native-query-node-hints') && 'chaos' || '' }}"
metrics_yaml: |
metrics:
- name: node_hints_misses
type: instant
query: sum by(ref) (workload_node_hints_misses)

- name: Prepare SLO Database
run: |
echo "Preparing SLO database..."
CREATE_EXTRA_ARGS="${{ matrix.sdk.create_extra_args }}"
docker run --rm --network ydb_ydb-net \
--add-host "ydb:172.28.0.11" \
--add-host "ydb:172.28.0.12" \
--add-host "ydb:172.28.0.13" \
--add-host "ydb:172.28.0.99" \
ydb-app-current create grpc://ydb:2136 /Root/testdb $CREATE_EXTRA_ARGS

- name: Run SLO Tests (parallel)
timeout-minutes: 15
env:
DURATION: ${{ matrix.sdk.slo_workload_duration_seconds || 600 }}
READ_RPS: ${{ matrix.sdk.slo_workload_read_max_rps || 1000 }}
WRITE_RPS: ${{ matrix.sdk.slo_workload_write_max_rps || 1000 }}
RUN_EXTRA_ARGS: ${{ format('{0} -prometheus-endpoint {1}',matrix.sdk.run_extra_args,steps.ydb_slo.outputs.ydb-prometheus-url)}}
run: |
ARGS="run grpc://ydb:2136 /Root/testdb \
-otlp-endpoint prometheus:9090 \
-report-period 250 \
-time $DURATION \
-read-rps $READ_RPS \
-write-rps $WRITE_RPS \
-read-timeout 100 \
-write-timeout 100 \
$RUN_EXTRA_ARGS"

echo "Starting ydb-app-current..."
docker run -d \
--name ydb-app-current \
--network ydb_ydb-net \
--add-host "ydb:172.28.0.11" \
--add-host "ydb:172.28.0.12" \
--add-host "ydb:172.28.0.13" \
--add-host "ydb:172.28.0.99" \
ydb-app-current $ARGS

echo "Starting ydb-app-baseline..."
docker run -d \
--name ydb-app-baseline \
--network ydb_ydb-net \
--add-host "ydb:172.28.0.11" \
--add-host "ydb:172.28.0.12" \
--add-host "ydb:172.28.0.13" \
--add-host "ydb:172.28.0.99" \
ydb-app-baseline $ARGS

# Show initial logs
echo ""
echo "==================== INITIAL CURRENT LOGS ===================="
docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
echo ""
echo "==================== INITIAL BASELINE LOGS ===================="
docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
echo ""

# Wait for workloads to complete
echo "Waiting for workloads to complete (${DURATION}s)..."
sleep ${DURATION}

# Stop containers after workload duration and wait for graceful shutdown
echo "Stopping containers after ${DURATION}s..."
docker stop --timeout=30 ydb-app-current ydb-app-baseline 2>&1 || true

# Force kill if still running
docker kill ydb-app-current ydb-app-baseline 2>&1 || true

# Check exit codes
CURRENT_EXIT=$(docker inspect ydb-app-current --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")
BASELINE_EXIT=$(docker inspect ydb-app-baseline --format='{{.State.ExitCode}}' 2>/dev/null || echo "0")

echo "Current container exit code: $CURRENT_EXIT"
echo "Baseline container exit code: $BASELINE_EXIT"

# Show final logs
echo ""
echo "==================== FINAL CURRENT LOGS ===================="
docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
echo ""
echo "==================== FINAL BASELINE LOGS ===================="
docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
echo ""

echo "SUCCESS: Workloads completed successfully"

- if: always()
name: Store logs
run: |
docker logs ydb-app-current > current.log 2>&1 || echo "No current container"
docker logs ydb-app-baseline > baseline.log 2>&1 || echo "No baseline container"

- if: always()
uses: actions/upload-artifact@v4
ydb-slo-report:
if: always()
needs: ydb-slo-action
runs-on: ubuntu-latest
name: Publish YDB SLO Report
permissions:
checks: write
contents: read
pull-requests: write
steps:
- name: Publish YDB SLO Report
uses: ydb-platform/ydb-slo-action/report@v2
with:
name: ${{matrix.sdk.name}}-logs
path: |
./current.log
./baseline.log
retention-days: 1
github_token: ${{ secrets.GITHUB_TOKEN }}
github_run_id: ${{ github.run_id }}
4 changes: 1 addition & 3 deletions tests/slo/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
FROM golang:1.24 AS build
ARG SRC_PATH
ARG JOB_NAME
ARG REF=unknown
COPY . /src
WORKDIR /src/tests/slo/${SRC_PATH}
RUN CGO_ENABLED=0 go build -o /build/slo-go-workload -ldflags "-X \"main.ref=${REF}\" -X \"main.label=${SRC_PATH}\" -X \"main.jobName=${JOB_NAME}\"" .
RUN CGO_ENABLED=0 go build -o /build/slo-go-workload .

FROM scratch
COPY --from=build /build /
Expand Down
Loading