Skip to content

Commit 8ca7ec1

Browse files
(test) : Collect and upload logs when test fails for k8s (kubeflow#11618)
fix: add log upload functionality changes test update permission update permission fail test remove exit update permission update update add condition test test Remove exit test fix pod name Remove exit refactor test remove exit Signed-off-by: chahatsagarmain <[email protected]>
1 parent b131566 commit 8ca7ec1

File tree

8 files changed

+227
-15
lines changed

8 files changed

+227
-15
lines changed
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#!/usr/bin/env bash
#
# Dumps the events and logs of every pod in a Kubernetes namespace to a
# report file (and to stdout).
#
# Usage: <script> --ns <namespace> [--output <file>]
#   --ns      namespace to inspect (required)
#   --output  file the report is written to
#             (default: /tmp/tmp.log/tmp_pod_log.txt)

set -e

NS=""
OUTPUT_FILE="/tmp/tmp.log/tmp_pod_log.txt"

while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --ns | --output)
      # Both options consume a value. Without this check a trailing option
      # would make the final `shift` fail and `set -e` would abort the
      # script without any message.
      if [[ "$#" -lt 2 ]]; then
        echo "Missing value for parameter: $1"
        exit 1
      fi
      if [[ "$1" == "--ns" ]]; then
        NS="$2"
      else
        OUTPUT_FILE="$2"
      fi
      shift
      ;;
    *)
      echo "Unknown parameter passed: $1"
      exit 1
      ;;
  esac
  shift
done

# Create the directory of the file that is actually written, so a custom
# --output location works as well as the default under /tmp/tmp.log.
mkdir -p -- "$(dirname -- "$OUTPUT_FILE")"

if [[ -z "$NS" ]]; then
  echo "The --ns parameter is required."
  exit 1
fi
23+
24+
#######################################
# Verifies that a namespace can be fetched with kubectl.
# Arguments: $1 - namespace name
# Outputs:   an error line on stdout when the lookup fails
# Returns:   0 when the namespace is found; otherwise exits the shell
#            with status 1
#######################################
check_namespace() {
  local ns_name=$1

  # kubectl's own output (stdout and stderr) is discarded; only its exit
  # status matters here.
  kubectl get namespace "${ns_name}" >/dev/null 2>&1 && return 0

  echo "Namespace '${ns_name}' does not exist."
  exit 1
}
30+
31+
#######################################
# Writes the events and logs of every pod in a namespace to stdout and to
# the report file.
# Globals:   OUTPUT_FILE (read) - path of the report file. It is truncated
#            when pods are found and appended to when none are found.
# Arguments: $1 - namespace whose pods are inspected
# Outputs:   the report on stdout, mirrored into OUTPUT_FILE via tee
#######################################
function display_pod_info {
  local NAMESPACE=$1
  local POD_NAMES
  local POD_NAME

  # Query the namespace given as argument (not the global NS) so the listing
  # always matches the namespace used by the describe/logs calls below.
  POD_NAMES=$(kubectl get pods -n "${NAMESPACE}" -o custom-columns=":metadata.name" --no-headers)

  if [[ -z "${POD_NAMES}" ]]; then
    echo "No pods found in namespace '${NAMESPACE}'." | tee -a "$OUTPUT_FILE"
    return
  fi

  echo "Pod Information for Namespace: ${NAMESPACE}" > "$OUTPUT_FILE"

  # Word splitting is intentional: POD_NAMES holds one pod name per line.
  for POD_NAME in ${POD_NAMES}; do
    {
      echo "===== Pod: ${POD_NAME} in ${NAMESPACE} ====="
      echo "----- EVENTS -----"
      # Keep only the Events section (and up to 100 lines after its header).
      kubectl describe pod "${POD_NAME}" -n "${NAMESPACE}" | grep -A 100 Events || echo "No events found for pod ${POD_NAME}."

      echo "----- LOGS -----"
      # --all-containers: pods with several containers would otherwise only
      # contribute the logs of a single container.
      kubectl logs "${POD_NAME}" -n "${NAMESPACE}" --all-containers=true || echo "No logs found for pod ${POD_NAME}."

      echo "==========================="
      echo ""
    } | tee -a "$OUTPUT_FILE"
  done

  echo "Pod information stored in $OUTPUT_FILE"
}
60+
61+
# Entry point: confirm the namespace exists, then report on its pods.
main() {
  check_namespace "${NS}"
  display_pod_info "${NS}"
}

main

.github/workflows/backend.yml

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,29 +45,52 @@ jobs:
4545
pip install -e sdk/python
4646
- name: Create KFP cluster
4747
uses: ./.github/actions/kfp-tekton-cluster
48+
4849
- name: "flip coin test"
50+
id: flip-coin
4951
run: |
5052
. .venv/bin/activate
5153
TEST_SCRIPT="test-flip-coin.sh" ./.github/resources/scripts/e2e-test.sh
54+
continue-on-error: true
55+
5256
- name: "static loop test"
57+
id: static-loop
5358
run: |
5459
. .venv/bin/activate
5560
TEST_SCRIPT="test-static-loop.sh" ./.github/resources/scripts/e2e-test.sh
61+
continue-on-error: true
62+
5663
- name: "dynamic loop test"
64+
id: dynamic-loop
5765
run: |
5866
. .venv/bin/activate
5967
TEST_SCRIPT="test-dynamic-loop.sh" ./.github/resources/scripts/e2e-test.sh
68+
continue-on-error: true
69+
6070
- name: "use env"
71+
id: use-env
6172
run: |
6273
. .venv/bin/activate
6374
TEST_SCRIPT="test-env.sh" ./.github/resources/scripts/e2e-test.sh
75+
continue-on-error: true
76+
6477
- name: "use volume"
78+
id: use-volume
6579
run: |
6680
. .venv/bin/activate
6781
TEST_SCRIPT="test-volume.sh" ./.github/resources/scripts/e2e-test.sh
82+
continue-on-error: true
83+
84+
- name: Collect failed logs
85+
if: ${{ (steps.flip-coin.outcome != 'success') || (steps.static-loop.outcome != 'success') || (steps.dynamic-loop.outcome != 'success') || (steps.use-env.outcome != 'success') || (steps.use-volume.outcome != 'success') }}
86+
run: |
87+
./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
88+
exit 1
89+
6890
- name: Collect test results
6991
if: always()
7092
uses: actions/upload-artifact@v4
7193
with:
7294
name: kfp-tekton-backend-artifacts
73-
path: /tmp/tmp.*/*
95+
path: /tmp/tmp*/*
96+

.github/workflows/e2e-test.yml

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,25 @@ jobs:
3939

4040
- name: Forward API port
4141
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888
42-
42+
4343
- name: Initialization tests v1
44+
id: tests
4445
working-directory: ./backend/test/initialization
4546
run: go test -v ./... -namespace kubeflow -args -runIntegrationTests=true
47+
continue-on-error: true
48+
49+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    # The script sits next to forward-port.sh under .github/resources/scripts.
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
4654
4755
- name: Collect test results
4856
if: always()
4957
uses: actions/upload-artifact@v4
5058
with:
5159
name: kfp-initialization-tests-v1-artifacts-k8s-${{ matrix.k8s_version }}
52-
path: /tmp/tmp.*/*
60+
path: /tmp/tmp*/*
5361

5462
initialization-tests-v2:
5563
runs-on: ubuntu-latest
@@ -71,20 +79,27 @@ jobs:
7179
with:
7280
k8s_version: ${{ matrix.k8s_version }}
7381

74-
7582
- name: Forward API port
7683
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888
7784

7885
- name: Initialization tests v2
86+
id: tests
7987
working-directory: ./backend/test/v2/initialization
8088
run: go test -v ./... -namespace kubeflow -args -runIntegrationTests=true
89+
continue-on-error: true
90+
91+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    # The script sits next to forward-port.sh under .github/resources/scripts.
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
8196
8297
- name: Collect test results
8398
if: always()
8499
uses: actions/upload-artifact@v4
85100
with:
86101
name: kfp-initialization-tests-v2-artifacts-k8s-${{ matrix.k8s_version }}
87-
path: /tmp/tmp.*/*
102+
path: /tmp/tmp*/*
88103

89104
api-integration-tests-v1:
90105
runs-on: ubuntu-latest
@@ -113,15 +128,23 @@ jobs:
113128
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "mysql" 3306 3306
114129

115130
- name: API integration tests v1
131+
id: tests
116132
working-directory: ./backend/test/integration
117133
run: go test -v ./... -namespace kubeflow -args -runIntegrationTests=true
134+
continue-on-error: true
135+
136+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    # The script sits next to forward-port.sh under .github/resources/scripts.
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
118141
119142
- name: Collect test results
120143
if: always()
121144
uses: actions/upload-artifact@v4
122145
with:
123146
name: kfp-api-integration-tests-v1-artifacts-k8s-${{ matrix.k8s_version }}
124-
path: /tmp/tmp.*/*
147+
path: /tmp/tmp*/*
125148

126149
api-integration-tests-v2:
127150
runs-on: ubuntu-latest
@@ -147,15 +170,23 @@ jobs:
147170
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888
148171

149172
- name: API integration tests v2
173+
id: tests
150174
working-directory: ./backend/test/v2/integration
151175
run: go test -v ./... -namespace kubeflow -args -runIntegrationTests=true
176+
continue-on-error: true
177+
178+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    # The script sits next to forward-port.sh under .github/resources/scripts.
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
152183
153184
- name: Collect test results
154185
if: always()
155186
uses: actions/upload-artifact@v4
156187
with:
157188
name: kfp-api-integration-tests-v2-artifacts-k8s-${{ matrix.k8s_version }}
158-
path: /tmp/tmp.*/*
189+
path: /tmp/tmp*/*
159190

160191
frontend-integration-test:
161192
runs-on: ubuntu-latest
@@ -186,17 +217,26 @@ jobs:
186217
- name: Build frontend integration tests image
187218
working-directory: ./test/frontend-integration-test
188219
run: docker build . -t kfp-frontend-integration-test:local
220+
continue-on-error: true
189221

190222
- name: Frontend integration tests
223+
id: tests
191224
run: docker run --net=host kfp-frontend-integration-test:local --remote-run true
225+
continue-on-error: true
226+
227+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    # The script lives under .github/resources/scripts, as in the other
    # workflows. The report goes into a /tmp/tmp*/ directory so the upload
    # step's /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
192232
193233
- name: Collect test results
194234
if: always()
195235
uses: actions/upload-artifact@v4
196236
with:
197237
name: kfp-frontend-integration-test-artifacts-k8s-${{ matrix.k8s_version }}
198-
path: /tmp/tmp.*/*
199-
238+
path: /tmp/tmp*/*
239+
200240
basic-sample-tests:
201241
runs-on: ubuntu-latest
202242
strategy:
@@ -224,14 +264,23 @@ jobs:
224264
run: pip3 install -r ./test/sample-test/requirements.txt
225265

226266
- name: Basic sample tests - sequential
  id: sequential-test
  run: python3 ./test/sample-test/sample_test_launcher.py sample_test run_test --namespace kubeflow --test-name sequential --results-gcs-dir output
  # Without this, a failure here stops the job before the log collection
  # step below gets a chance to run (its `if` has an implicit success()).
  continue-on-error: true

- name: Basic sample tests - exit_handler
  id: sample-test
  run: python3 ./test/sample-test/sample_test_launcher.py sample_test run_test --namespace kubeflow --test-name exit_handler --expected-result failed --results-gcs-dir output
  continue-on-error: true

# Runs when either sample test did not succeed: gathers pod events and logs,
# then fails the job that continue-on-error would otherwise leave green.
- name: Collect failed logs
  if: ${{ (steps.sequential-test.outcome != 'success') || (steps.sample-test.outcome != 'success') }}
  run: |
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
231280
232281
- name: Collect test results
233282
if: always()
234283
uses: actions/upload-artifact@v4
235284
with:
236-
name: kfp-basic-sample-tests-artifacts-k8s-${{ matrix.k8s_version }}
237-
path: /tmp/tmp.*/*
285+
name: kfp-e2e-tests-artifacts-k8s-${{ matrix.k8s_version }}
286+
path: /tmp/tmp*/*

.github/workflows/kfp-kubernetes-execution-tests.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
run: sudo apt-get install protobuf-compiler -y
4444

4545
- name: Install setuptools
46-
run: |
46+
run: |
4747
pip3 install setuptools
4848
pip3 freeze
4949
@@ -73,7 +73,22 @@ jobs:
7373
run: pip install -r ./test/kfp-kubernetes-execution-tests/requirements.txt
7474

7575
- name: Run tests
76+
id: test
7677
run: |
7778
export KFP_ENDPOINT="http://localhost:8888"
7879
export TIMEOUT_SECONDS=2700
7980
pytest ./test/kfp-kubernetes-execution-tests/sdk_execution_tests.py --asyncio-task-timeout $TIMEOUT_SECONDS
81+
continue-on-error: true
82+
83+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.test.outcome != 'success'
  run: |
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
88+
89+
- name: Collect test results
90+
if: always()
91+
uses: actions/upload-artifact@v4
92+
with:
93+
name: kfp-execution-tests-artifacts-k8s-${{ matrix.k8s_version }}
94+
path: /tmp/tmp*/*

.github/workflows/kfp-samples.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,21 @@ jobs:
4040
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888
4141

4242
- name: Run Samples Tests
43+
id: tests
4344
run: |
4445
./backend/src/v2/test/sample-test.sh
46+
continue-on-error: true
47+
48+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
53+
54+
- name: Collect test results
55+
if: always()
56+
uses: actions/upload-artifact@v4
57+
with:
58+
name: kfp-samples-tests-artifacts-k8s-${{ matrix.k8s_version }}
59+
path: /tmp/tmp*/*
60+

.github/workflows/periodic.yml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,27 @@ jobs:
2222
uses: ./.github/actions/kfp-cluster
2323
with:
2424
k8s_version: ${{ matrix.k8s_version }}
25+
2526
- name: Port forward kfp apiserver
2627
run: |
2728
nohup kubectl port-forward --namespace kubeflow svc/ml-pipeline 8888:8888 &
29+
2830
- name: Run Functional Tests
31+
id: tests
2932
run: |
3033
log_dir=$(mktemp -d)
3134
./test/kfp-functional-test/kfp-functional-test.sh > $log_dir/periodic_tests.txt
35+
continue-on-error: true
36+
37+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
42+
3243
- name: Collect test results
3344
if: always()
3445
uses: actions/upload-artifact@v4
3546
with:
3647
name: periodic-functional-artifacts-k8s-${{ matrix.k8s_version }}
37-
path: /tmp/tmp.*/*
48+
path: /tmp/tmp*/*

.github/workflows/sdk-execution.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
run: sudo apt-get install protobuf-compiler -y
4444

4545
- name: Install setuptools
46-
run: |
46+
run: |
4747
pip3 install setuptools
4848
pip3 freeze
4949
@@ -65,7 +65,22 @@ jobs:
6565
run: pip install -r ./test/sdk-execution-tests/requirements.txt
6666

6767
- name: Run tests
68+
id: tests
6869
run: |
6970
export KFP_ENDPOINT="http://localhost:8888"
7071
export TIMEOUT_SECONDS=2700
7172
pytest ./test/sdk-execution-tests/sdk_execution_tests.py --asyncio-task-timeout $TIMEOUT_SECONDS
73+
continue-on-error: true
74+
75+
# Runs only when the test step did not succeed: gathers pod events and logs
# from the kubeflow namespace, then fails the job (the test step itself is
# marked continue-on-error).
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    # The report goes into a /tmp/tmp*/ directory so the upload step's
    # /tmp/tmp*/* glob includes it.
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    exit 1
80+
81+
# Always uploads whatever the job left under /tmp/tmp*/ as a build artifact.
- name: Collect test results
  if: always()
  uses: actions/upload-artifact@v4
  with:
    # Named after this workflow rather than the periodic functional tests,
    # so downloaded artifacts can be told apart.
    name: kfp-sdk-execution-tests-artifacts-k8s-${{ matrix.k8s_version }}
    path: /tmp/tmp*/*

0 commit comments

Comments
 (0)