Skip to content

Commit b134e96

Browse files
mprahlCollinHowland
authored andcommitted
fix(test/backend): Fix pod to pod TLS (#12357)
* Fix CI to pass the TLS client flags This also adds wait logic for the cert-manager secret to be provisioned. Signed-off-by: mprahl <[email protected]> * Stop passing in explicit MySQL configurations to MLMD in TLS mode This conflicts with the configuration options. Signed-off-by: mprahl <[email protected]> * Disable CI for unimplemented test scenarios Pod to Pod TLS manifests are not yet implemented for Kubernetes Native API mode Signed-off-by: mprahl <[email protected]> * Build metadata-writer in CI to avoid TLS connection errors Signed-off-by: mprahl <[email protected]> * Provide --ca_cert_path only if one is set Signed-off-by: mprahl <[email protected]> * Use a clearer path for the custom CA files Having it be under /etc/pki/tls could override the system CAs. Signed-off-by: mprahl <[email protected]> * Add missing mlPipelineServiceTLSEnabled flag on SWF TLS manifests Signed-off-by: mprahl <[email protected]> * Add TLS connection info to importer Signed-off-by: mprahl <[email protected]> --------- Signed-off-by: mprahl <[email protected]> Signed-off-by: CollinHowland <[email protected]>
1 parent 4b157be commit b134e96

File tree

15 files changed

+82
-67
lines changed

15 files changed

+82
-67
lines changed

.github/actions/deploy/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ runs:
6060
- name: Load Docker Images
6161
shell: bash
6262
run: |
63-
APPS=("apiserver" "driver" "launcher" "scheduledworkflow" "persistenceagent" "frontend")
63+
APPS=("apiserver" "driver" "launcher" "scheduledworkflow" "persistenceagent" "frontend" "metadata-writer")
6464
for app in "${APPS[@]}"; do
6565
docker image load -i ${{ inputs.image_path }}/$app/$app.tar
6666
docker push ${{ inputs.image_registry }}/$app:${{ inputs.image_tag }}

.github/actions/test-and-report/action.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,17 @@ runs:
8181
if [ -z $MULTI_USER ]; then
8282
MULTI_USER='false'
8383
fi
84+
TLS_ENABLED=${{ inputs.tls_enabled }}
85+
if [ -z $TLS_ENABLED ]; then
86+
TLS_ENABLED='false'
87+
fi
88+
CA_CERT_PATH=${{ inputs.ca_cert_path }}
89+
if [ -z $CA_CERT_PATH ]; then
90+
CA_CERT_PATH=''
91+
fi
8492
PULL_NUMBER="${{ github.event.inputs.pull_number || github.event.pull_request.number }}"
8593
REPO_NAME="${{ github.repository }}"
86-
go run github.com/onsi/ginkgo/v2/ginkgo -r -v --cover -p --keep-going --github-output=true --nodes=${{ inputs.num_parallel_nodes }} -v --label-filter=${{ inputs.test_label }} -- -namespace=${{ inputs.default_namespace }} -multiUserMode=$MULTI_USER -useProxy=$USE_PROXY -userNamespace=${{ inputs.user_namespace }} -uploadPipelinesWithKubernetes=${{ inputs.upload_pipelines_with_kubernetes_client}} -pullNumber=$PULL_NUMBER -repoName=$REPO_NAME
94+
go run github.com/onsi/ginkgo/v2/ginkgo -r -v --cover -p --keep-going --github-output=true --nodes=${{ inputs.num_parallel_nodes }} -v --label-filter=${{ inputs.test_label }} -- -namespace=${{ inputs.default_namespace }} -multiUserMode=$MULTI_USER -useProxy=$USE_PROXY -userNamespace=${{ inputs.user_namespace }} -uploadPipelinesWithKubernetes=${{ inputs.upload_pipelines_with_kubernetes_client}} -tlsEnabled=$TLS_ENABLED -caCertPath=$CA_CERT_PATH -pullNumber=$PULL_NUMBER -repoName=$REPO_NAME
8795
continue-on-error: true
8896

8997
- name: Collect Pod logs in case of Test Failures

.github/resources/manifests/standalone/tls-enabled/kustomization.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,8 @@ images:
1717
- name: ghcr.io/kubeflow/kfp-frontend
1818
newName: kind-registry:5000/frontend
1919
newTag: latest
20+
- name: ghcr.io/kubeflow/kfp-metadata-writer
21+
newName: kind-registry:5000/metadata-writer
22+
newTag: latest
2023
patches:
2124
- path: apiserver-env.yaml

.github/workflows/api-server-tests.yml

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,10 @@ jobs:
101101

102102
- name: Configure Cluster CA Cert for TLS-Enabled Tests
103103
shell: bash
104-
id: configure-cluster-ca-cert
105-
if: ${{ matrix.pod_to_pod_tls_enabled == "true"}}
104+
if: ${{ matrix.pod_to_pod_tls_enabled == 'true'}}
106105
run: |
107-
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ env.API_TESTS_DIR }}"/ca.crt
106+
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
107+
echo "CA_CERT_PATH=${{ github.workspace }}/ca.crt" >> $GITHUB_ENV
108108
109109
- name: Configure Input Variables
110110
shell: bash
@@ -125,11 +125,6 @@ jobs:
125125
PROXY=false
126126
fi
127127
128-
if [ "${{ matrix.pod_to_pod_tls_enabled }}" == "true" ]; then
129-
CA_CERT_PATH="ca.crt"
130-
fi
131-
132-
echo "CA_CERT_PATH=$CA_CERT_PATH" >> $GITHUB_OUTPUT
133128
echo "NUMBER_OF_NODES=$NUMBER_OF_NODES" >> $GITHUB_OUTPUT
134129
echo "TEST_LABEL=$TEST_LABEL" >> $GITHUB_OUTPUT
135130
echo "NAMESPACE=$NAMESPACE" >> $GITHUB_OUTPUT
@@ -149,7 +144,7 @@ jobs:
149144
python_version: ${{ env.PYTHON_VERSION }}
150145
report_name: "Standalone_k8sVersion=${{ matrix.k8s_version }}_argoVersion=${{ matrix.argo_version }}_cacheEnabled=${{ matrix.cache_enabled }}_proxyEnabled=${{ matrix.proxy }}"
151146
tls_enabled: ${{ matrix.pod_to_pod_tls_enabled }}
152-
ca_cert_path: ${{ steps.configure-cluster-ca-cert.outputs.CA_CERT_PATH}}
147+
ca_cert_path: ${{ env.CA_CERT_PATH }}
153148

154149
- name: Notify test reports
155150
shell: bash
@@ -173,9 +168,10 @@ jobs:
173168
include:
174169
- k8s_version: "v1.31.0"
175170
cache_enabled: "true"
176-
pipeline_store: "kubernetes"
171+
# Pod to Pod TLS manifests are not yet implemented for Kubernetes Native API mode
172+
pipeline_store: "database"
177173
pod_to_pod_tls_enabled: "true"
178-
uploadPipelinesWithKubernetesClient: "true"
174+
uploadPipelinesWithKubernetesClient: "false"
179175
fail-fast: false # So that failure in 1 type of parameterized job does not cause other jobs to terminate prematurely
180176
name: KFP API Server tests K8s Native API - K8sVersion=${{ matrix.k8s_version }} cacheEnabled=${{ matrix.cache_enabled }} argoVersion=${{ matrix.argo_version }} uploadPipelinesWithKubernetesClient=${{ matrix.uploadPipelinesWithKubernetesClient }}
181177

.github/workflows/e2e-test.yml

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,10 @@ jobs:
111111

112112
- name: Configure Cluster CA Cert for TLS-Enabled Tests
113113
shell: bash
114-
id: configure-cluster-ca-cert
115-
if: ${{ matrix.pod_to_pod_tls_enabled == "true"}}
116-
env:
117-
SCHEME: "https"
114+
if: ${{ matrix.pod_to_pod_tls_enabled == 'true'}}
118115
run: |
119-
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ env.E2E_TESTS_DIR }}"/ca.crt
120-
116+
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
117+
echo "CA_CERT_PATH=${{ github.workspace }}/ca.crt" >> $GITHUB_ENV
121118
- name: Configure Input Variables
122119
shell: bash
123120
id: configure
@@ -132,11 +129,6 @@ jobs:
132129
NAMESPACE=${{ inputs.namespace }}
133130
fi
134131
135-
if [ "${{ matrix.pod_to_pod_tls_enabled }}" == "true" ]; then
136-
CA_CERT_PATH="ca.crt"
137-
fi
138-
139-
echo "CA_CERT_PATH=$CA_CERT_PATH" >> $GITHUB_OUTPUT
140132
echo "NUMBER_OF_NODES=$NUMBER_OF_NODES" >> $GITHUB_OUTPUT
141133
echo "TEST_LABEL=$TEST_LABEL" >> $GITHUB_OUTPUT
142134
echo "NAMESPACE=$NAMESPACE" >> $GITHUB_OUTPUT
@@ -162,7 +154,7 @@ jobs:
162154
python_version: ${{ env.PYTHON_VERSION }}
163155
report_name: "${{ matrix.test_label}}Tests_K8s=${{ matrix.k8s_version }}_cacheEnabled=${{ matrix.cache_enabled }}_argoVersion=${{ matrix.argo_version}}_proxy=${{ matrix.proxy }}_storage=${{ matrix.storage }}"
164156
tls_enabled: ${{ matrix.pod_to_pod_tls_enabled }}
165-
ca_cert_path: ${{ steps.configure.outputs.CA_CERT_PATH}}
157+
ca_cert_path: ${{ env.CA_CERT_PATH }}
166158

167159
- name: Notify test reports
168160
shell: bash

.github/workflows/image-builds-with-cache.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ jobs:
4646
- image: frontend
4747
dockerfile: frontend/Dockerfile
4848
context: .
49+
50+
- image: metadata-writer
51+
dockerfile: backend/metadata_writer/Dockerfile
52+
context: .
4953
env:
5054
ARTIFACT_NAME: "${{ matrix.image }}"
5155
ARTIFACTS_PATH: "images_${{ github.sha }}"

.github/workflows/legacy-v2-api-integration-tests.yml

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ jobs:
3030
pipeline_store: [ "database", "kubernetes" ]
3131
k8s_version: [ "v1.31.0" ]
3232
pod_to_pod_tls_enabled: [ true, false ]
33+
exclude:
34+
# Pod to Pod TLS manifests are not yet implemented for Kubernetes Native API mode
35+
- pipeline_store: "kubernetes"
36+
pod_to_pod_tls_enabled: true
3337

3438
name: API integration tests v2 - K8s with ${{ matrix.pipeline_store }} ${{ matrix.k8s_version }} pod_to_pod_tls_enabled=${{ matrix.pod_to_pod_tls_enabled }}
3539
steps:
@@ -64,22 +68,10 @@ jobs:
6468

6569
- name: Configure Cluster CA Cert for TLS-Enabled Tests
6670
shell: bash
67-
id: configure-cluster-ca-cert
6871
if: ${{ matrix.pod_to_pod_tls_enabled }}
6972
run: |
70-
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > ca.crt
71-
CA_CERT_PATH="ca.crt"
72-
echo "CA_CERT_PATH=CA_CERT_PATH" >> $GITHUB_OUTPUT
73-
74-
- name: Configure Input Variables
75-
shell: bash
76-
id: configure
77-
if: ${{ steps.deploy.outcome == 'success' && matrix.pod_to_pod_tls_enabled == 'true'}}
78-
run: |
79-
if [ "${{ matrix.pod_to_pod_tls_enabled }}" == "true" ]; then
80-
CA_CERT_PATH="/ca.crt"
81-
fi
82-
echo "CA_CERT_PATH=$CA_CERT_PATH" >> $GITHUB_OUTPUT
73+
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
74+
echo "CA_CERT_PATH=${{ github.workspace }}/ca.crt" >> $GITHUB_ENV
8375
8476
- name: Forward MLMD port
8577
id: forward-mlmd-port
@@ -91,10 +83,7 @@ jobs:
9183
id: tests
9284
if: ${{ steps.forward-mlmd-port.outcome == 'success' }}
9385
working-directory: ./backend/test/v2/integration
94-
run: go test -v ./... -args -runIntegrationTests=true -namespace=kubeflow
95-
with:
96-
pod_to_pod_tls_enabled: ${{ matrix.pod_to_pod_tls_enabled }}
97-
ca_cert_path: ${{ steps.configure.outputs.CA_CERT_PATH}}
86+
run: go test -v ./... -args -runIntegrationTests=true -namespace=kubeflow -tlsEnabled=${{ matrix.pod_to_pod_tls_enabled }} -caCertPath=${{ env.CA_CERT_PATH }}
9887
env:
9988
PULL_NUMBER: ${{ github.event.pull_request.number }}
10089
PIPELINE_STORE: ${{ matrix.pipeline_store }}
@@ -110,5 +99,5 @@ jobs:
11099
if: always()
111100
uses: actions/upload-artifact@v4
112101
with:
113-
name: kfp-api-integration-tests-v2-artifacts-k8s-${{ matrix.k8s_version }}-${{ matrix.pipeline_store }}
102+
name: kfp-api-integration-tests-v2-artifacts-k8s-${{ matrix.k8s_version }}-${{ matrix.pipeline_store }}-tls-${{ matrix.pod_to_pod_tls_enabled }}
114103
path: /tmp/tmp*/*

backend/src/apiserver/common/const.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,6 @@ const (
6868
)
6969

7070
const (
71-
TLSCertCAPath = "/etc/pki/tls/cert/ca.crt"
72-
CABundleDir = "/etc/pki/tls/cert"
71+
TLSCertCAPath = "/kfp/certs/ca.crt"
72+
CABundleDir = "/kfp/certs"
7373
)

backend/src/crd/controller/scheduledworkflow/main.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,12 @@ var (
5252

5353
const (
5454
// These flags match the persistence agent
55-
mlPipelineAPIServerBasePathFlagName = "mlPipelineAPIServerBasePath"
56-
mlPipelineAPIServerNameFlagName = "mlPipelineAPIServerName"
57-
mlPipelineAPIServerGRPCPortFlagName = "mlPipelineServiceGRPCPort"
58-
caCertPathFlagName = "caCertPath"
59-
apiTokenFile = "/var/run/secrets/kubeflow/tokens/scheduledworkflow-sa-token"
55+
mlPipelineAPIServerBasePathFlagName = "mlPipelineAPIServerBasePath"
56+
mlPipelineAPIServerNameFlagName = "mlPipelineAPIServerName"
57+
mlPipelineAPIServerGRPCPortFlagName = "mlPipelineServiceGRPCPort"
58+
mlPipelineAPIServerTLSEnabledFlagName = "mlPipelineServiceTLSEnabled"
59+
caCertPathFlagName = "caCertPath"
60+
apiTokenFile = "/var/run/secrets/kubeflow/tokens/scheduledworkflow-sa-token"
6061
)
6162

6263
func main() {
@@ -172,6 +173,7 @@ func init() {
172173
flag.Float64Var(&clientQPS, "clientQPS", 5, "The maximum QPS to the master from this client.")
173174
flag.StringVar(&mlPipelineAPIServerName, mlPipelineAPIServerNameFlagName, "ml-pipeline", "Name of the ML pipeline API server.")
174175
flag.StringVar(&mlPipelineServiceGRPCPort, mlPipelineAPIServerGRPCPortFlagName, "8887", "GRPC Port of the ML pipeline API server.")
176+
flag.BoolVar(&mlPipelineServiceTLSEnabled, mlPipelineAPIServerTLSEnabledFlagName, false, "Set to true if ML pipeline API server serves over TLS.")
175177
flag.StringVar(&caCertPath, caCertPathFlagName, "", "CA cert to connect to the ML pipeline API server.")
176178
flag.IntVar(&clientBurst, "clientBurst", 10, "Maximum burst for throttle from this client.")
177179
var err error

backend/src/v2/compiler/argocompiler/container.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,13 @@ func (c *workflowCompiler) addContainerDriverTemplate() string {
216216
if common.GetMetadataTLSEnabled() {
217217
args = append(args, "--metadata_tls_enabled")
218218
}
219-
if c.mlPipelineTLSEnabled || common.GetMetadataTLSEnabled() {
219+
220+
setCABundle := false
221+
if common.GetCaBundleSecretName() != "" && (c.mlPipelineTLSEnabled || common.GetMetadataTLSEnabled()) {
220222
args = append(args, "--ca_cert_path", common.TLSCertCAPath)
223+
setCABundle = true
221224
}
225+
222226
if value, ok := os.LookupEnv(PipelineLogLevelEnvVar); ok {
223227
args = append(args, "--log_level", value)
224228
}
@@ -254,8 +258,8 @@ func (c *workflowCompiler) addContainerDriverTemplate() string {
254258
Env: proxy.GetConfig().GetEnvVars(),
255259
},
256260
}
257-
// If the apiserver is TLS-enabled, add the custom CA bundle to the container driver template.
258-
if c.mlPipelineTLSEnabled {
261+
// If TLS is enabled (apiserver or metadata), add the custom CA bundle to the container driver template.
262+
if setCABundle {
259263
ConfigureCustomCABundle(t)
260264
}
261265
c.templates[name] = t
@@ -541,7 +545,7 @@ func (c *workflowCompiler) addContainerExecutorTemplate(task *pipelinespec.Pipel
541545
},
542546
}
543547
// If the apiserver is TLS-enabled, add the custom CA bundle to the executor.
544-
if c.mlPipelineTLSEnabled {
548+
if c.mlPipelineTLSEnabled || common.GetMetadataTLSEnabled() {
545549
ConfigureCustomCABundle(executor)
546550
}
547551

0 commit comments

Comments
 (0)