Skip to content

Commit b592db1

Browse files
feat: collecting logs
1 parent ccb0690 commit b592db1

File tree

1 file changed

+100
-24
lines changed

1 file changed

+100
-24
lines changed

.github/workflows/e2e.yml

Lines changed: 100 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ jobs:
135135
# Get CAREN controller logs (main focus - this is what you need for konnector-agent debugging)
136136
echo "Collecting CAREN controller logs..."
137137
# Captures the last 1000 lines from every container of the pods matching the
# label selector; stdout and stderr both go to caren-controller.log.
# NOTE(review): if kubectl fails, the fallback echo re-opens the file with `>`,
# which truncates whatever error text kubectl had already written there.
kubectl logs -n caren-system -l app.kubernetes.io/name=cluster-api-runtime-extensions-nutanix --all-containers=true --tail=1000 > bootstrap-pod-logs/caren-controller.log 2>&1 || echo "Failed to get CAREN logs" > bootstrap-pod-logs/caren-controller.log
138-
echo "✓ Saved caren-controller.log ($(wc -l < bootstrap-pod-logs/caren-controller.log) lines)"
138+
# The line count is computed into a variable first and then interpolated below.
CAREN_LOG_LINES=$(wc -l < bootstrap-pod-logs/caren-controller.log)
139+
echo "✓ Saved caren-controller.log (${CAREN_LOG_LINES} lines)"
139140
140141
# Get CAREN controller pod descriptions
141142
# Best effort: a failure here is ignored (|| true) so later collection steps still run.
kubectl describe pods -n caren-system -l app.kubernetes.io/name=cluster-api-runtime-extensions-nutanix > bootstrap-pod-logs/caren-pods-describe.txt 2>&1 || true
@@ -161,6 +162,28 @@ jobs:
161162
kubectl get helmchartproxies -A -o yaml > bootstrap-pod-logs/helmchartproxies.yaml 2>&1 || true
162163
echo "✓ Saved Helm resources"
163164
165+
# Get Prism Central connection info from bootstrap cluster.
# Everything inside the { } group is written to prism-central-config.txt, which
# lives in the collected log directory, so secret values must never be echoed
# here: only whether they are set.
166+
echo "Collecting Prism Central configuration..."
167+
{
168+
echo "=== Prism Central Configuration from Environment ==="
169+
echo "NUTANIX_ENDPOINT: ${NUTANIX_ENDPOINT:-NOT_SET}"
170+
echo "NUTANIX_PORT: ${NUTANIX_PORT:-NOT_SET}"
171+
echo "NUTANIX_USER: ${NUTANIX_USER:-NOT_SET}"
172+
# Report presence only; the password itself is deliberately not written out.
if [[ -n "${NUTANIX_PASSWORD:-}" ]]; then
  echo "NUTANIX_PASSWORD: SET (value redacted)"
else
  echo "NUTANIX_PASSWORD: NOT_SET"
fi
173+
echo ""
174+
echo "=== Checking for Nutanix credentials in bootstrap cluster ==="
175+
176+
# Check for any nutanix-related secrets (listing only; no secret data is read)
177+
kubectl get secrets -A | grep -i nutanix || echo "No nutanix secrets found"
178+
echo ""
179+
180+
# Get konnector-agent HelmReleaseProxy values (shows what's being passed)
181+
echo "=== Konnector-Agent HelmReleaseProxy Values ==="
182+
# Label selectors only support =, ==, !=, in, notin and existence checks; there
# is no "=~" substring/regex operator. Instead, list every HelmReleaseProxy with
# its namespace, name and helmchartproxy-name label, and filter client-side.
# NOTE(review): .spec.values may itself embed credentials - confirm before
# sharing this file outside the team.
KONNECTOR_HRPS=$(kubectl get helmreleaseproxies -A --no-headers -o custom-columns='NAMESPACE:.metadata.namespace,NAME:.metadata.name,HCP:.metadata.labels.helmreleaseproxy\.addons\.cluster\.x-k8s\.io/helmchartproxy-name' 2>/dev/null | grep -F konnector-agent || true)
if [[ -n "$KONNECTOR_HRPS" ]]; then
  # Third column (the label) is only used for matching; discard it when reading.
  while read -r hrp_namespace hrp_name _; do
    echo "--- ${hrp_namespace}/${hrp_name} ---"
    kubectl get helmreleaseproxy -n "$hrp_namespace" "$hrp_name" -o jsonpath='{.spec.values}' 2>/dev/null || echo "Could not read values for ${hrp_namespace}/${hrp_name}"
    echo ""
  done <<< "$KONNECTOR_HRPS"
else
  echo "No HelmReleaseProxy found yet"
fi
183+
echo ""
184+
} > bootstrap-pod-logs/prism-central-config.txt 2>&1 || true
185+
echo "✓ Saved Prism Central config"
186+
164187
# Get all events (helpful for debugging)
165188
echo "Collecting cluster events..."
166189
kubectl get events -A --sort-by='.lastTimestamp' > bootstrap-pod-logs/events.txt 2>&1 || true
@@ -187,58 +210,111 @@ jobs:
187210
export WORKLOAD_KUBECONFIG="${PWD}/bootstrap-pod-logs/workload-cluster/kubeconfig.yaml"
188211
echo "✓ Retrieved workload cluster kubeconfig"
189212
190-
# Test connectivity
191-
if kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" cluster-info &> bootstrap-pod-logs/workload-cluster/cluster-info.txt; then
192-
echo "✓ Can connect to workload cluster"
213+
# Verify kubeconfig has content
# ([[ -s FILE ]] is true only when FILE exists and has a size greater than zero.)
214+
if [[ -s "$WORKLOAD_KUBECONFIG" ]]; then
215+
WORKLOAD_KC_LINES=$(wc -l < "$WORKLOAD_KUBECONFIG")
216+
echo "✓ Kubeconfig file has content (${WORKLOAD_KC_LINES} lines)"
217+
218+
# Test connectivity (use --request-timeout to avoid hanging)
# The probe is `get nodes`; its stdout and stderr are both captured in cluster-info.txt.
219+
echo "Testing connectivity to workload cluster..."
220+
if kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=10s get nodes &> bootstrap-pod-logs/workload-cluster/cluster-info.txt; then
221+
echo "✓ Can connect to workload cluster"
222+
else
# A failed probe is only reported (with the captured kubectl output); the
# collection steps after this if/else still run.
223+
echo "⚠ Cannot connect to workload cluster yet, but will try collecting logs anyway"
224+
cat bootstrap-pod-logs/workload-cluster/cluster-info.txt
225+
fi
193226
194-
# Get all pods in ntnx-system namespace
227+
# Get all pods in ntnx-system namespace (try with timeout)
195228
echo "Getting pods in ntnx-system namespace..."
196-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get pods -n ntnx-system -o wide > bootstrap-pod-logs/workload-cluster/ntnx-system-pods.txt 2>&1 || true
229+
# On failure the output file is rewritten with a placeholder message, so the
# file always exists in the collected logs.
# NOTE(review): that rewrite replaces any kubectl error text already captured.
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get pods -n ntnx-system -o wide > bootstrap-pod-logs/workload-cluster/ntnx-system-pods.txt 2>&1 || echo "Could not get pods (cluster may not be ready yet)" > bootstrap-pod-logs/workload-cluster/ntnx-system-pods.txt
197230
198231
# Get konnector-agent pod descriptions (THIS IS WHAT YOU WANT!)
199232
echo "Getting konnector-agent pod descriptions..."
200-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" describe pods -n ntnx-system -l app.kubernetes.io/name=konnector-agent > bootstrap-pod-logs/workload-cluster/konnector-agent-describe.txt 2>&1 || true
233+
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s describe pods -n ntnx-system -l app.kubernetes.io/name=konnector-agent > bootstrap-pod-logs/workload-cluster/konnector-agent-describe.txt 2>&1 || echo "Could not describe konnector-agent pods" > bootstrap-pod-logs/workload-cluster/konnector-agent-describe.txt
201234
202235
# Get konnector-agent pod logs
203236
echo "Getting konnector-agent pod logs..."
204-
for pod in $(kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get pods -n ntnx-system -l app.kubernetes.io/name=konnector-agent -o name 2>/dev/null); do
237+
# Iterates over whatever `get pods -o name` prints; if the query fails its error
# is discarded and the loop body simply never runs.
for pod in $(kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get pods -n ntnx-system -l app.kubernetes.io/name=konnector-agent -o name 2>/dev/null); do
205238
# `-o name` prints entries in the form pod/<name>; basename keeps only <name>.
pod_name=$(basename "$pod")
206239
echo " Getting logs for $pod_name..."
207-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" logs -n ntnx-system "$pod_name" --all-containers=true --tail=1000 > "bootstrap-pod-logs/workload-cluster/${pod_name}.log" 2>&1 || true
208-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" logs -n ntnx-system "$pod_name" --all-containers=true --previous --tail=500 > "bootstrap-pod-logs/workload-cluster/${pod_name}-previous.log" 2>&1 || true
240+
# Current logs (last 1000 lines), then logs of the previous container instance
# (--previous, last 500 lines). Both are best effort: failures are ignored.
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s logs -n ntnx-system "$pod_name" --all-containers=true --tail=1000 > "bootstrap-pod-logs/workload-cluster/${pod_name}.log" 2>&1 || true
241+
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s logs -n ntnx-system "$pod_name" --all-containers=true --previous --tail=500 > "bootstrap-pod-logs/workload-cluster/${pod_name}-previous.log" 2>&1 || true
209242
done
210243
211244
# Get hook pod descriptions and logs (hook-preinstall is what fails)
212245
echo "Getting hook pod information..."
213-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get pods -n ntnx-system | grep hook > bootstrap-pod-logs/workload-cluster/hook-pods.txt 2>&1 || true
246+
# kubectl's stderr is discarded; when the pipeline returns non-zero (for example
# grep matches no line) hook-pods.txt gets a placeholder message instead.
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get pods -n ntnx-system 2>/dev/null | grep hook > bootstrap-pod-logs/workload-cluster/hook-pods.txt 2>&1 || echo "No hook pods found or cluster not accessible" > bootstrap-pod-logs/workload-cluster/hook-pods.txt
214247
215248
# Pods are looked up by these two exact names; describe output and logs are
# collected only for the ones that exist.
for hook_pod in hook-preinstall hook-postinstall; do
216-
if kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get pod -n ntnx-system "$hook_pod" &>/dev/null; then
249+
echo " Checking for $hook_pod..."
250+
# Existence check only: all output of this `get pod` is discarded.
if kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get pod -n ntnx-system "$hook_pod" &>/dev/null; then
217251
echo " Found $hook_pod, collecting info..."
218-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" describe pod -n ntnx-system "$hook_pod" > "bootstrap-pod-logs/workload-cluster/${hook_pod}-describe.txt" 2>&1 || true
219-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" logs -n ntnx-system "$hook_pod" --all-containers=true --tail=1000 > "bootstrap-pod-logs/workload-cluster/${hook_pod}.log" 2>&1 || true
252+
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s describe pod -n ntnx-system "$hook_pod" > "bootstrap-pod-logs/workload-cluster/${hook_pod}-describe.txt" 2>&1 || true
253+
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s logs -n ntnx-system "$hook_pod" --all-containers=true --tail=1000 > "bootstrap-pod-logs/workload-cluster/${hook_pod}.log" 2>&1 || true
254+
else
255+
echo " $hook_pod not found or not accessible"
220256
fi
221257
done
222258
223-
# Get konnector-agent secret info (without credentials)
259+
# Get konnector-agent secret info (key names, presence and lengths only).
# The output file sits in the collected log directory, so decoded secret values
# and the raw Secret YAML (base64 is encoding, not protection) are never written
# to it. Presence and length are enough to tell which credential format is in use.
224260
echo "Getting konnector-agent secret info..."
225-
if kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get secret -n ntnx-system konnector-agent &>/dev/null; then
226-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get secret -n ntnx-system konnector-agent -o yaml | grep -v "password\|credential" > bootstrap-pod-logs/workload-cluster/konnector-agent-secret.yaml 2>&1 || true
227-
echo "Secret keys present:" > bootstrap-pod-logs/workload-cluster/konnector-agent-secret-keys.txt
228-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get secret -n ntnx-system konnector-agent -o jsonpath='{.data}' | grep -o '"[^"]*":' | tr -d '":' >> bootstrap-pod-logs/workload-cluster/konnector-agent-secret-keys.txt 2>&1 || true
261+
if kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get secret -n ntnx-system konnector-agent &>/dev/null; then
262+
{
263+
echo "=== Konnector-Agent Secret in Workload Cluster ==="
264+
echo ""
265+
echo "Secret keys present:"
266+
# Only the key names are extracted from the .data map; values are dropped by grep -o.
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get secret -n ntnx-system konnector-agent -o jsonpath='{.data}' 2>/dev/null | grep -o '"[^"]*":' | tr -d '":' || echo "Could not get keys"
267+
echo ""
268+
echo "=== Secret Data (presence and length only - values redacted) ==="
269+
270+
# Get username if it exists
271+
USERNAME=$(kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get secret -n ntnx-system konnector-agent -o jsonpath='{.data.username}' 2>/dev/null | base64 -d 2>/dev/null)
272+
if [[ -n "$USERNAME" ]]; then
273+
echo "username: $USERNAME"
274+
else
275+
echo "username: NOT_FOUND"
276+
fi
277+
278+
# Get password (decoded only to test presence and measure length; never printed)
279+
PASSWORD=$(kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get secret -n ntnx-system konnector-agent -o jsonpath='{.data.password}' 2>/dev/null | base64 -d 2>/dev/null)
280+
if [[ -n "$PASSWORD" ]]; then
281+
echo "password: FOUND (${#PASSWORD} characters, value redacted)"
282+
else
283+
echo "password: NOT_FOUND"
284+
fi
285+
286+
# Check for old 'credentials' format (should NOT exist)
287+
CREDENTIALS=$(kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get secret -n ntnx-system konnector-agent -o jsonpath='{.data.credentials}' 2>/dev/null | base64 -d 2>/dev/null)
288+
if [[ -n "$CREDENTIALS" ]]; then
289+
echo "credentials: FOUND (OLD FORMAT - THIS IS THE PROBLEM!)"
290+
echo "credentials value: REDACTED (${#CREDENTIALS} characters)"
291+
else
292+
echo "credentials: NOT_FOUND (good - using new format)"
293+
fi
294+
295+
echo ""
296+
echo "=== Secret Metadata (data values omitted) ==="
297+
# Print selected metadata fields only; `-o yaml` would include the base64 data.
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get secret -n ntnx-system konnector-agent -o jsonpath='{"name: "}{.metadata.name}{"\n"}{"type: "}{.type}{"\n"}{"created: "}{.metadata.creationTimestamp}{"\n"}{"labels: "}{.metadata.labels}{"\n"}' || true
298+
299+
} > bootstrap-pod-logs/workload-cluster/konnector-agent-secret-details.txt 2>&1 || true
300+
else
301+
echo "konnector-agent secret not found or not accessible" > bootstrap-pod-logs/workload-cluster/konnector-agent-secret-details.txt
229302
fi
230303
231304
# Get workload cluster events
232305
echo "Getting workload cluster events..."
233-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get events -n ntnx-system --sort-by='.lastTimestamp' > bootstrap-pod-logs/workload-cluster/events.txt 2>&1 || true
306+
# Events of the ntnx-system namespace only, ordered by lastTimestamp. As with the
# two commands below, a failure rewrites the output file with a short placeholder.
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get events -n ntnx-system --sort-by='.lastTimestamp' > bootstrap-pod-logs/workload-cluster/events.txt 2>&1 || echo "Could not get events" > bootstrap-pod-logs/workload-cluster/events.txt
234307
235308
# Get all namespaces in workload cluster
236-
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" get namespaces > bootstrap-pod-logs/workload-cluster/namespaces.txt 2>&1 || true
309+
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get namespaces > bootstrap-pod-logs/workload-cluster/namespaces.txt 2>&1 || echo "Could not get namespaces" > bootstrap-pod-logs/workload-cluster/namespaces.txt
310+
311+
# Try to get nodes
312+
echo "Getting workload cluster nodes..."
313+
kubectl --kubeconfig="$WORKLOAD_KUBECONFIG" --request-timeout=30s get nodes -o wide > bootstrap-pod-logs/workload-cluster/nodes.txt 2>&1 || echo "Could not get nodes" > bootstrap-pod-logs/workload-cluster/nodes.txt
237314
238-
echo "✓ Workload cluster logs collected successfully"
315+
# Every step above is best effort, so this message reports an attempt rather
# than success.
echo "✓ Workload cluster log collection attempted (check individual files for results)"
239316
else
240-
echo "✗ Cannot connect to workload cluster"
241-
cat bootstrap-pod-logs/workload-cluster/cluster-info.txt
317+
# Reached when the [[ -s "$WORKLOAD_KUBECONFIG" ]] check is false.
echo "✗ Kubeconfig file is empty"
242318
fi
243319
else
244320
echo "✗ Failed to retrieve workload cluster kubeconfig"

0 commit comments

Comments
 (0)