Skip to content

Commit 1e1765e

Browse files
authored
chore: fix the collect-logs.sh (#355)
Signed-off-by: Zhiying Lin <[email protected]>
1 parent ed6177d commit 1e1765e

File tree

1 file changed

+121
-54
lines changed

1 file changed

+121
-54
lines changed

test/e2e/collect-logs.sh

Lines changed: 121 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -34,41 +34,124 @@ echo -e "${GREEN}Starting log collection at ${TIMESTAMP}${NC}"
3434
echo "Logs will be saved to: ${LOG_DIR}"
3535
echo ""
3636

# Function to collect fleet agent logs directly from node filesystem using docker exec
# This approach bypasses kubectl logs limitations and accesses the full log history
# including rotated and compressed log files stored in /var/log/pods.
# Globals:
#   YELLOW, RED, NC (read): colour escape sequences used in status messages
# Args:
#   cluster_name: The cluster name; used for status messages only. Node discovery
#                 uses whatever context kubectl currently points at, so the caller
#                 must have exported the right kubeconfig beforehand.
#   node_log_dir: The directory under which one sub-directory per node is created
#   agent_type:   The type of agent ("hub-agent" or "member-agent")
# Returns:
#   0 when collection was attempted or no nodes were found,
#   1 when node_log_dir cannot be created.
collect_node_agent_logs() {
    local cluster_name=$1
    local node_log_dir=$2
    local agent_type=$3  # "hub-agent" or "member-agent"

    echo -e "${YELLOW}Collecting ${agent_type} logs from cluster ${cluster_name} nodes${NC}"

    # Get all nodes in the cluster (best effort: a kubectl failure yields an empty list)
    local node_list
    node_list=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || echo "")

    # jsonpath prints the names separated by spaces. Split with an explicit IFS so
    # the result does not depend on the caller's IFS (e.g. IFS=$'\n\t' in strict mode).
    # read -d '' hits EOF and returns non-zero even on success, hence the || true.
    local -a nodes=()
    IFS=$' \t\n' read -r -d '' -a nodes <<<"${node_list}" || true

    if [[ ${#nodes[@]} -eq 0 ]]; then
        echo -e "${RED}No nodes found in cluster ${cluster_name}${NC}"
        return 0
    fi

    # Create node logs directory
    if ! mkdir -p "${node_log_dir}"; then
        echo -e "${RED}Failed to create log directory ${node_log_dir}${NC}"
        return 1
    fi

    local node node_specific_dir
    for node in "${nodes[@]}"; do
        echo " - Collecting ${agent_type} logs from node ${node}"
        node_specific_dir="${node_log_dir}/${node}"
        if ! mkdir -p "${node_specific_dir}"; then
            # Keep going: one unwritable directory should not lose the other nodes' logs.
            echo -e "${RED}Failed to create log directory ${node_specific_dir}${NC}"
            continue
        fi

        # Collect specific agent logs from node filesystem.
        # NOTE(review): the node name is passed to `docker exec` as a container
        # name, which presumably holds for kind clusters - confirm for other setups.
        collect_agent_logs_from_node "${node}" "${cluster_name}" "${node_specific_dir}" "${agent_type}"
    done
}
7170

71+
# Function to collect specific agent logs from node filesystem
# Collects all log files including rotated (*.log.*) and compressed (*.gz) files.
# Each source file is written to its own output file, prefixed with a short header
# describing where it came from; *.gz files are decompressed with zcat on the node.
# Globals:
#   NAMESPACE (read): namespace whose pod log directories are searched
# Args:
#   node: The node name to collect logs from (used as the `docker exec` target)
#   cluster_name: The cluster name for logging context
#   node_log_dir: The directory to save the collected logs (must already exist;
#                 it is removed again if it ends up empty)
#   agent_type: The type of agent ("hub-agent" or "member-agent")
# Returns:
#   0 always; failures to read individual files are recorded inside the output file.
collect_agent_logs_from_node() {
    local node=$1
    local cluster_name=$2
    local node_log_dir=$3
    local agent_type=$4  # "hub-agent" or "member-agent"

    echo " -> Collecting ${agent_type} logs from node filesystem"

    # Look up the agent's log files once (including .log, .log.*, and .gz files).
    # Best effort: if docker exec or find fails, treat it as "no logs on this node".
    local log_files
    log_files=$(docker exec "${node}" find /var/log/pods \
        -path "*/${NAMESPACE}_*${agent_type}*" -type f \
        \( -name "*.log" -o -name "*.log.*" -o -name "*.gz" \) 2>/dev/null || echo "")

    echo " -> Found log paths: ${log_files}"

    if [[ -z "${log_files}" ]]; then
        # No agent logs found, don't create any file and remove directory if empty
        echo " -> No ${agent_type} logs found on node ${node}"
        rmdir "${node_log_dir}" 2>/dev/null || true
        return 0
    fi

    local logfile file_name container_dir pod_dir output_filename individual_log_file
    local written=0

    # The here-string keeps the loop in the current shell so the counter survives.
    while IFS= read -r logfile; do
        [[ -n "${logfile}" ]] || continue

        # Path components of <...>/<pod dir>/<container dir>/<file>
        file_name=${logfile##*/}
        container_dir=${logfile%/*}
        pod_dir=${container_dir%/*}
        container_dir=${container_dir##*/}
        pod_dir=${pod_dir##*/}

        # <container>_<file>, without a trailing .gz and always ending in .log
        output_filename="${container_dir}_${file_name%.gz}"
        [[ "${output_filename}" == *.log ]] || output_filename+=".log"
        individual_log_file="${node_log_dir}/${agent_type}-${output_filename}"

        # Several pods of the same agent on one node share container and file names
        # (e.g. .../hub-agent/0.log). Prefix the pod directory instead of letting a
        # later pod silently overwrite an earlier pod's log.
        if [[ -e "${individual_log_file}" ]]; then
            output_filename="${pod_dir}_${output_filename}"
            individual_log_file="${node_log_dir}/${agent_type}-${output_filename}"
        fi

        if ! {
            echo "# ${agent_type} logs from node filesystem"
            echo "# Timestamp: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
            echo "# Node: ${node}"
            echo "# Cluster: ${cluster_name}"
            echo "# Source log file: ${logfile}"
            echo "# Method: Direct access to /var/log/pods via docker exec"
            echo "# =================================="
            echo ""

            # </dev/null keeps docker from ever consuming the loop's input.
            if [[ "${logfile}" == *.gz ]]; then
                echo "# Note: This is a compressed log file that has been decompressed"
                echo ""
                # Decompress and read the file
                docker exec "${node}" zcat "${logfile}" </dev/null 2>/dev/null \
                    || echo "Failed to decompress and read ${logfile}"
            else
                # Regular log file (including rotated .log.* files)
                docker exec "${node}" cat "${logfile}" </dev/null 2>/dev/null \
                    || echo "Failed to read ${logfile}"
            fi
        } > "${individual_log_file}"; then
            # The redirection itself failed (e.g. unwritable directory).
            echo " -> Failed to write ${individual_log_file}"
            continue
        fi

        written=$((written + 1))
        echo " -> ${agent_type}-${output_filename}"
    done <<<"${log_files}"

    # If no log files were actually created, clean up empty directory
    if [[ ${written} -eq 0 ]]; then
        echo " -> No valid ${agent_type} logs processed on node ${node}"
        rmdir "${node_log_dir}" 2>/dev/null || true
    fi
}
153+
154+
72155
# Collect hub cluster logs
73156
echo -e "${GREEN}=== Collecting Hub Cluster Logs ===${NC}"
74157
kind export kubeconfig --name "${HUB_CLUSTER}" 2>/dev/null || {
@@ -80,42 +163,26 @@ kind export kubeconfig --name "${HUB_CLUSTER}" 2>/dev/null || {
80163
# Hub logs live under ${LOG_DIR}/hub
HUB_LOG_DIR="${LOG_DIR}/hub"
mkdir -p "${HUB_LOG_DIR}"

# Pull the hub-agent logs straight from the hub cluster's nodes
collect_node_agent_logs "${HUB_CLUSTER}" "${HUB_LOG_DIR}/nodes" "hub-agent"

# Repeat the same collection for every member cluster
for cluster in "${MEMBER_CLUSTERS[@]}"; do
    echo -e "${GREEN}=== Collecting Member Cluster Logs: ${cluster} ===${NC}"

    # Switch the kubeconfig to this member cluster; skip the cluster if that fails
    kind export kubeconfig --name "${cluster}" 2>/dev/null || {
        echo -e "${RED}Failed to export kubeconfig for cluster ${cluster}, skipping...${NC}"
        continue
    }

    # Each member cluster gets its own directory under ${LOG_DIR}
    MEMBER_LOG_DIR="${LOG_DIR}/${cluster}"
    mkdir -p "${MEMBER_LOG_DIR}"

    # Pull the member-agent logs from this cluster's nodes
    collect_node_agent_logs "${cluster}" "${MEMBER_LOG_DIR}/nodes" "member-agent"

    echo ""
done
121188

0 commit comments

Comments
 (0)