@@ -34,41 +34,124 @@ echo -e "${GREEN}Starting log collection at ${TIMESTAMP}${NC}"
3434echo " Logs will be saved to: ${LOG_DIR} "
3535echo " "
3636
# Collect fleet agent logs directly from the node filesystem of every node in
# a cluster, using docker exec.
# This approach bypasses kubectl logs limitations and accesses the full log
# history, including rotated and compressed log files stored in /var/log/pods.
#
# kubectl must already point at the target cluster; the node names it reports
# are passed on to collect_agent_logs_from_node, which uses them as docker
# container names.
#
# Globals:
#   YELLOW, RED, NC (read): colour escape sequences for status messages
# Args:
#   cluster_name: cluster name, used in messages and log headers
#   node_log_dir: directory that receives one sub-directory per node
#   agent_type:   "hub-agent" or "member-agent"
# Returns:
#   0 when no nodes are found; otherwise the status of the last per-node
#   collection.
collect_node_agent_logs() {
    local cluster_name=$1
    local node_log_dir=$2
    local agent_type=$3  # "hub-agent" or "member-agent"

    echo -e "${YELLOW}Collecting ${agent_type} logs from cluster ${cluster_name} nodes${NC}"

    # Get all nodes in the cluster. A failing kubectl call is deliberately
    # treated the same as a cluster without nodes.
    local nodes
    nodes=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || echo "")

    # Split the whitespace-separated names into an array with an explicit IFS,
    # so the split neither depends on the caller's IFS nor glob-expands names.
    # With -d '' read consumes all input and reports EOF as non-zero, hence
    # the "|| true".
    local -a node_list=()
    IFS=$' \t\n' read -r -d '' -a node_list <<< "$nodes" || true

    if (( ${#node_list[@]} == 0 )); then
        echo -e "${RED}No nodes found in cluster ${cluster_name}${NC}"
        return
    fi

    # Create node logs directory
    mkdir -p "${node_log_dir}"

    # Keep the loop variables local so they do not leak into the caller.
    local node node_specific_dir
    for node in "${node_list[@]}"; do
        echo "  - Collecting ${agent_type} logs from node ${node}"
        node_specific_dir="${node_log_dir}/${node}"
        mkdir -p "${node_specific_dir}"

        # Collect specific agent logs from node filesystem
        collect_agent_logs_from_node "${node}" "${cluster_name}" "${node_specific_dir}" "${agent_type}"
    done
}
7170
# Function to collect specific agent logs from node filesystem.
# Collects all log files including rotated (*.log.*) and compressed (*.gz)
# files. Each source file is written to its own output file named
# "<agent_type>-<container dir>_<file name>" (".gz" stripped, ".log" appended
# when missing), preceded by a short provenance header.
#
# If two source files would map to the same output name (for example two pods
# of the same agent on one node), the later one is additionally prefixed with
# its pod directory name instead of overwriting the earlier one.
#
# Globals:
#   NAMESPACE (read): namespace prefix of the pod log directories to search
# Args:
#   node: The node name to collect logs from (used as docker container name)
#   cluster_name: The cluster name for logging context
#   node_log_dir: The directory to save the collected logs; removed again if
#                 it ends up empty
#   agent_type: The type of agent ("hub-agent" or "member-agent")
# Returns:
#   0
collect_agent_logs_from_node() {
    local node=$1
    local cluster_name=$2
    local node_log_dir=$3
    local agent_type=$4  # "hub-agent" or "member-agent"

    echo "    -> Collecting ${agent_type} logs from node filesystem"
    echo "    -> Found log paths: $(docker exec "${node}" find /var/log/pods -path "*/${NAMESPACE}_*${agent_type}*")"

    # First check if any agent logs exist on this node (including .log,
    # .log.*, and .gz files). A failing docker exec counts as "no logs".
    local log_files
    log_files=$(docker exec "${node}" find /var/log/pods \
        -path "*/${NAMESPACE}_*${agent_type}*" -type f \
        \( -name "*.log" -o -name "*.log.*" -o -name "*.gz" \) 2>/dev/null || echo "")

    if [ -z "$log_files" ]; then
        # No agent logs found, don't create any file and remove directory if empty
        echo "    -> No ${agent_type} logs found on node ${node}"
        rmdir "${node_log_dir}" 2>/dev/null || true
        return 0
    fi

    # Make sure the destination exists even when called on its own.
    mkdir -p "${node_log_dir}"

    # Newline-delimited list of output names used so far in this call.
    local seen_names=$'\n'
    local logfile container_dir pod_dir output_filename individual_log_file

    # Feed the list through a here-string so the loop runs in the current
    # shell; IFS= keeps each path exactly as find printed it.
    while IFS= read -r logfile; do
        [ -n "$logfile" ] || continue

        # Paths look like .../<pod dir>/<container dir>/<file>; build the
        # output name from the container directory and the file name.
        container_dir=${logfile%/*}
        pod_dir=${container_dir%/*}
        output_filename="${container_dir##*/}_${logfile##*/}"

        # Remove .gz extension for the output filename if present
        output_filename=${output_filename%.gz}
        # Ensure output filename ends with .log
        if [[ "$output_filename" != *.log ]]; then
            output_filename="${output_filename}.log"
        fi

        # Never overwrite a file written earlier in this call: disambiguate
        # with the pod directory name.
        if [[ "$seen_names" == *$'\n'"${output_filename}"$'\n'* ]]; then
            output_filename="${pod_dir##*/}_${output_filename}"
        fi
        seen_names+="${output_filename}"$'\n'

        # Create individual log file for this specific log
        individual_log_file="${node_log_dir}/${agent_type}-${output_filename}"

        {
            echo "# ${agent_type} logs from node filesystem"
            echo "# Timestamp: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
            echo "# Node: ${node}"
            echo "# Cluster: ${cluster_name}"
            echo "# Source log file: ${logfile}"
            echo "# Method: Direct access to /var/log/pods via docker exec"
            echo "# =================================="
            echo ""

            # Handle different file types. stdin is redirected from /dev/null
            # so docker can never consume the loop's remaining input. A read
            # failure is recorded inside the output file rather than aborting.
            if [[ "$logfile" == *.gz ]]; then
                echo "# Note: This is a compressed log file that has been decompressed"
                echo ""
                # Decompress and read the file
                docker exec "${node}" zcat "$logfile" 2>/dev/null < /dev/null \
                    || echo "Failed to decompress and read $logfile"
            else
                # Regular log file (including rotated .log.* files)
                docker exec "${node}" cat "$logfile" 2>/dev/null < /dev/null \
                    || echo "Failed to read $logfile"
            fi
        } > "${individual_log_file}"

        echo "    -> ${agent_type}-${output_filename}"
    done <<< "$log_files"

    # Check if any files were created in the directory
    local created_files
    created_files=$(find "${node_log_dir}" -name "${agent_type}-*.log" 2>/dev/null | wc -l)

    # If no log files were actually created, clean up empty directory
    if [ "$created_files" -eq 0 ]; then
        echo "    -> No valid ${agent_type} logs processed on node ${node}"
        rmdir "${node_log_dir}" 2>/dev/null || true
    fi
    return 0
}
153+
154+
72155# Collect hub cluster logs
73156echo -e " ${GREEN} === Collecting Hub Cluster Logs ===${NC} "
74157kind export kubeconfig --name " ${HUB_CLUSTER} " 2> /dev/null || {
@@ -80,42 +163,26 @@ kind export kubeconfig --name "${HUB_CLUSTER}" 2>/dev/null || {
# Hub logs are stored under ${LOG_DIR}/hub.
HUB_LOG_DIR="${LOG_DIR}/hub"
mkdir -p "${HUB_LOG_DIR}"

# Collect hub-agent logs from hub cluster nodes
collect_node_agent_logs "${HUB_CLUSTER}" "${HUB_LOG_DIR}/nodes" "hub-agent"

# Collect member cluster logs, one cluster at a time. A cluster whose
# kubeconfig cannot be exported is reported and skipped.
for cluster in "${MEMBER_CLUSTERS[@]}"; do
    echo -e "${GREEN}=== Collecting Member Cluster Logs: ${cluster} ===${NC}"

    # Export kubeconfig for the member cluster
    kind export kubeconfig --name "${cluster}" 2>/dev/null || {
        echo -e "${RED}Failed to export kubeconfig for cluster ${cluster}, skipping...${NC}"
        continue
    }

    # Each member cluster gets its own directory under ${LOG_DIR}.
    MEMBER_LOG_DIR="${LOG_DIR}/${cluster}"
    mkdir -p "${MEMBER_LOG_DIR}"

    # Collect member-agent logs from member cluster nodes
    collect_node_agent_logs "${cluster}" "${MEMBER_LOG_DIR}/nodes" "member-agent"

    echo ""
done
121188
0 commit comments