@@ -92,13 +92,39 @@ def parse_and_emit_records(podInventory, serviceList)
9292 record [ 'PodLabel' ] = [ items [ 'metadata' ] [ 'labels' ] ]
9393 record [ 'Namespace' ] = podNameSpace
9494 record [ 'PodCreationTimeStamp' ] = items [ 'metadata' ] [ 'creationTimestamp' ]
95- record [ 'PodStartTime' ] = items [ 'status' ] [ 'startTime' ]
96- record [ 'PodStatus' ] = items [ 'status' ] [ 'phase' ]
97- record [ 'PodIp' ] = items [ 'status' ] [ 'podIP' ]
95+ #for unscheduled (non-started) pods startTime does NOT exist
96+ if !items [ 'status' ] [ 'startTime' ] . nil?
97+ record [ 'PodStartTime' ] = items [ 'status' ] [ 'startTime' ]
98+ else
99+ record [ 'PodStartTime' ] = ""
100+ end
101+ #podStatus
102+ # the below accounts for the 'NodeLost' scenario, wherein the pod(s) on the lost node are still being reported as running
103+ podReadyCondition = true
104+ if !items [ 'status' ] [ 'reason' ] . nil? && items [ 'status' ] [ 'reason' ] == "NodeLost"
105+ items [ 'status' ] [ 'conditions' ] . each do |condition |
106+ if condition [ 'type' ] == "Ready" && condition [ 'status' ] == "False"
107+ podReadyCondition = false
108+ break
109+ end
110+ end
111+ end
112+ if podReadyCondition == false
113+ record [ 'PodStatus' ] = "Unknown"
114+ else
115+ record [ 'PodStatus' ] = items [ 'status' ] [ 'phase' ]
116+ end
117+ #for unscheduled (non-started) pods podIP does NOT exist
118+ if !items [ 'status' ] [ 'podIP' ] . nil?
119+ record [ 'PodIp' ] = items [ 'status' ] [ 'podIP' ]
120+ else
121+ record [ 'PodIp' ] = ""
122+ end
123+ #for unscheduled (non-started) pods nodeName does NOT exist
98124 if !items [ 'spec' ] [ 'nodeName' ] . nil?
99125 record [ 'Computer' ] = items [ 'spec' ] [ 'nodeName' ]
100126 else
101- next
127+ record [ 'Computer' ] = ""
102128 end
103129 record [ 'ClusterId' ] = KubernetesApiClient . getClusterId
104130 record [ 'ClusterName' ] = KubernetesApiClient . getClusterName
@@ -134,15 +160,22 @@ def parse_and_emit_records(podInventory, serviceList)
134160 # "message": "Back-off 5m0s restarting failed container=metrics-server pod=metrics-server-2011498749-3g453_kube-system(5953be5f-fcae-11e7-a356-000d3ae0e432)"
135161 # }
136162 # },
137- record [ 'ContainerStatus' ] = containerStatus . keys [ 0 ]
163+ # the below accounts for the 'NodeLost' scenario, wherein the containers in the lost node/pod(s) are still being reported as running
164+ if podReadyCondition == false
165+ record [ 'ContainerStatus' ] = "Unknown"
166+ else
167+ record [ 'ContainerStatus' ] = containerStatus . keys [ 0 ]
168+ end
138169 #TODO : Remove ContainerCreationTimeStamp from here since we are sending it as a metric
139170 #Picking up both container and node start time from cAdvisor to be consistent
140171 if containerStatus . keys [ 0 ] == "running"
141172 record [ 'ContainerCreationTimeStamp' ] = container [ 'state' ] [ 'running' ] [ 'startedAt' ]
142173 end
143174 podRestartCount += containerRestartCount
144175 records . push ( record . dup )
145- end
176+ end
177+ else # for unscheduled pods there are no status.containerStatuses, in this case we still want the pod
178+ records . push ( record )
146179 end #container status block end
147180 records . each do |record |
148181 if !record . nil?
@@ -157,6 +190,9 @@ def parse_and_emit_records(podInventory, serviceList)
157190 end
158191 end #podInventory block end
159192 router . emit_stream ( @tag , eventStream ) if eventStream
193+ if ( ENV [ 'ISTEST' ] == true && eventStream . count > 0 )
194+ $log. info ( "in_kube_podinventory::emit-stream : Success @ #{ Time . now . utc . iso8601 } " )
195+ end
160196 rescue => errorStr
161197 $log. warn "Failed in parse_and_emit_record pod inventory: #{ errorStr } "
162198 $log. debug_backtrace ( errorStr . backtrace )
0 commit comments