@@ -19,7 +19,16 @@ package systemlogmonitor
1919import (
2020 "encoding/json"
2121 "io/ioutil"
22+ "k8s.io/heapster/common/kubernetes"
23+ clientset "k8s.io/client-go/kubernetes"
24+ "k8s.io/node-problem-detector/cmd/options"
25+ "net/url"
26+ "os"
27+ "path/filepath"
28+ "regexp"
29+ "strings"
2230 "time"
31+ "fmt"
2332
2433 "github.com/golang/glog"
2534
@@ -32,9 +41,19 @@ import (
3241 "k8s.io/node-problem-detector/pkg/types"
3342 "k8s.io/node-problem-detector/pkg/util"
3443 "k8s.io/node-problem-detector/pkg/util/tomb"
44+ "k8s.io/node-problem-detector/pkg/version"
45+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3546)
3647
37- const SystemLogMonitorName = "system-log-monitor"
48+ const (
49+ SystemLogMonitorName = "system-log-monitor"
50+ OOMREASON = "PodOOMKilling"
51+ )
52+
53+ var (
54+ uuidRegx * regexp.Regexp
55+ k8sClient * clientset.Clientset
56+ )
3857
3958func init () {
4059 problemdaemon .Register (
@@ -44,6 +63,10 @@ func init() {
4463 CmdOptionDescription : "Set to config file paths." })
4564}
4665
66+ func init () {
67+ uuidRegx = regexp .MustCompile ("[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}" )
68+ }
69+
4770type logMonitor struct {
4871 configPath string
4972 watcher watchertypes.LogWatcher
@@ -55,6 +78,17 @@ type logMonitor struct {
5578 tomb * tomb.Tomb
5679}
5780
81+ func InitK8sClientOrDie (options * options.NodeProblemDetectorOptions ) * clientset.Clientset {
82+ uri , _ := url .Parse (options .ApiServerOverride )
83+ cfg , err := kubernetes .GetKubeClientConfig (uri )
84+ if err != nil {
85+ panic (err )
86+ }
87+ cfg .UserAgent = fmt .Sprintf ("%s/%s" , filepath .Base (os .Args [0 ]), version .Version ())
88+ k8sClient = clientset .NewForConfigOrDie (cfg )
89+ return k8sClient
90+ }
91+
// NewLogMonitorOrDie creates a new LogMonitor and panics if an error occurs.
5993func NewLogMonitorOrDie (configPath string ) types.Monitor {
6094 l := & logMonitor {
@@ -167,6 +201,22 @@ func (l *logMonitor) generateStatus(logs []*logtypes.Log, rule systemlogtypes.Ru
167201 // We use the timestamp of the first log line as the timestamp of the status.
168202 timestamp := logs [0 ].Timestamp
169203 message := generateMessage (logs )
204+ if rule .Reason == OOMREASON && k8sClient != nil {
205+ uuid := string (uuidRegx .Find ([]byte (message )))
206+ uuid = strings .ReplaceAll (uuid ,"_" ,"-" )
207+ pl , err := k8sClient .CoreV1 ().Pods ("" ).List (metav1.ListOptions {})
208+ if err != nil {
209+ glog .Error ("Error in getting pods: %v" , err .Error ())
210+ } else {
211+ for _ , pod := range pl .Items {
212+ if string (pod .UID ) == uuid {
213+ message = fmt .Sprintf ("pod was OOM killed. node:%s pod:%s namespace:%s uuid:%s" ,
214+ pod .Spec .NodeName , pod .Name , pod .Namespace , uuid )
215+ break
216+ }
217+ }
218+ }
219+ }
170220 var events []types.Event
171221 var changedConditions []* types.Condition
172222 if rule .Type == types .Temp {
0 commit comments