Skip to content

Commit 9ac45e8

Browse files
author
Ruijian Zhang
committed
monitoring PodOOMKilling event, look up Pod's name and put it into event message
1 parent dbdd3dc commit 9ac45e8

File tree

2 files changed

+57
-1
lines changed

2 files changed

+57
-1
lines changed

cmd/nodeproblemdetector/node_problem_detector.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package main
1818

1919
import (
20+
"k8s.io/node-problem-detector/pkg/systemlogmonitor"
2021
"os"
2122

2223
"github.com/golang/glog"
@@ -55,6 +56,11 @@ func main() {
5556
glog.Fatalf("No problem daemon is configured")
5657
}
5758

59+
if c := systemlogmonitor.InitK8sClientOrDie(npdo); c != nil {
60+
glog.Info("System Log Monitor K8S client initialized")
61+
} else {
62+
glog.Error("Failed to initialize System Log Monitor K8S client")
63+
}
5864
// Initialize exporters.
5965
defaultExporters := []types.Exporter{}
6066
if ke := k8sexporter.NewExporterOrDie(npdo); ke != nil {

pkg/systemlogmonitor/log_monitor.go

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,16 @@ package systemlogmonitor
1919
import (
2020
"encoding/json"
2121
"io/ioutil"
22+
"k8s.io/heapster/common/kubernetes"
23+
clientset "k8s.io/client-go/kubernetes"
24+
"k8s.io/node-problem-detector/cmd/options"
25+
"net/url"
26+
"os"
27+
"path/filepath"
28+
"regexp"
29+
"strings"
2230
"time"
31+
"fmt"
2332

2433
"github.com/golang/glog"
2534

@@ -32,9 +41,19 @@ import (
3241
"k8s.io/node-problem-detector/pkg/types"
3342
"k8s.io/node-problem-detector/pkg/util"
3443
"k8s.io/node-problem-detector/pkg/util/tomb"
44+
"k8s.io/node-problem-detector/pkg/version"
45+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3546
)
3647

37-
const SystemLogMonitorName = "system-log-monitor"
48+
const (
49+
// SystemLogMonitorName is the identifier string for the system log monitor.
// NOTE(review): presumably the name this package registers itself under — confirm against the Register call.
SystemLogMonitorName = "system-log-monitor"
50+
// OOMREASON is the rule reason value that triggers the pod lookup in
// generateStatus: when a matched rule has this reason and a Kubernetes client
// is available, the status message is rewritten to include the pod's identity.
OOMREASON = "PodOOMKilling"
51+
)
52+
53+
var (
54+
// uuidRegx matches a UUID written with underscores instead of dashes.
// It is compiled once in init and used to pull the pod UID out of a log message.
uuidRegx *regexp.Regexp
55+
// k8sClient is the shared Kubernetes clientset. It stays nil until
// InitK8sClientOrDie is called; callers check for nil before using it.
k8sClient *clientset.Clientset
56+
)
3857

3958
func init() {
4059
problemdaemon.Register(
@@ -44,6 +63,10 @@ func init() {
4463
CmdOptionDescription: "Set to config file paths."})
4564
}
4665

66+
func init() {
67+
uuidRegx = regexp.MustCompile("[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}")
68+
}
69+
4770
type logMonitor struct {
4871
configPath string
4972
watcher watchertypes.LogWatcher
@@ -55,6 +78,17 @@ type logMonitor struct {
5578
tomb *tomb.Tomb
5679
}
5780

81+
func InitK8sClientOrDie(options *options.NodeProblemDetectorOptions) *clientset.Clientset{
82+
uri, _ := url.Parse(options.ApiServerOverride)
83+
cfg, err := kubernetes.GetKubeClientConfig(uri)
84+
if err != nil {
85+
panic(err)
86+
}
87+
cfg.UserAgent = fmt.Sprintf("%s/%s", filepath.Base(os.Args[0]), version.Version())
88+
k8sClient = clientset.NewForConfigOrDie(cfg)
89+
return k8sClient
90+
}
91+
5892
// NewLogMonitorOrDie create a new LogMonitor, panic if error occurs.
5993
func NewLogMonitorOrDie(configPath string) types.Monitor {
6094
l := &logMonitor{
@@ -167,6 +201,22 @@ func (l *logMonitor) generateStatus(logs []*logtypes.Log, rule systemlogtypes.Ru
167201
// We use the timestamp of the first log line as the timestamp of the status.
168202
timestamp := logs[0].Timestamp
169203
message := generateMessage(logs)
204+
if rule.Reason == OOMREASON && k8sClient != nil{
205+
uuid := string(uuidRegx.Find([]byte(message)))
206+
uuid = strings.ReplaceAll(uuid,"_","-")
207+
pl, err := k8sClient.CoreV1().Pods("").List(metav1.ListOptions{})
208+
if err != nil {
209+
glog.Error("Error in getting pods: %v", err.Error())
210+
} else {
211+
for _, pod := range pl.Items {
212+
if string(pod.UID) == uuid {
213+
message = fmt.Sprintf("pod was OOM killed. node:%s pod:%s namespace:%s uuid:%s",
214+
pod.Spec.NodeName, pod.Name, pod.Namespace, uuid)
215+
break
216+
}
217+
}
218+
}
219+
}
170220
var events []types.Event
171221
var changedConditions []*types.Condition
172222
if rule.Type == types.Temp {

0 commit comments

Comments
 (0)