@@ -65,32 +65,27 @@ func NewHealthChecker(hco *options.HealthCheckerOptions) (types.HealthChecker, e
65
65
// CheckHealth checks for the health of the component and tries to repair if enabled.
66
66
// Returns true if healthy, false otherwise.
67
67
func (hc * healthChecker ) CheckHealth () (bool , error ) {
68
- var logStartTime string
69
68
healthy , err := hc .healthCheckFunc ()
70
69
if err != nil {
71
70
return healthy , err
72
71
}
73
- uptime , err := hc .uptimeFunc ()
74
- if err != nil {
75
- glog .Warningf ("Failed to get the uptime: %+v" , err )
76
- return true , err
77
- }
78
- if hc .loopBackTime > 0 && uptime > hc .loopBackTime {
79
- logStartTime = time .Now ().Add (- hc .loopBackTime ).Format (types .LogParsingTimeLayout )
80
- } else {
81
- logStartTime = time .Now ().Add (- uptime ).Format (types .LogParsingTimeLayout )
82
- }
83
- logPatternHealthy , err := logPatternHealthCheck (hc .service , logStartTime , hc .logPatternsToCheck )
72
+ logPatternHealthy , err := logPatternHealthCheck (hc .service , hc .loopBackTime , hc .logPatternsToCheck )
84
73
if err != nil {
85
74
return logPatternHealthy , err
86
75
}
87
76
if healthy && logPatternHealthy {
88
77
return true , nil
89
78
}
79
+
90
80
// The service is unhealthy.
91
81
// Attempt repair based on flag.
92
82
if hc .enableRepair {
93
83
// repair if the service has been up for the cool down period.
84
+ uptime , err := hc .uptimeFunc ()
85
+ if err != nil {
86
+ glog .Infof ("error in getting uptime for %v: %v\n " , hc .component , err )
87
+ return false , nil
88
+ }
94
89
glog .Infof ("%v is unhealthy, component uptime: %v\n " , hc .component , uptime )
95
90
if uptime > hc .coolDownTime {
96
91
glog .Infof ("%v cooldown period of %v exceeded, repairing" , hc .component , hc .coolDownTime )
@@ -102,10 +97,22 @@ func (hc *healthChecker) CheckHealth() (bool, error) {
102
97
103
98
// logPatternHealthCheck checks for the provided logPattern occurrences in the service logs.
104
99
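// Returns true if logPatternsToCheck is empty or if no pattern appears its threshold count of times in the checked log window (since service start, or only the last loopBackTime when that is set and shorter than the uptime), false otherwise.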
105
- func logPatternHealthCheck (service , logStartTime string , logPatternsToCheck map [string ]int ) (bool , error ) {
100
+ func logPatternHealthCheck (service string , loopBackTime time. Duration , logPatternsToCheck map [string ]int ) (bool , error ) {
106
101
if len (logPatternsToCheck ) == 0 {
107
102
return true , nil
108
103
}
104
+ uptimeFunc := getUptimeFunc (service )
105
+ glog .Infof ("Getting uptime for service: %v\n " , service )
106
+ uptime , err := uptimeFunc ()
107
+ if err != nil {
108
+ glog .Warningf ("Failed to get the uptime: %+v" , err )
109
+ return true , err
110
+ }
111
+
112
+ logStartTime := time .Now ().Add (- uptime ).Format (types .LogParsingTimeLayout )
113
+ if loopBackTime > 0 && uptime > loopBackTime {
114
+ logStartTime = time .Now ().Add (- loopBackTime ).Format (types .LogParsingTimeLayout )
115
+ }
109
116
for pattern , count := range logPatternsToCheck {
110
117
healthy , err := checkForPattern (service , logStartTime , pattern , count )
111
118
if err != nil || ! healthy {
@@ -127,7 +134,6 @@ func healthCheckEndpointOKFunc(endpoint string, timeout time.Duration) func() (b
127
134
}
128
135
}
129
136
130
-
131
137
// getHealthCheckFunc returns the health check function based on the component.
132
138
func getHealthCheckFunc (hco * options.HealthCheckerOptions ) func () (bool , error ) {
133
139
switch hco .Component {
0 commit comments