@@ -119,6 +119,11 @@ var nackedMessages = prometheus.NewCounter(prometheus.CounterOpts{
119119 Name : "nacked_messages_total" ,
120120})
121121
122+ var actionTimeout = prometheus .NewCounter (prometheus.CounterOpts {
123+ Namespace : "mettle" ,
124+ Name : "action_timeout" ,
125+ })
126+
122127// ErrTimeout is returned when the build action exceeds the action timeout
123128var ErrTimeout = errors .New ("action execution timed out" )
124129
@@ -141,6 +146,7 @@ var metrics = []prometheus.Collector{
141146 packBytesRead ,
142147 collectOutputErrors ,
143148 nackedMessages ,
149+ actionTimeout ,
144150}
145151
146152func init () {
@@ -559,12 +565,12 @@ func (w *worker) runTaskRequest(req *pb.ExecuteRequest) *pb.ExecuteResponse {
559565// forceShutdown sends any shutdown reports and calls log.Fatal() to shut down the worker
560566func (w * worker ) forceShutdown (shutdownMsg string ) {
561567 w .Report (false , false , false , "%s" , shutdownMsg )
562- log .Info ("Force shutting down worker" )
568+ log .Debug ("Force shutting down worker" )
563569 if w .currentMsg != nil {
564570 if w .actionDigest != nil {
565571 logr .WithFields (logrus.Fields {
566572 "hash" : w .actionDigest .Hash ,
567- }).Info ("Nacking action" )
573+ }).Debug ("Nacking action" )
568574 } else {
569575 log .Error ("Nacking action but action digest is nil" )
570576 }
@@ -942,9 +948,9 @@ func (w *worker) runCommand(cmd *exec.Cmd, timeout time.Duration) error {
942948 case <- time .After (timeout ):
943949 logr .WithFields (logrus.Fields {
944950 "hash" : w .actionDigest .Hash ,
945- }).Warn ("Terminating process due to timeout" )
951+ }).Warnf ("Terminating process due to timeout: %s" , timeout )
946952 cmd .Process .Signal (os .Signal (syscall .SIGTERM ))
947-
953+ actionTimeout . Inc ()
948954 select {
949955 case <- ch :
950956 return ErrTimeout
0 commit comments