@@ -14,6 +14,7 @@ import (
1414 "path"
1515 "path/filepath"
1616 "runtime"
17+ "strconv"
1718 "strings"
1819 "syscall"
1920 "time"
@@ -732,12 +733,23 @@ func (w *worker) execute(req *pb.ExecuteRequest, action *pb.Action, command *pb.
732733 duration , _ := ptypes .Duration (action .Timeout )
733734 ctx , cancel := context .WithTimeout (context .Background (), duration )
734735 defer cancel ()
736+
735737 logr .WithFields (logrus.Fields {
736738 "hash" : w .actionDigest .Hash ,
737739 "stage" : "exec_action" ,
738740 "timeout" : duration .String (),
741+ "argv0" : command .Arguments [0 ],
742+ "argc" : len (command .Arguments ),
739743 }).Debug ("Executing action - started" )
740744
745+ logr .WithFields (logrus.Fields {
746+ "hash" : w .actionDigest .Hash ,
747+ "sandbox" : w .sandbox ,
748+ "altSandbox" : w .altSandbox ,
749+ "shouldSandbox" : w .shouldSandbox (command ),
750+ "argv0" : command .Arguments [0 ],
751+ }).Debug ("Sandbox selection" )
752+
741753 cmd := exec .CommandContext (ctx , commandPath (command ), command .Arguments [1 :]... )
742754 // Setting Pdeathsig should ideally make subprocesses get kill signals if we die.
743755
@@ -962,21 +974,8 @@ func (w *worker) runCommand(ctx context.Context, cmd *exec.Cmd, timeout time.Dur
962974 if ! errors .Is (ctx .Err (), context .DeadlineExceeded ) {
963975 return
964976 }
965-
966- pgid , _ := syscall .Getpgid (pid )
967- var processTree string
968- if pgid > 0 {
969- args := []string {"-g" , fmt .Sprintf ("%d" , pgid ), "-o" , "pid,ppid,pgid,state,etime,wchan,%cpu,%mem,command" }
970- if psOut , psErr := exec .Command ("ps" , args ... ).Output (); psErr == nil {
971- processTree = string (psOut )
972- }
973- }
974- logr .WithFields (logrus.Fields {
975- "hash" : w .actionDigest .Hash ,
976- "pid" : pid ,
977- "pgid" : pgid ,
978- "hangingProcessTree" : processTree ,
979- }).Debug ("Deadline exceeded: Analyzing hanging group" )
977+ // # TODO(INFRA-130229): Revert this temporary change mettle timeouts investigations are completed
978+ w .logProcOnTimeout (pid )
980979 }(pid )
981980
982981 err := cmd .Wait ()
@@ -1030,6 +1029,53 @@ func (w *worker) runCommand(ctx context.Context, cmd *exec.Cmd, timeout time.Dur
10301029 return err
10311030}
10321031
1032+ // logProcOnTimeout logs /proc diagnostics for the timed-out pid and its process group.
1033+ // It does not rely on `ps` existing in the container.
1034+ func (w * worker ) logProcOnTimeout (pid int ) {
1035+ const maxFDs = 200
1036+ const maxTargetLen = 256
1037+ p := strconv .Itoa (pid )
1038+ base := filepath .Join ("/proc" , p )
1039+
1040+ cmdlineBytes , err := os .ReadFile (filepath .Join (base , "cmdline" ))
1041+ cmdline := ""
1042+ if err == nil && len (cmdlineBytes ) > 0 {
1043+ cmdline = strings .TrimSpace (strings .ReplaceAll (string (cmdlineBytes ), "\x00 " , " " ))
1044+ }
1045+
1046+ wchanBytes , err := os .ReadFile (filepath .Join (base , "wchan" ))
1047+ wchan := ""
1048+ if err == nil && len (wchanBytes ) > 0 {
1049+ wchan = strings .TrimSpace (string (wchanBytes ))
1050+ }
1051+
1052+ var fds []string
1053+ if entries , err := os .ReadDir (filepath .Join (base , "fd" )); err == nil {
1054+ for i , e := range entries {
1055+ if i >= maxFDs {
1056+ fds = append (fds , "...truncated" )
1057+ break
1058+ }
1059+ target , err := os .Readlink (filepath .Join (base , "fd" , e .Name ()))
1060+ if err != nil {
1061+ continue
1062+ }
1063+ if len (target ) > maxTargetLen {
1064+ target = target [:maxTargetLen ] + "…"
1065+ }
1066+ fds = append (fds , e .Name ()+ " -> " + target )
1067+ }
1068+ }
1069+
1070+ logr .WithFields (logrus.Fields {
1071+ "hash" : w .actionDigest .Hash ,
1072+ "pid" : pid ,
1073+ "cmdline" : cmdline ,
1074+ "wchan" : wchan ,
1075+ "fds" : fds ,
1076+ }).Debug ("Deadline exceeded: /proc snapshot" )
1077+ }
1078+
10331079// writeUncachedResult attempts to write an uncached action result proto.
10341080// This is an extension for buildbarn-browser that lets it display results of a failed build.
10351081// It returns a string that, on success, contains an appropriate message about it (they are
0 commit comments