11package testenv
22
33import (
4- "bufio"
54 "bytes"
65 "context"
76 "encoding/json"
@@ -13,6 +12,7 @@ import (
1312 "os/exec"
1413 "strings"
1514 "sync"
15+ "syscall"
1616 "testing"
1717 "time"
1818
@@ -91,12 +91,32 @@ func (b *BuildxEnv) supportsDialStdio(ctx context.Context) (bool, error) {
9191
9292var errDialStdioNotSupported = errors .New ("buildx dial-stdio not supported" )
9393
94- type connCloseWrapper struct {
94+ // cmdConn wraps a net.Conn and replaces generic pipe errors with the
95+ // actual error from the underlying command when it has exited.
96+ type cmdConn struct {
9597 net.Conn
96- close func ()
98+ close func ()
99+ cmdWait <- chan error
97100}
98101
99- func (c * connCloseWrapper ) Close () error {
102+ func (c * cmdConn ) Read (b []byte ) (int , error ) {
103+ n , err := c .Conn .Read (b )
104+ if err != nil {
105+ // If the command has exited with an error, surface that instead
106+ // of the generic pipe closed error.
107+ select {
108+ case cmdErr := <- c .cmdWait :
109+ if cmdErr != nil {
110+ return n , fmt .Errorf ("%v: %w" , cmdErr , err )
111+ }
112+ default :
113+ }
114+ }
115+
116+ return n , err
117+ }
118+
119+ func (c * cmdConn ) Close () error {
100120 if c .close != nil {
101121 c .close ()
102122 }
@@ -126,68 +146,89 @@ func (b *BuildxEnv) dialStdio(ctx context.Context) error {
126146 // the buildx dial-stdio process from cleaning up its resources properly.
127147 cmd := exec .Command ("docker" , args ... )
128148 cmd .Env = os .Environ ()
149+ // Put the child in its own process group so it does not receive
150+ // SIGINT from the terminal on Ctrl+C. This way shutdown is always
151+ // driven by closing stdin (the Ctrl+D / EOF path), which buildx
152+ // handles cleanly. The process group is only used for signal
153+ // isolation, not for killing.
154+ cmd .SysProcAttr = & syscall.SysProcAttr {Setpgid : true }
129155
130156 dialStdioConn , clientConn := net .Pipe ()
131- cmd .Stdin = dialStdioConn
132157 cmd .Stdout = dialStdioConn
133158
134- // Use a pipe to check when the connection is actually complete
135- // Also write all of stderr to an error buffer so we can have more details
136- // in the error message when the command fails.
137- r , w := io .Pipe ()
138- errBuf := bytes .NewBuffer (nil )
139- ww := io .MultiWriter (w , errBuf )
140- cmd .Stderr = ww
159+ // Capture stderr so we can include it in error messages
160+ // when the command fails.
161+ var stderr bytes.Buffer
162+ cmd .Stderr = & stderr
163+
164+ // Use StdinPipe instead of setting cmd.Stdin directly.
165+ // When cmd.Stdin is set to a non-*os.File (like net.Conn),
166+ // exec creates an internal goroutine to copy data into the
167+ // process's stdin pipe, and cmd.Wait blocks until that
168+ // goroutine finishes. If the process exits immediately (e.g.
169+ // bad arguments), the goroutine is stuck reading from
170+ // dialStdioConn (nobody is writing yet), so cmd.Wait hangs.
171+ // StdinPipe avoids the internal goroutine entirely.
172+ stdinPipe , err := cmd .StdinPipe ()
173+ if err != nil {
174+ _ , _ = dialStdioConn .Close (), clientConn .Close ()
175+
176+ return nil , fmt .Errorf ("creating stdin pipe: %w" , err )
177+ }
141178
142179 if err := cmd .Start (); err != nil {
143- return nil , err
180+ _ , _ , _ = stdinPipe .Close (), dialStdioConn .Close (), clientConn .Close ()
181+
182+ if s := strings .TrimSpace (stderr .String ()); s != "" {
183+ return nil , fmt .Errorf ("starting buildx dial-stdio: %s: %w" , strings .TrimSpace (stderr .String ()), err )
184+ }
185+
186+ return nil , fmt .Errorf ("starting buildx dial-stdio: %w" , err )
144187 }
145188
146- // chWait is closed when cmd.Wait() returns, signaling the cleanup
189+ // Copy client writes to the process's stdin.
190+ // This goroutine stops when dialStdioConn is closed (read
191+ // returns error) or stdinPipe is closed (write returns error).
192+ go func () {
193+ io .Copy (stdinPipe , dialStdioConn ) //nolint:errcheck
194+ stdinPipe .Close ()
195+ }()
196+
197+ // cmdWait is closed when cmd.Wait() returns, signaling the cleanup
147198 // function that the process has exited.
148- chWait := make (chan struct {})
199+ // waitErr is written before cmdWait is closed, so it is safe to
200+ // read after receiving from cmdWait.
201+ cmdWait := make (chan error , 1 )
202+ cmdDone := make (chan struct {})
149203 go func () {
150204 err := cmd .Wait ()
151- close (chWait )
205+ if stderr .Len () > 0 && err != nil {
206+ err = fmt .Errorf ("%v: %w" , strings .TrimSpace (stderr .String ()), err )
207+ }
208+ cmdWait <- err
209+ close (cmdWait )
152210 dialStdioConn .Close ()
153- // pkgerrors.Wrap will return nil if err is nil, otherwise it will give
154- // us a wrapped error with the buffered stderr from the command.
155- w .CloseWithError (pkgerrors .Wrapf (err , "%s" , errBuf ))
211+ close (cmdDone )
156212 }()
157213
158- defer r .Close ()
159-
160- scanner := bufio .NewScanner (r )
161- for scanner .Scan () {
162- txt := strings .ToLower (scanner .Text ())
163-
164- if strings .HasPrefix (txt , "#1 dialing builder" ) && strings .HasSuffix (txt , "done" ) {
165- go func () {
166- // Continue draining stderr so the process does not get blocked
167- _ , _ = io .Copy (io .Discard , r )
168- }()
169- break
170- }
171- }
172- if err := scanner .Err (); err != nil {
173- return nil , err
174- }
175-
176- out := & connCloseWrapper {
177- Conn : clientConn ,
214+ out := & cmdConn {
215+ Conn : clientConn ,
216+ cmdWait : cmdWait ,
178217 close : sync .OnceFunc (func () {
179- // Close the stdin/stdout pipe to the process.
180- // This causes stdin EOF in buildx's dial-stdio , which triggers
181- // closeWrite(conn) on the buildkit connection and should start
182- // the chain reaction for docker CLI process to exit.
218+ // Close the pipe to the process. This sends EOF on stdin
219+ // (like Ctrl+D) , which triggers closeWrite(conn) on the
220+ // buildkit connection and starts the chain reaction for the
221+ // docker CLI process to exit.
183222 dialStdioConn .Close ()
184223
185224 select {
186- case <- chWait :
225+ case <- cmdDone :
187226 case <- time .After (10 * time .Second ):
188- // Safety net: force kill if still running.
189- cmd .Process .Kill () //nolint:errcheck
190- <- chWait
227+ // Safety net: force kill the entire process group
228+ // (negative PID) since Setpgid puts the child and
229+ // any subprocesses it spawns into their own group.
230+ syscall .Kill (- cmd .Process .Pid , syscall .SIGKILL ) //nolint:errcheck
231+ <- cmdWait
191232 }
192233 }),
193234 }
0 commit comments