@@ -35,20 +35,19 @@ package integration
3535import (
3636 "bufio"
3737 "bytes"
38- "encoding/json"
3938 "fmt"
40- "io"
4139 "math"
4240 "os"
4341 "regexp"
44- "runtime"
4542 "strconv"
4643 "strings"
44+ "sync"
4745 "testing"
4846 "time"
4947
5048 "github.com/awslabs/soci-snapshotter/soci/store"
5149 shell "github.com/awslabs/soci-snapshotter/util/dockershell"
50+ "github.com/awslabs/soci-snapshotter/util/testutil"
5251)
5352
5453func TestRunWithDefaultConfig (t * testing.T ) {
@@ -221,15 +220,7 @@ type retryConfig struct {
221220// TestNetworkRetry runs a container, disables network access to the remote image, asks the container
222221// to do something requiring the remote image, waits for some/all requests to fail, enables the network,
223222// confirms retries and success/failure
224-
225- // TODO: Refactor test
226223func TestNetworkRetry (t * testing.T ) {
227- // This test makes the testing suite hang on ARM.
228- // Disabling for now till we can refactor this.
229- if runtime .GOARCH == "arm64" {
230- return
231- }
232-
233224 const containerImage = alpineImage
234225
235226 tests := []struct {
@@ -247,12 +238,12 @@ func TestNetworkRetry(t *testing.T) {
247238 },
248239 },
249240 {
250- name : "1s network interruption, no retries allowed, failure" ,
241+ name : "6s network interruption, no retries allowed, failure" ,
251242 config : retryConfig {
252243 maxRetries : - 1 ,
253244 minWaitMsec : 0 ,
254245 maxWaitMsec : 0 ,
255- networkDisableMsec : 1000 ,
246+ networkDisableMsec : 6000 ,
256247 expectedSuccess : false ,
257248 },
258249 },
@@ -267,12 +258,12 @@ func TestNetworkRetry(t *testing.T) {
267258 },
268259 },
269260 {
270- name : "10s network interruption, ~6-7s retries allowed, failure" ,
261+ name : "12s network interruption, ~6-7s retries allowed, failure" ,
271262 config : retryConfig {
272263 maxRetries : 1 ,
273264 minWaitMsec : 100 ,
274265 maxWaitMsec : 1600 ,
275- networkDisableMsec : 10000 ,
266+ networkDisableMsec : 12000 ,
276267 expectedSuccess : false ,
277268 },
278269 },
@@ -296,135 +287,81 @@ max_wait_msec = ` + strconv.FormatInt(tt.config.maxWaitMsec, 10) + `
296287disable = true
297288`
298289
299- m := rebootContainerd (t , sh , getContainerdConfigToml (t , false ), getSnapshotterConfigToml (t , false , config ))
290+ rebootContainerd (t , sh , getContainerdConfigToml (t , false ), getSnapshotterConfigToml (t , false , config ))
300291 // Mirror image
301292 copyImage (sh , dockerhub (containerImage ), regConfig .mirror (containerImage ))
302- // Pull image, create SOCI index with all layers and small (100kiB) spans
303- indexDigest := buildIndex (sh , regConfig .mirror (containerImage ), withMinLayerSize (0 ), withSpanSize (100 * 1024 ))
304- sh .X (append (imagePullCmd , "--soci-index-digest" , indexDigest , regConfig .mirror (containerImage ).ref )... )
305-
306- // Run the container
307293 image := regConfig .mirror (containerImage ).ref
308- sh .X (append (runSociCmd , "--name" , "test-container" , "-d" , image , "sleep" , "infinity" )... )
309-
310- sh .X ("apt-get" , "--no-install-recommends" , "install" , "-qy" , "iptables" )
311-
312- // TODO: Wait for the container to be up and running
294+ indexDigest := buildIndex (sh , regConfig .mirror (containerImage ), withMinLayerSize (0 ), withSpanSize (1 << 20 ))
295+ sh .X ("soci" , "push" , "--user" , regConfig .creds (), regConfig .mirror (containerImage ).ref )
313296
297+ rebootContainerd (t , sh , getContainerdConfigToml (t , false ), getSnapshotterConfigToml (t , false , config ))
298+ // Re-pull image from our local registry mirror
299+ sh .X (append (imagePullCmd , "--soci-index-digest" , indexDigest , regConfig .mirror (containerImage ).ref )... )
300+ sh .X ("apt-get" , "-qq" , "--no-install-recommends" , "install" , "-y" , "iptables" )
314301 // Block network access to the registry
315302 if tt .config .networkDisableMsec > 0 {
316303 sh .X ("iptables" , "-A" , "OUTPUT" , "-d" , registryHostIP , "-j" , "DROP" )
317304 }
318305
319- // Do something in the container that should work without network access
320- commandSucceedStdout , _ , err := sh .R ("nerdctl" , "exec" , "test-container " , "sh" , "-c" , "times" )
306+ // Read a file to trigger a synchronous network request.
307+ cmdStdout , cmdStderr , err := sh .R (append ( runSociCmd , image , "cat " , "/etc/hosts" ) ... )
321308 if err != nil {
322- t .Fatalf ("attempt to run task without network access failed: %s" , err )
323- }
324-
325- type ErrorLogLine struct {
326- Error string `json:"error"`
327- Msg string `json:"msg"`
309+ t .Fatalf ("attempt to run task requiring network access failed: %s" , err )
328310 }
329311
330- gaveUpChannel := make (chan bool , 1 )
331- defer close ( gaveUpChannel )
312+ successChannel := make (chan string , 1 )
313+ failureChannel := make ( chan string , 1 )
332314
333- monitorGaveUp := func (rawL string ) {
334- if i := strings .Index (rawL , "{" ); i > 0 {
335- rawL = rawL [i :] // trim garbage chars; expects "{...}"-styled JSON log
336- }
337- var logLine ErrorLogLine
338- if err := json .Unmarshal ([]byte (rawL ), & logLine ); err == nil {
339- if logLine .Msg == "statFile error" && strings .Contains (logLine .Error , "giving up after" ) {
340- gaveUpChannel <- true
341- return
342- }
315+ listener := func (c chan string ) func (string ) {
316+ var once sync.Once
317+ return func (rawLog string ) {
318+ once .Do (func () { c <- rawLog })
343319 }
344320 }
345321
346- m .Add ("retry" , monitorGaveUp )
347- defer m .Remove ("retry" )
348-
349- // Do something in the container to access un-fetched spans, requiring network access
350- commandNetworkStdout , _ , err := sh .R ("nerdctl" , "exec" , "test-container" , "cat" , "/etc/hosts" )
351- if err != nil {
352- t .Fatalf ("attempt to run task requiring network access failed: %s" , err )
353- }
354-
355- // Wait with network disabled
356- time .Sleep (time .Duration (tt .config .networkDisableMsec ) * time .Millisecond )
322+ // There's no way to split stdout and stderr with LogMonitor,
323+ // and sh.R could orphan goroutines, so make new LogMonitors
324+ // for stdout and stderr and monitor them separately
325+ r := testutil .NewTestingReporter (t )
357326
358- // Short wait to allow commands to complete
359- time .Sleep (time .Duration (1000 ) * time .Millisecond )
327+ stdoutLogMonitor := testutil .NewLogMonitor (r , cmdStdout , strings .NewReader ("" ))
328+ stdoutLogMonitor .Add ("listener" , listener (successChannel ))
329+ defer stdoutLogMonitor .Remove ("listener" )
360330
361- // Confirm first command succeeded while network was down
362- buf := make ([]byte , 100 )
363- if _ , err = commandSucceedStdout .Read (buf ); err != nil {
364- t .Fatalf ("read from expected successful task output failed: %s" , err )
365- }
366- if ! strings .Contains (string (buf ), "s " ) { // `times` output looks like "0m0.03s 0m0.05s"
367- t .Fatalf ("expected successful task produced unexpected output: %s" , string (buf ))
368- }
331+ stderrLogMonitor := testutil .NewLogMonitor (r , strings .NewReader ("" ), cmdStderr )
332+ stderrLogMonitor .Add ("listener" , listener (failureChannel ))
333+ defer stderrLogMonitor .Remove ("listener" )
369334
370- // async read from command_network_stdout
371- commandNetworkStdoutChannel := make (chan []byte )
372- commandNetworkErrChannel := make (chan error )
373- go func () {
374- defer close (commandNetworkStdoutChannel )
375- defer close (commandNetworkErrChannel )
376- var b []byte
377- if _ , err := commandNetworkStdout .Read (b ); err != nil && err != io .EOF {
378- commandNetworkErrChannel <- fmt .Errorf ("read from network bound task output failed: %s" , err )
379- return
380- }
381- commandNetworkStdoutChannel <- b
382- if err == io .EOF {
383- commandNetworkErrChannel <- fmt .Errorf ("read from network bound task output encountered EOF" )
384- return
385- }
386- }()
335+ // Wait with network disabled. At this point the snapshotter should be
336+ // retrying.
337+ time .Sleep (time .Duration (tt .config .networkDisableMsec ) * time .Millisecond )
387338
339+ // Restore network access
388340 if tt .config .networkDisableMsec > 0 {
389-
390- // Confirm second command has not succeeded while network was down
391- select {
392- case err := <- commandNetworkErrChannel :
393- t .Fatal (err )
394- case data := <- commandNetworkStdoutChannel :
395- if len (data ) > 0 {
396- t .Fatalf ("network bound task produced unexpected output: %s" , string (data ))
397- }
398- case <- time .After (100 * time .Millisecond ):
399- }
400-
401341 // Restore access to the registry and image
402342 sh .X ("iptables" , "-D" , "OUTPUT" , "-d" , registryHostIP , "-j" , "DROP" )
403-
404343 // Wait with network enabled, so a final retry has a chance to succeed
405344 time .Sleep (2 * time .Millisecond * time .Duration (math .Min (
406345 float64 (tt .config .maxWaitMsec ),
407346 math .Pow (2 , float64 (tt .config .maxRetries ))* float64 (tt .config .minWaitMsec ),
408347 )))
409348 }
410349
411- // Confirm whether second command has succeeded with network restored
412-
413350 select {
414- case gaveUp := <- gaveUpChannel :
415- if tt .config .expectedSuccess && gaveUp {
416- t .Fatal ("retries gave up despite test expecting retry success" )
351+ case data := <- failureChannel :
352+ if tt .config .expectedSuccess {
353+ t .Fatalf ("expected Read request to succeed; got data in stderr: %s" , data )
354+ } else if len (successChannel ) > 0 {
355+ t .Fatalf ("expected Read request to fail; got data in (stdout, stderr) : (%s, %s)" , data , <- successChannel )
417356 }
418- case data := <- commandNetworkStdoutChannel :
357+ case data := <- successChannel :
419358 if ! tt .config .expectedSuccess {
420- if len (data ) > 0 {
421- t .Fatalf ("network bound task produced unexpected output: %s" , string (data ))
422- }
423- }
424- case <- time .After (100 * time .Millisecond ):
425- if tt .config .expectedSuccess {
426- t .Fatal ("network bound task produced no output when expecting success" )
359+ t .Fatalf ("expected Read request to fail; got data in stdout: %s" , data )
360+ } else if len (failureChannel ) > 0 {
361+ t .Fatalf ("expected Read request to succeed; got data in (stdout, stderr) : (%s, %s)" , data , <- failureChannel )
427362 }
363+ case <- time .After (15 * time .Second ):
364+ t .Fatal ("neither stdout or stderr has been written to" )
428365 }
429366 })
430367 }
0 commit comments