@@ -242,22 +242,55 @@ func createTapDevice(ctx context.Context, tapName string) error {
242
242
func TestMultipleVMs_Isolated (t * testing.T ) {
243
243
integtest .Prepare (t )
244
244
245
- // This test starts multiple VMs and some may hit firecracker-containerd's
246
- // default timeout. So overriding the timeout to wait longer.
247
- // One hour should be enough to start a VM, regardless of the load of
248
- // the underlying host.
249
- const createVMTimeout = time .Hour
250
-
251
- netns , err := ns .GetCurrentNS ()
252
- require .NoError (t , err , "failed to get a namespace" )
245
+ var err error
253
246
254
247
// numberOfVmsEnvName = NUMBER_OF_VMS ENV and is configurable from buildkite
255
248
numberOfVms := defaultNumberOfVms
256
249
if str := os .Getenv (numberOfVmsEnvName ); str != "" {
257
250
numberOfVms , err = strconv .Atoi (str )
258
251
require .NoError (t , err , "failed to get NUMBER_OF_VMS env" )
259
252
}
260
- t .Logf ("TestMultipleVMs_Isolated: will run %d vm's" , numberOfVms )
253
+ t .Logf ("TestMultipleVMs_Isolated: will run up to %d VMs" , numberOfVms )
254
+
255
+ // We should be able to run 10 VMs without any issues.
256
+ if numberOfVms <= 10 {
257
+ testMultipleVMs (t , 10 )
258
+ return
259
+ }
260
+
261
+ // We have issues running 100 VMs (see #581).
262
+ // Incrementally increase the number of VMs to find the breaking point.
263
+ for i := 10 ; i <= numberOfVms ; i += 10 {
264
+ success := t .Run (fmt .Sprintf ("VMs=%d" , i ), func (t * testing.T ) {
265
+ testMultipleVMs (t , i )
266
+ })
267
+ if ! success {
268
+ // If running N VMs doesn't work, no point to go further.
269
+ return
270
+ }
271
+ }
272
+ }
273
+
274
// Event identifies a VM lifecycle notification that a VM goroutine sends to
// the test's progress loop.
type Event int

const (
	// Created is sent after a VM's CreateVM call has succeeded.
	Created Event = iota
	// Stopped is sent after a VM's StopVM call has returned.
	Stopped
)
280
+
281
+ func testMultipleVMs (t * testing.T , count int ) {
282
+ // This test starts multiple VMs and some may hit firecracker-containerd's
283
+ // default timeout. So overriding the timeout to wait longer.
284
+ // One hour should be enough to start a VM, regardless of the load of
285
+ // the underlying host.
286
+ const createVMTimeout = 1 * time .Hour
287
+
288
+ // Apparently writing a lot from Firecracker's serial console blocks VMs.
289
+ // https://github.com/firecracker-microvm/firecracker/blob/v1.1.0/docs/prod-host-setup.md
290
+ kernelArgs := integtest .DefaultRuntimeConfig .KernelArgs + " 8250.nr_uarts=0 quiet loglevel=1"
291
+
292
+ netns , err := ns .GetCurrentNS ()
293
+ require .NoError (t , err , "failed to get a namespace" )
261
294
262
295
tapPrefix := os .Getenv (tapPrefixEnvName )
263
296
@@ -278,6 +311,7 @@ func TestMultipleVMs_Isolated(t *testing.T) {
278
311
},
279
312
{
280
313
MaxContainers : 3 ,
314
+
281
315
JailerConfig : & proto.JailerConfig {
282
316
UID : 300000 ,
283
317
GID : 300000 ,
@@ -299,39 +333,56 @@ func TestMultipleVMs_Isolated(t *testing.T) {
299
333
cfg , err := config .LoadConfig ("" )
300
334
require .NoError (t , err , "failed to load config" )
301
335
336
+ eventCh := make (chan Event )
337
+
338
+ // Creating tap devices without goroutines somehow stabilizes this test.
339
+ var devices []string
340
+
341
+ defer func () {
342
+ for _ , dev := range devices {
343
+ err := deleteTapDevice (testCtx , dev )
344
+ assert .NoError (t , err )
345
+ }
346
+ }()
347
+
348
+ for i := 0 ; i < count ; i ++ {
349
+ tapName := fmt .Sprintf ("%stap%d" , tapPrefix , i )
350
+ err := createTapDevice (testCtx , tapName )
351
+ if err != nil {
352
+ t .Errorf ("failed to create %q: %s" , tapName , err )
353
+ return
354
+ }
355
+ devices = append (devices , tapName )
356
+ }
357
+
302
358
// This test spawns separate VMs in parallel and ensures containers are spawned within each expected VM. It asserts each
303
359
// container ends up in the right VM by assigning each VM a network device with a unique mac address and having each container
304
360
// print the mac address it sees inside its VM.
305
361
vmEg , vmEgCtx := errgroup .WithContext (testCtx )
306
- for i := 0 ; i < numberOfVms ; i ++ {
362
+ for i , device := range devices {
307
363
caseTypeNumber := i % len (cases )
308
364
vmID := i
365
+ device := device
309
366
c := cases [caseTypeNumber ]
310
367
311
368
f := func (ctx context.Context ) error {
312
369
containerCount := c .MaxContainers
313
370
jailerConfig := c .JailerConfig
314
371
315
- tapName := fmt .Sprintf ("%stap%d" , tapPrefix , vmID )
316
- err := createTapDevice (ctx , tapName )
317
- if err != nil {
318
- return err
319
- }
320
- defer deleteTapDevice (ctx , tapName )
321
-
322
372
rootfsPath := cfg .RootDrive
323
373
324
374
vmIDStr := strconv .Itoa (vmID )
325
375
req := & proto.CreateVMRequest {
326
- VMID : vmIDStr ,
376
+ KernelArgs : kernelArgs ,
377
+ VMID : vmIDStr ,
327
378
RootDrive : & proto.FirecrackerRootDrive {
328
379
HostPath : rootfsPath ,
329
380
},
330
381
NetworkInterfaces : []* proto.FirecrackerNetworkInterface {
331
382
{
332
383
AllowMMDS : true ,
333
384
StaticConfig : & proto.StaticNetworkConfiguration {
334
- HostDevName : tapName ,
385
+ HostDevName : device ,
335
386
MacAddress : vmIDtoMacAddr (uint (vmID )),
336
387
},
337
388
},
@@ -349,6 +400,7 @@ func TestMultipleVMs_Isolated(t *testing.T) {
349
400
if err != nil {
350
401
return err
351
402
}
403
+ defer fcClient .Close ()
352
404
353
405
resp , createVMErr := fcClient .CreateVM (ctx , req )
354
406
if createVMErr != nil {
@@ -365,6 +417,7 @@ func TestMultipleVMs_Isolated(t *testing.T) {
365
417
createVMErr ,
366
418
)
367
419
}
420
+ eventCh <- Created
368
421
369
422
containerEg , containerCtx := errgroup .WithContext (vmEgCtx )
370
423
for containerID := 0 ; containerID < int (containerCount ); containerID ++ {
@@ -425,10 +478,8 @@ func TestMultipleVMs_Isolated(t *testing.T) {
425
478
}
426
479
427
480
_ , err = fcClient .StopVM (ctx , & proto.StopVMRequest {VMID : strconv .Itoa (vmID ), TimeoutSeconds : 5 })
428
- if err != nil {
429
- return err
430
- }
431
- return nil
481
+ eventCh <- Stopped
482
+ return err
432
483
}
433
484
434
485
vmEg .Go (func () error {
@@ -440,8 +491,26 @@ func TestMultipleVMs_Isolated(t *testing.T) {
440
491
})
441
492
}
442
493
443
- err = vmEg .Wait ()
444
- require .NoError (t , err )
494
+ ticker := time .NewTicker (10 * time .Second )
495
+ defer ticker .Stop ()
496
+
497
+ var created int
498
+ for stopped := 0 ; stopped < count ; {
499
+ select {
500
+ case <- vmEgCtx .Done ():
501
+ require .NoError (t , vmEg .Wait ())
502
+ return
503
+ case e := <- eventCh :
504
+ switch e {
505
+ case Created :
506
+ created ++
507
+ case Stopped :
508
+ stopped ++
509
+ }
510
+ case <- ticker .C :
511
+ t .Logf ("created=%d/%d stopped=%d/%d" , created , count , stopped , count )
512
+ }
513
+ }
445
514
}
446
515
447
516
func testMultipleExecs (
@@ -478,7 +547,7 @@ func testMultipleExecs(
478
547
if err != nil {
479
548
return err
480
549
}
481
- defer newContainer .Delete (ctx )
550
+ defer newContainer .Delete (ctx , containerd . WithSnapshotCleanup )
482
551
483
552
var taskStdout bytes.Buffer
484
553
var taskStderr bytes.Buffer
0 commit comments