Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,8 @@ SUBDIRS:=ctriface taps misc profile
EXTRAGOARGS:=-v -race -cover
EXTRAGOARGS_NORACE:=-v
EXTRATESTFILES:=vhive_test.go stats.go vhive.go functions.go
# User-level page faults are temporarily disabled (gh-807)
# WITHUPF:=-upfTest
# WITHLAZY:=-lazyTest
WITHUPF:=
WITHLAZY:=
WITHUPF:=-upfTest
WITHLAZY:=-lazyTest
WITHSNAPSHOTS:=-snapshotsTest
CTRDLOGDIR:=/tmp/ctrd-logs

Expand All @@ -45,6 +42,11 @@ test-all: test-subdirs test-orch

test-orch: test test-man

# debug: run the orchestrator tests against a freshly started
# firecracker-containerd with snapshots and user-level page faults enabled
# ($(WITHSNAPSHOTS) $(WITHUPF)).
#
# Steps:
#   1. clean up any state left behind by a previous run;
#   2. create the log directory and start firecracker-containerd in the
#      background, redirecting its stdout/stderr to $(CTRDLOGDIR);
#   3. run the Go tests listed in $(EXTRATESTFILES) in -short mode;
#   4. clean up again so the background daemon does not outlive the target
#      (same as the `debug` target in ctriface/Makefile).
debug:
	./scripts/clean_fcctr.sh
	sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/fccd_orch_upf_log.out 2>$(CTRDLOGDIR)/fccd_orch_upf_log.err &
	sudo env "PATH=$(PATH)" go test $(EXTRATESTFILES) -short $(EXTRAGOARGS) -args $(WITHSNAPSHOTS) $(WITHUPF)
	./scripts/clean_fcctr.sh

test:
./scripts/clean_fcctr.sh
sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/fccd_orch_noupf_log.out 2>$(CTRDLOGDIR)/fccd_orch_noupf_log.err &
Expand Down
4 changes: 2 additions & 2 deletions bin/containerd-shim-aws-firecracker
Git LFS file not shown
4 changes: 2 additions & 2 deletions bin/default-rootfs.img
Git LFS file not shown
2 changes: 1 addition & 1 deletion bin/firecracker
Git LFS file not shown
4 changes: 2 additions & 2 deletions bin/firecracker-containerd
Git LFS file not shown
4 changes: 2 additions & 2 deletions bin/firecracker-ctr
Git LFS file not shown
7 changes: 4 additions & 3 deletions cri/firecracker/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ import (
"context"
"errors"
"fmt"
"github.com/google/uuid"
"github.com/vhive-serverless/vhive/snapshotting"
"strconv"
"sync"
"sync/atomic"
"time"

"github.com/google/uuid"
"github.com/vhive-serverless/vhive/snapshotting"

log "github.com/sirupsen/logrus"
"github.com/vhive-serverless/vhive/ctriface"
)
Expand Down Expand Up @@ -181,7 +182,7 @@ func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.S
ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*30)
defer cancel()

resp, _, err := c.orch.LoadSnapshot(ctxTimeout, vmID, snap)
resp, _, err := c.orch.LoadSnapshot(ctxTimeout, vmID, vmID, snap)
if err != nil {
logger.WithError(err).Error("failed to load VM")
return nil, err
Expand Down
13 changes: 8 additions & 5 deletions ctriface/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@
EXTRAGOARGS:=-v -race -cover
EXTRATESTFILES:=iface_test.go iface.go orch_options.go orch.go
BENCHFILES:=bench_test.go iface.go orch_options.go orch.go
# User-level page faults are temporarily disabled (gh-807)
# WITHUPF:=-upf
# WITHLAZY:=-lazy
WITHUPF:=
WITHLAZY:=
WITHUPF:=-upf
WITHLAZY:=-lazy
GOBENCH:=-v -timeout 1500s
CTRDLOGDIR:=/tmp/ctrd-logs

# debug: run the ctriface tests with the $(WITHUPF) test argument.
#
# The recipe cleans up leftover state, creates $(CTRDLOGDIR), starts
# firecracker-containerd in the background with its stdout/stderr redirected
# to ctriface_log.out / ctriface_log.err, runs `go test` over
# $(EXTRATESTFILES) with $(EXTRAGOARGS), and finally cleans up again.
debug:
./../scripts/clean_fcctr.sh
sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/ctriface_log.out 2>$(CTRDLOGDIR)/ctriface_log.err &
sudo env "PATH=$(PATH)" go test $(EXTRATESTFILES) $(EXTRAGOARGS) -args $(WITHUPF)
./../scripts/clean_fcctr.sh

test:
./../scripts/clean_fcctr.sh
sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/ctriface_log.out 2>$(CTRDLOGDIR)/ctriface_log.err &
Expand Down
79 changes: 33 additions & 46 deletions ctriface/failing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,62 +22,49 @@

package ctriface

import (
"context"
"os"
"testing"
"time"
// func TestStartSnapStop(t *testing.T) {
// // BROKEN BECAUSE StopVM does not work yet.
// // t.Skip("skipping failing test")
// log.SetFormatter(&log.TextFormatter{
// TimestampFormat: ctrdlog.RFC3339NanoFixed,
// FullTimestamp: true,
// })
// //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging

ctrdlog "github.com/containerd/containerd/log"
"github.com/containerd/containerd/namespaces"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
"github.com/vhive-serverless/vhive/snapshotting"
)
// log.SetOutput(os.Stdout)

func TestStartSnapStop(t *testing.T) {
// BROKEN BECAUSE StopVM does not work yet.
t.Skip("skipping failing test")
log.SetFormatter(&log.TextFormatter{
TimestampFormat: ctrdlog.RFC3339NanoFixed,
FullTimestamp: true,
})
//log.SetReportCaller(true) // FIXME: make sure it's false unless debugging
// log.SetLevel(log.DebugLevel)

log.SetOutput(os.Stdout)
// testTimeout := 120 * time.Second
// ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout)
// defer cancel()

log.SetLevel(log.DebugLevel)
// orch := NewOrchestrator("devmapper", "", WithTestModeOn(true))

testTimeout := 120 * time.Second
ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout)
defer cancel()
// vmID := "2"

orch := NewOrchestrator("devmapper", "", WithTestModeOn(true))
// _, _, err := orch.StartVM(ctx, vmID, testImageName)
// require.NoError(t, err, "Failed to start VM")

vmID := "2"
// err = orch.PauseVM(ctx, vmID)
// require.NoError(t, err, "Failed to pause VM")

_, _, err := orch.StartVM(ctx, vmID, testImageName)
require.NoError(t, err, "Failed to start VM")
// snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName)
// err = orch.CreateSnapshot(ctx, vmID, snap)
// require.NoError(t, err, "Failed to create snapshot of VM")

err = orch.PauseVM(ctx, vmID)
require.NoError(t, err, "Failed to pause VM")
// err = orch.StopSingleVM(ctx, vmID)
// require.NoError(t, err, "Failed to stop VM")

snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName)
err = orch.CreateSnapshot(ctx, vmID, snap)
require.NoError(t, err, "Failed to create snapshot of VM")
// _, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap)
// require.NoError(t, err, "Failed to load snapshot of VM")

err = orch.StopSingleVM(ctx, vmID)
require.NoError(t, err, "Failed to stop VM")
// _, err = orch.ResumeVM(ctx, vmID)
// require.NoError(t, err, "Failed to resume VM")

_, _, err = orch.LoadSnapshot(ctx, vmID, snap)
require.NoError(t, err, "Failed to load snapshot of VM")
// err = orch.StopSingleVM(ctx, vmID)
// require.NoError(t, err, "Failed to stop VM")

_, err = orch.ResumeVM(ctx, vmID)
require.NoError(t, err, "Failed to resume VM")

err = orch.StopSingleVM(ctx, vmID)
require.NoError(t, err, "Failed to stop VM")

_ = snap.Cleanup()
orch.Cleanup()
}
// _ = snap.Cleanup()
// orch.Cleanup()
// }
62 changes: 48 additions & 14 deletions ctriface/iface.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ package ctriface

import (
"context"
"github.com/vhive-serverless/vhive/snapshotting"
"os"
"os/exec"
"path/filepath"
Expand All @@ -33,6 +32,8 @@ import (
"syscall"
"time"

"github.com/vhive-serverless/vhive/snapshotting"

log "github.com/sirupsen/logrus"

"github.com/containerd/containerd"
Expand Down Expand Up @@ -64,6 +65,8 @@ type StartVMResponse struct {

const (
testImageName = "ghcr.io/ease-lab/helloworld:var_workload"
fileBackend = "File"
uffdBackend = "Uffd"
)

// StartVM Boots a VM if it does not exist
Expand Down Expand Up @@ -205,17 +208,18 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa
if o.GetUPFEnabled() {
logger.Debug("Registering VM with the memory manager")

logger.Debugf("TEST (startWithEnv): current vmID used to registerVM is %v", vmID)
stateCfg := manager.SnapshotStateCfg{
VMID: vmID,
GuestMemPath: o.getMemoryFile(vmID),
BaseDir: o.getVMBaseDir(vmID),
GuestMemSize: int(conf.MachineCfg.MemSizeMib) * 1024 * 1024,
IsLazyMode: o.isLazyMode,
VMMStatePath: o.getSnapshotFile(vmID),
WorkingSetPath: o.getWorkingSetFile(vmID),
// FIXME (gh-807)
//InstanceSockAddr: resp.UPFSockPath,
VMID: vmID,
GuestMemPath: o.getMemoryFile(vmID),
BaseDir: o.getVMBaseDir(vmID),
GuestMemSize: int(conf.MachineCfg.MemSizeMib) * 1024 * 1024,
IsLazyMode: o.isLazyMode,
VMMStatePath: o.getSnapshotFile(vmID),
WorkingSetPath: o.getWorkingSetFile(vmID),
InstanceSockAddr: o.uffdSockAddr,
}
logger.Debugf("TEST: show to-reg snapStat: %+v", stateCfg)
if err := o.memoryManager.RegisterVM(stateCfg); err != nil {
return nil, nil, errors.Wrap(err, "failed to register VM with memory manager")
// NOTE (Plamen): Potentially need a defer(DeregisteVM) here if RegisterVM is not last to execute
Expand Down Expand Up @@ -447,7 +451,7 @@ func (o *Orchestrator) CreateSnapshot(ctx context.Context, vmID string, snap *sn
}

// LoadSnapshot Loads a snapshot of a VM
func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snapshotting.Snapshot) (_ *StartVMResponse, _ *metrics.Metric, retErr error) {
func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID string, snap *snapshotting.Snapshot) (_ *StartVMResponse, _ *metrics.Metric, retErr error) {
var (
loadSnapshotMetric *metrics.Metric = metrics.NewMetric()
tStart time.Time
Expand Down Expand Up @@ -494,11 +498,22 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap
conf := o.getVMConfig(vm)
conf.LoadSnapshot = true
conf.SnapshotPath = snap.GetSnapshotFilePath()
conf.MemFilePath = snap.GetMemFilePath()
conf.ContainerSnapshotPath = containerSnap.GetDevicePath()
conf.MemBackend = &proto.MemoryBackend{
BackendType: fileBackend,
BackendPath: snap.GetMemFilePath(),
}

if o.GetUPFEnabled() {
if err := o.memoryManager.FetchState(vmID); err != nil {
logger.Debug("TEST: UPF is enabled")
conf.MemBackend.BackendType = uffdBackend
conf.MemBackend.BackendPath = o.uffdSockAddr
logger.Debugf("TEST: the upf socket: %s", conf.MemBackend.BackendPath)
if err != nil {
return nil, nil, errors.Wrapf(err, "failed to get UPF socket path for uffd backend")
}

if err := o.memoryManager.FetchState(originVmID); err != nil {
return nil, nil, err
}
}
Expand All @@ -508,7 +523,8 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap
go func() {
defer close(loadDone)

if _, loadErr = o.fcClient.CreateVM(ctx, conf); loadErr != nil {
_, loadErr := o.fcClient.CreateVM(ctx, conf)
if loadErr != nil {
logger.Error("Failed to load snapshot of the VM: ", loadErr)
logger.Errorf("snapFilePath: %s, memFilePath: %s, newSnapshotPath: %s", snap.GetSnapshotFilePath(), snap.GetMemFilePath(), containerSnap.GetDevicePath())
files, err := os.ReadDir(filepath.Dir(snap.GetSnapshotFilePath()))
Expand Down Expand Up @@ -536,7 +552,25 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap
}
}()

logger.Debug("TEST: CreatVM request sent")
if o.GetUPFEnabled() {

logger.Debug("TEST: Registering VM with the memory manager")

stateCfg := manager.SnapshotStateCfg{
VMID: vmID,
GuestMemPath: o.getMemoryFile(vmID),
BaseDir: o.getVMBaseDir(vmID),
GuestMemSize: int(conf.MachineCfg.MemSizeMib) * 1024 * 1024,
IsLazyMode: o.isLazyMode,
VMMStatePath: o.getSnapshotFile(vmID),
WorkingSetPath: o.getWorkingSetFile(vmID),
InstanceSockAddr: o.uffdSockAddr,
}
if err := o.memoryManager.RegisterVM(stateCfg); err != nil {
logger.Error(err, "failed to register new VM with memory manager")
}

if activateErr = o.memoryManager.Activate(vmID); activateErr != nil {
logger.Warn("Failed to activate VM in the memory manager", activateErr)
}
Expand Down
56 changes: 50 additions & 6 deletions ctriface/iface_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,57 @@ var (

// TestMain parses the command-line flags and then runs the package's tests,
// terminating the process with the status code reported by the test run.
func TestMain(m *testing.M) {
	flag.Parse()

	exitCode := m.Run()
	os.Exit(exitCode)
}

if *isUPFEnabled {
log.Error("User-level page faults are temporarily disabled (gh-807)")
os.Exit(-1)
}
// TestStartSnapStop exercises the ctriface uffd feature by driving one VM
// through a complete snapshot life cycle via the orchestrator: start, pause,
// create snapshot, stop, load snapshot, resume, and stop again. Every step
// must succeed; the snapshot and the orchestrator are cleaned up at the end.
func TestStartSnapStop(t *testing.T) {
	// This test was previously skipped with the note that StopVM did not
	// work yet; the skip is intentionally left disabled.
	// t.Skip("skipping failing test")
	log.SetFormatter(&log.TextFormatter{
		TimestampFormat: ctrdlog.RFC3339NanoFixed,
		FullTimestamp:   true,
	})
	//log.SetReportCaller(true) // FIXME: make sure it's false unless debugging

	log.SetOutput(os.Stdout)

	log.SetLevel(log.DebugLevel)

	// All orchestrator calls share one namespaced context bounded by the
	// overall test timeout.
	testTimeout := 120 * time.Second
	ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout)
	defer cancel()

	orch := NewOrchestrator("devmapper", "", WithTestModeOn(true))

	vmID := "2"

	_, _, err := orch.StartVM(ctx, vmID, testImageName)
	require.NoError(t, err, "Failed to start VM")

	err = orch.PauseVM(ctx, vmID)
	require.NoError(t, err, "Failed to pause VM")

	snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName)
	err = orch.CreateSnapshot(ctx, vmID, snap)
	require.NoError(t, err, "Failed to create snapshot of VM")

	err = orch.StopSingleVM(ctx, vmID)
	require.NoError(t, err, "Failed to stop VM")

	// NOTE(review): the origin VM ID passed here ("1") differs from the ID
	// the snapshot was taken from (vmID). LoadSnapshot hands the origin ID to
	// memoryManager.FetchState when UPF is enabled — confirm "1" is intended.
	_, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap)
	require.NoError(t, err, "Failed to load snapshot of VM")

	_, err = orch.ResumeVM(ctx, vmID)
	require.NoError(t, err, "Failed to resume VM")

	// Let the restored VM run before stopping it.
	// NOTE(review): this fixed sleep consumes 30s of the 120s test timeout;
	// confirm it is still needed.
	time.Sleep(30 * time.Second)

	err = orch.StopSingleVM(ctx, vmID)
	require.NoError(t, err, "Failed to stop VM")

	// Snapshot cleanup is best-effort; its error is deliberately ignored.
	_ = snap.Cleanup()
	orch.Cleanup()
}

func TestPauseSnapResume(t *testing.T) {
Expand All @@ -65,7 +109,7 @@ func TestPauseSnapResume(t *testing.T) {

log.SetOutput(os.Stdout)

log.SetLevel(log.InfoLevel)
log.SetLevel(log.DebugLevel)

testTimeout := 120 * time.Second
ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout)
Expand Down
Loading