Skip to content

Commit 43fcf55

Browse files
committed
Add support for remote Firecracker snapshots
- When remote snapshots are enabled, the snapshot is uploaded to a MinIO instance after it is committed. When loading from a snapshot that is not available locally, the snapshot is looked up in MinIO and, if present, fetched from there.
- Remote Firecracker snapshots are currently only supported using the Stargz snapshotter (there are some container corruption issues when using devmapper).

Signed-off-by: André Jesus <[email protected]>
1 parent 5b9627f commit 43fcf55

File tree

18 files changed

+854
-100
lines changed

18 files changed

+854
-100
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ EXTRATESTFILES:=vhive_test.go stats.go vhive.go functions.go
2929
# WITHLAZY:=-lazyTest
3030
WITHUPF:=
3131
WITHLAZY:=
32-
WITHSNAPSHOTS:=-snapshotsTest
32+
WITHSNAPSHOTS:=-snapshotsTest 'local'
3333
CTRDLOGDIR:=/tmp/ctrd-logs
3434

3535
vhive: proto

bench_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func TestBenchParallelServe(t *testing.T) {
6363
imageName, isPresent := images[*funcName]
6464
require.True(t, isPresent, "Function is not supported")
6565

66-
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst)
66+
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst, "disabled")
6767

6868
createResultsDir()
6969

@@ -136,7 +136,7 @@ func TestBenchWarmServe(t *testing.T) {
136136
imageName, isPresent := images[*funcName]
137137
require.True(t, isPresent, "Function is not supported")
138138

139-
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst)
139+
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst, "disabled")
140140

141141
createResultsDir()
142142

@@ -201,7 +201,7 @@ func TestBenchServe(t *testing.T) {
201201
imageName, isPresent := images[*funcName]
202202
require.True(t, isPresent, "Function is not supported")
203203

204-
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst)
204+
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst, "disabled")
205205

206206
createResultsDir()
207207

configs/.wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ SMI
465465
sms
466466
SMT
467467
snapshotted
468+
snapshotters
468469
snapshotting
469470
SoC
470471
SOCACHE

cri/firecracker/coordinator.go

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ import (
3535

3636
log "github.com/sirupsen/logrus"
3737
"github.com/vhive-serverless/vhive/ctriface"
38+
39+
"github.com/minio/minio-go/v7"
40+
"github.com/minio/minio-go/v7/pkg/credentials"
3841
)
3942

4043
type coordinator struct {
@@ -66,11 +69,32 @@ func newFirecrackerCoordinator(orch *ctriface.Orchestrator, opts ...coordinatorO
6669
opt(c)
6770
}
6871

72+
snapshotsMode := "disabled"
6973
snapshotsDir := "/fccd/test/snapshots"
74+
75+
var minioClient *minio.Client
76+
snapshotsBucket := "snapshots"
77+
minioAddr := "localhost:50052"
78+
minioAccessKey := "minio"
79+
minioSecretKey := "minio123"
80+
7081
if !c.withoutOrchestrator {
82+
snapshotsMode = orch.GetSnapshotMode()
7183
snapshotsDir = orch.GetSnapshotsDir()
84+
snapshotsBucket = orch.GetSnapshotsBucket()
85+
minioAddr = orch.GetMinioAddr()
86+
minioAccessKey = orch.GetMinioAccessKey()
87+
minioSecretKey = orch.GetMinioSecretKey()
7288
}
73-
c.snapshotManager = snapshotting.NewSnapshotManager(snapshotsDir)
89+
90+
if snapshotsMode == "remote" {
91+
minioClient, _ = minio.New(minioAddr, &minio.Options{
92+
Creds: credentials.NewStaticV4(minioAccessKey, minioSecretKey, ""),
93+
Secure: false,
94+
})
95+
}
96+
97+
c.snapshotManager = snapshotting.NewSnapshotManager(snapshotsDir, snapshotsBucket, minioClient)
7498

7599
return c
76100
}
@@ -80,13 +104,27 @@ func (c *coordinator) startVM(ctx context.Context, image, revision string) (*fun
80104
}
81105

82106
func (c *coordinator) startVMWithEnvironment(ctx context.Context, image, revision string, environment []string) (*funcInstance, error) {
83-
if c.orch != nil && c.orch.GetSnapshotsEnabled() {
84-
// Check if snapshot is available
85-
if snap, err := c.snapshotManager.AcquireSnapshot(revision); err == nil {
107+
if c.orch != nil && c.orch.GetSnapshotMode() != "disabled" {
108+
if snap, err := c.snapshotManager.AcquireSnapshot(revision); snap == nil {
109+
log.Printf("failed to acquire snapshot: %v", err)
110+
if c.orch.GetSnapshotMode() == "remote" {
111+
log.Printf("downloading snapshot from remote storage")
112+
if _, err := c.snapshotManager.DownloadSnapshot(revision); err != nil {
113+
log.Printf("failed to download snapshot from remote storage: %v", err)
114+
_ = c.snapshotManager.DeleteSnapshot(revision) // TODO only for testing. Remove later
115+
} else {
116+
log.Printf("downloaded snapshot from remote storage")
117+
}
118+
}
119+
}
120+
121+
if snap, _ := c.snapshotManager.AcquireSnapshot(revision); snap != nil {
122+
log.Printf("loading snapshot %s", snap.GetId())
86123
return c.orchLoadInstance(ctx, snap)
87124
}
88125
}
89126

127+
log.Printf("creating fresh instance")
90128
return c.orchStartVM(ctx, image, revision, environment)
91129
}
92130

@@ -102,7 +140,7 @@ func (c *coordinator) stopVM(ctx context.Context, containerID string) error {
102140
return nil
103141
}
104142

105-
if c.orch != nil && c.orch.GetSnapshotsEnabled() && !fi.SnapBooted {
143+
if c.orch != nil && c.orch.GetSnapshotMode() != "disabled" && !fi.SnapBooted {
106144
err := c.orchCreateSnapshot(ctx, fi)
107145
if err != nil {
108146
log.Printf("Err creating snapshot %s\n", err)
@@ -199,6 +237,7 @@ func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.S
199237

200238
func (c *coordinator) orchCreateSnapshot(ctx context.Context, fi *funcInstance) error {
201239
var err error
240+
log.Printf("creating snapshot for %s\n", fi.Revision)
202241

203242
snap, err := c.snapshotManager.InitSnapshot(fi.Revision, fi.Image)
204243
if err != nil {
@@ -230,11 +269,23 @@ func (c *coordinator) orchCreateSnapshot(ctx context.Context, fi *funcInstance)
230269
}
231270
}
232271

272+
if err := snap.SerializeSnapInfo(); err != nil {
273+
fi.Logger.WithError(err).Error("failed to serialize snapshot info")
274+
return err
275+
}
276+
233277
if err := c.snapshotManager.CommitSnapshot(fi.Revision); err != nil {
234278
fi.Logger.WithError(err).Error("failed to commit snapshot")
235279
return err
236280
}
237281

282+
if c.orch.GetSnapshotMode() == "remote" {
283+
fi.Logger.Debug("uploading snapshot to remote storage")
284+
if err := c.snapshotManager.UploadSnapshot(fi.Revision); err != nil {
285+
fi.Logger.WithError(err).Error("failed to upload snapshot")
286+
}
287+
}
288+
238289
return nil
239290
}
240291

ctriface/orch.go

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -98,16 +98,21 @@ type Orchestrator struct {
9898
imageManager *image.ImageManager
9999
dockerCredentials DockerCredentials
100100
// store *skv.KVStore
101-
snapshotsEnabled bool
102-
isUPFEnabled bool
103-
isLazyMode bool
104-
snapshotsDir string
105-
isMetricsMode bool
106-
netPoolSize int
101+
snapshotMode string
102+
isUPFEnabled bool
103+
isLazyMode bool
104+
snapshotsDir string
105+
snapshotsBucket string
106+
isMetricsMode bool
107+
netPoolSize int
107108

108109
vethPrefix string
109110
clonePrefix string
110111

112+
minioAddr string
113+
minioAccessKey string
114+
minioSecretKey string
115+
111116
memoryManager *manager.MemoryManager
112117
}
113118

@@ -119,9 +124,13 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption)
119124
o.cachedImages = make(map[string]containerd.Image)
120125
o.snapshotter = snapshotter
121126
o.snapshotsDir = "/fccd/snapshots"
127+
o.snapshotsBucket = "snapshots"
122128
o.netPoolSize = 10
123129
o.vethPrefix = "172.17"
124130
o.clonePrefix = "172.18"
131+
o.minioAddr = "10.96.0.46:9000"
132+
o.minioAccessKey = "minio"
133+
o.minioSecretKey = "minio123"
125134

126135
for _, opt := range opts {
127136
opt(o)
@@ -187,9 +196,9 @@ func (o *Orchestrator) Cleanup() {
187196
}
188197
}
189198

190-
// GetSnapshotsEnabled Returns the snapshots mode of the orchestrator
191-
func (o *Orchestrator) GetSnapshotsEnabled() bool {
192-
return o.snapshotsEnabled
199+
// GetSnapshotMode returns the snapshot mode of the orchestrator.
200+
func (o *Orchestrator) GetSnapshotMode() string {
201+
return o.snapshotMode
193202
}
194203

195204
// GetUPFEnabled Returns the UPF mode of the orchestrator
@@ -252,6 +261,27 @@ func (o *Orchestrator) GetDockerCredentials() string {
252261
return string(data)
253262
}
254263

264+
// GetSnapshotsBucket returns the S3 bucket name used by the orchestrator for storing remote snapshots.
265+
func (o *Orchestrator) GetSnapshotsBucket() string {
266+
return o.snapshotsBucket
267+
}
268+
269+
// GetMinioAddr returns the address (endpoint) of the MinIO server used by the orchestrator.
270+
func (o *Orchestrator) GetMinioAddr() string {
271+
return o.minioAddr
272+
}
273+
274+
// GetMinioAccessKey returns the access key used to authenticate with the MinIO server.
275+
func (o *Orchestrator) GetMinioAccessKey() string {
276+
return o.minioAccessKey
277+
}
278+
279+
// GetMinioSecretKey returns the secret key used to authenticate with the MinIO server.
280+
// This should be handled securely and never exposed in logs or error messages.
281+
func (o *Orchestrator) GetMinioSecretKey() string {
282+
return o.minioSecretKey
283+
}
284+
255285
func (o *Orchestrator) setupHeartbeat() {
256286
heartbeat := time.NewTicker(60 * time.Second)
257287

ctriface/orch_options.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ func WithTestModeOn(testModeOn bool) OrchestratorOption {
4040
}
4141
}
4242

43-
// WithSnapshots Sets the snapshot mode on or off
44-
func WithSnapshots(snapshotsEnabled bool) OrchestratorOption {
43+
// WithSnapshotMode sets the snapshot mode (e.g. "disabled", "local" or "remote").
44+
func WithSnapshotMode(snapshotMode string) OrchestratorOption {
4545
return func(o *Orchestrator) {
46-
o.snapshotsEnabled = snapshotsEnabled
46+
o.snapshotMode = snapshotMode
4747
}
4848
}
4949

@@ -111,3 +111,26 @@ func WithDockerCredentials(dockerCredentials string) OrchestratorOption {
111111
o.dockerCredentials = creds
112112
}
113113
}
114+
115+
// WithMinioAddr Sets the MinIO server address (endpoint)
116+
func WithMinioAddr(minioAddr string) OrchestratorOption {
117+
return func(o *Orchestrator) {
118+
o.minioAddr = minioAddr
119+
}
120+
}
121+
122+
// WithMinioAccessKey Sets the MinIO access key
123+
// Used in conjunction with the secret key for authentication with the MinIO server
124+
func WithMinioAccessKey(minioAccessKey string) OrchestratorOption {
125+
return func(o *Orchestrator) {
126+
o.minioAccessKey = minioAccessKey
127+
}
128+
}
129+
130+
// WithMinioSecretKey Sets the MinIO secret key
131+
// Used in conjunction with the access key for authentication with the MinIO server
132+
func WithMinioSecretKey(minioSecretKey string) OrchestratorOption {
133+
return func(o *Orchestrator) {
134+
o.minioSecretKey = minioSecretKey
135+
}
136+
}

docs/quickstart_guide.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,11 +172,11 @@ Another option is to install using official instructions: [https://golang.org/do
172172
# EITHER
173173
sudo screen -dmS vhive bash -c "./vhive > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
174174
# OR
175-
sudo screen -dmS vhive bash -c "./vhive -snapshots > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
175+
sudo screen -dmS vhive bash -c "./vhive -snapshots 'local' > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
176176
```
177177
> **Note:**
178178
>
179-
> By default, the microVMs are booted, `-snapshots` enables snapshots after the 2nd invocation of each function.
179+
> By default, the microVMs are booted; passing `-snapshots <local|remote>` enables snapshots after the 2nd invocation of each function.
180180
>
181181
> If `-snapshots` and `-upf` are specified, the snapshots are accelerated with the Record-and-Prefetch (REAP)
182182
technique that we described in our ASPLOS'21
@@ -186,6 +186,8 @@ Another option is to install using official instructions: [https://golang.org/do
186186
>
187187
> If you are using `stargz` with `firecracker`, you also need to set the `-dockerCredentials` flag to be able to [pull the images
188188
from inside the microVMs](https://github.com/firecracker-microvm/firecracker-containerd/blob/main/docker-credential-mmds/README.md#docker-credential-helper-mmds).
189+
>
190+
> Remote snapshots are only supported in the `firecracker` mode using `stargz`. Check the [snapshot guide](../docs/snapshots.md) for more details on how to set up remote snapshots.
189191
190192
### 3. Configure Master Node
191193
**On the master node**, execute the following instructions below **as a non-root user with sudo rights** using **bash**:
@@ -297,7 +299,7 @@ Execute the following below **as a non-root user with sudo rights** using **bash
297299
298300
> **Note:**
299301
>
300-
> By default, the microVMs are booted, `-snapshots` enables snapshots after the 2nd invocation of each function.
302+
> By default, the microVMs are booted; passing `-snapshots <local|remote>` enables snapshots after the 2nd invocation of each function.
301303
>
302304
> If `-snapshots` and `-upf` are specified, the snapshots are accelerated with the Record-and-Prefetch (REAP)
303305
technique that we described in our ASPLOS'21
@@ -307,6 +309,8 @@ Execute the following below **as a non-root user with sudo rights** using **bash
307309
>
308310
> If you are using `stargz` with `firecracker`, you also need to set the `-dockerCredentials` flag to be able to [pull the images
309311
from inside the microVMs](https://github.com/firecracker-microvm/firecracker-containerd/blob/main/docker-credential-mmds/README.md#docker-credential-helper-mmds).
312+
>
313+
> Remote snapshots are only supported in the `firecracker` mode using `stargz`. Check the [snapshot guide](../docs/snapshots.md) for more details on how to set up remote snapshots.
310314
311315
6. Run the single node cluster setup script:
312316
```bash

0 commit comments

Comments
 (0)