Skip to content

Commit def6841

Browse files
committed
Add support for remote Firecracker snapshots
- When remote snapshots are enabled, each snapshot is uploaded to a MinIO instance after it is committed. When loading a snapshot that is not available locally, the coordinator checks whether it is available in MinIO and, if so, fetches it. - Remote Firecracker snapshots are currently supported only with the Stargz snapshotter (there are some container corruption issues when using devmapper). Signed-off-by: André Jesus <[email protected]>
1 parent 5b9627f commit def6841

File tree

19 files changed

+915
-106
lines changed

19 files changed

+915
-106
lines changed

.github/workflows/unit_tests.yml

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,32 @@ jobs:
2727
fail-fast: false
2828
matrix:
2929
module: [misc, networking, snapshotting]
30+
services:
31+
minio: # MinIO service for testing remote snapshots
32+
image: lazybit/minio # Can't use minio/minio because there's still no support for the jobs.<job_id>.services.<service_id>.command option in GH Actions
33+
ports:
34+
- 9000:9000
35+
options: >-
36+
--health-cmd "curl -f http://localhost:9000/minio/health/live || exit 1"
37+
--health-interval 5s
38+
--health-timeout 5s
39+
--health-retries 5
40+
volumes:
41+
- ${{ github.workspace }}/data:/data
42+
env:
43+
MINIO_ROOT_USER: minio
44+
MINIO_ROOT_PASSWORD: minio123
45+
# command: server /data
3046
steps:
3147
- name: Check out code into the Go module directory
3248
uses: actions/checkout@v4
49+
with:
50+
path: ${{ github.sha }}
3351

3452
- name: Set up Go version in go.mod file
3553
uses: actions/setup-go@v5
3654
with:
37-
go-version-file: ${{ github.workspace }}/go.mod
55+
go-version-file: ${{ github.workspace }}/${{ github.sha }}/go.mod
3856
cache-dependency-path: |
3957
**/go.sum
4058
**/go.mod
@@ -45,27 +63,27 @@ jobs:
4563
python-version: '3.x'
4664

4765
- name: Build setup scripts
48-
run: pushd scripts && go build -o setup_tool && popd
66+
run: pushd ${{ github.sha }}/scripts && go build -o setup_tool && popd
4967

5068
- name: Add rsync
5169
run: |
5270
sudo apt update
5371
sudo apt install rsync -y
5472
5573
- name: Setup System
56-
run: ./scripts/setup_tool setup_system
74+
run: ./${{ github.sha }}/scripts/setup_tool setup_system
5775

5876
- name: Build
59-
run: go build -race -v -a ./...
77+
run: cd ${{ github.sha }} && go build -race -v -a ./...
6078

6179
- name: Run tests in submodules
6280
env:
6381
MODULE: ${{ matrix.module }}
6482
AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
6583
AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}
6684
run: |
67-
make -C $MODULE test
68-
make -C $MODULE test-man
85+
make -C ${{ github.sha }}/${{ matrix.module }} test
86+
make -C ${{ github.sha }}/${{ matrix.module }} test-man
6987
7088
profile-unit-test:
7189
name: "Unit test: profile unit test"

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ EXTRATESTFILES:=vhive_test.go stats.go vhive.go functions.go
2929
# WITHLAZY:=-lazyTest
3030
WITHUPF:=
3131
WITHLAZY:=
32-
WITHSNAPSHOTS:=-snapshotsTest
32+
WITHSNAPSHOTS:=-snapshotsTest 'local'
3333
CTRDLOGDIR:=/tmp/ctrd-logs
3434

3535
vhive: proto

bench_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func TestBenchParallelServe(t *testing.T) {
6363
imageName, isPresent := images[*funcName]
6464
require.True(t, isPresent, "Function is not supported")
6565

66-
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst)
66+
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst, *snapshotTestMode)
6767

6868
createResultsDir()
6969

@@ -136,7 +136,7 @@ func TestBenchWarmServe(t *testing.T) {
136136
imageName, isPresent := images[*funcName]
137137
require.True(t, isPresent, "Function is not supported")
138138

139-
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst)
139+
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst, *snapshotTestMode)
140140

141141
createResultsDir()
142142

@@ -201,7 +201,7 @@ func TestBenchServe(t *testing.T) {
201201
imageName, isPresent := images[*funcName]
202202
require.True(t, isPresent, "Function is not supported")
203203

204-
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst)
204+
funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst, *snapshotTestMode)
205205

206206
createResultsDir()
207207

configs/.wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ SMI
465465
sms
466466
SMT
467467
snapshotted
468+
snapshotters
468469
snapshotting
469470
SoC
470471
SOCACHE

cri/firecracker/coordinator.go

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ import (
3535

3636
log "github.com/sirupsen/logrus"
3737
"github.com/vhive-serverless/vhive/ctriface"
38+
39+
"github.com/minio/minio-go/v7"
40+
"github.com/minio/minio-go/v7/pkg/credentials"
3841
)
3942

4043
type coordinator struct {
@@ -67,10 +70,22 @@ func newFirecrackerCoordinator(orch *ctriface.Orchestrator, opts ...coordinatorO
6770
}
6871

6972
snapshotsDir := "/fccd/test/snapshots"
73+
snapshotsBucket := "snapshots"
74+
var minioClient *minio.Client
75+
7076
if !c.withoutOrchestrator {
7177
snapshotsDir = orch.GetSnapshotsDir()
78+
snapshotsBucket = orch.GetSnapshotsBucket()
79+
80+
if orch.GetSnapshotMode() == "remote" {
81+
minioClient, _ = minio.New(orch.GetMinioAddr(), &minio.Options{
82+
Creds: credentials.NewStaticV4(orch.GetMinioAccessKey(), orch.GetMinioSecretKey(), ""),
83+
Secure: false,
84+
})
85+
}
7286
}
73-
c.snapshotManager = snapshotting.NewSnapshotManager(snapshotsDir)
87+
88+
c.snapshotManager = snapshotting.NewSnapshotManager(snapshotsDir, snapshotsBucket, minioClient)
7489

7590
return c
7691
}
@@ -80,9 +95,18 @@ func (c *coordinator) startVM(ctx context.Context, image, revision string) (*fun
8095
}
8196

8297
func (c *coordinator) startVMWithEnvironment(ctx context.Context, image, revision string, environment []string) (*funcInstance, error) {
83-
if c.orch != nil && c.orch.GetSnapshotsEnabled() {
84-
// Check if snapshot is available
85-
if snap, err := c.snapshotManager.AcquireSnapshot(revision); err == nil {
98+
if c.orch != nil && c.orch.GetSnapshotMode() != "disabled" {
99+
if snap, _ := c.snapshotManager.AcquireSnapshot(revision); snap == nil {
100+
if c.orch.GetSnapshotMode() == "remote" {
101+
if _, err := c.snapshotManager.DownloadSnapshot(revision); err != nil {
102+
log.WithError(err).Errorf("failed to download snapshot %s from remote storage", revision)
103+
} else {
104+
log.Printf("downloaded snapshot %s from remote storage", revision)
105+
}
106+
}
107+
}
108+
109+
if snap, _ := c.snapshotManager.AcquireSnapshot(revision); snap != nil {
86110
return c.orchLoadInstance(ctx, snap)
87111
}
88112
}
@@ -102,7 +126,7 @@ func (c *coordinator) stopVM(ctx context.Context, containerID string) error {
102126
return nil
103127
}
104128

105-
if c.orch != nil && c.orch.GetSnapshotsEnabled() && !fi.SnapBooted {
129+
if c.orch != nil && c.orch.GetSnapshotMode() != "disabled" && !fi.SnapBooted {
106130
err := c.orchCreateSnapshot(ctx, fi)
107131
if err != nil {
108132
log.Printf("Err creating snapshot %s\n", err)
@@ -230,11 +254,23 @@ func (c *coordinator) orchCreateSnapshot(ctx context.Context, fi *funcInstance)
230254
}
231255
}
232256

257+
if err := snap.SerializeSnapInfo(); err != nil {
258+
fi.Logger.WithError(err).Error("failed to serialize snapshot info")
259+
return err
260+
}
261+
233262
if err := c.snapshotManager.CommitSnapshot(fi.Revision); err != nil {
234263
fi.Logger.WithError(err).Error("failed to commit snapshot")
235264
return err
236265
}
237266

267+
if !c.withoutOrchestrator && c.orch.GetSnapshotMode() == "remote" {
268+
fi.Logger.Debug("uploading snapshot to remote storage")
269+
if err := c.snapshotManager.UploadSnapshot(fi.Revision); err != nil {
270+
fi.Logger.WithError(err).Error("failed to upload snapshot")
271+
}
272+
}
273+
238274
return nil
239275
}
240276

ctriface/orch.go

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -98,16 +98,21 @@ type Orchestrator struct {
9898
imageManager *image.ImageManager
9999
dockerCredentials DockerCredentials
100100
// store *skv.KVStore
101-
snapshotsEnabled bool
102-
isUPFEnabled bool
103-
isLazyMode bool
104-
snapshotsDir string
105-
isMetricsMode bool
106-
netPoolSize int
101+
snapshotMode string
102+
isUPFEnabled bool
103+
isLazyMode bool
104+
snapshotsDir string
105+
snapshotsBucket string
106+
isMetricsMode bool
107+
netPoolSize int
107108

108109
vethPrefix string
109110
clonePrefix string
110111

112+
minioAddr string
113+
minioAccessKey string
114+
minioSecretKey string
115+
111116
memoryManager *manager.MemoryManager
112117
}
113118

@@ -119,9 +124,13 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption)
119124
o.cachedImages = make(map[string]containerd.Image)
120125
o.snapshotter = snapshotter
121126
o.snapshotsDir = "/fccd/snapshots"
127+
o.snapshotsBucket = "snapshots"
122128
o.netPoolSize = 10
123129
o.vethPrefix = "172.17"
124130
o.clonePrefix = "172.18"
131+
o.minioAddr = "10.96.0.46:9000"
132+
o.minioAccessKey = "minio"
133+
o.minioSecretKey = "minio123"
125134

126135
for _, opt := range opts {
127136
opt(o)
@@ -187,9 +196,9 @@ func (o *Orchestrator) Cleanup() {
187196
}
188197
}
189198

190-
// GetSnapshotsEnabled Returns the snapshots mode of the orchestrator
191-
func (o *Orchestrator) GetSnapshotsEnabled() bool {
192-
return o.snapshotsEnabled
199+
// GetSnapshotMode returns the snapshot mode of the orchestrator.
200+
func (o *Orchestrator) GetSnapshotMode() string {
201+
return o.snapshotMode
193202
}
194203

195204
// GetUPFEnabled Returns the UPF mode of the orchestrator
@@ -252,6 +261,27 @@ func (o *Orchestrator) GetDockerCredentials() string {
252261
return string(data)
253262
}
254263

264+
// GetSnapshotsBucket returns the S3 bucket name used by the orchestrator for storing remote snapshots.
265+
func (o *Orchestrator) GetSnapshotsBucket() string {
266+
return o.snapshotsBucket
267+
}
268+
269+
// GetMinioAddr returns the address (endpoint) of the MinIO server used by the orchestrator.
270+
func (o *Orchestrator) GetMinioAddr() string {
271+
return o.minioAddr
272+
}
273+
274+
// GetMinioAccessKey returns the access key used to authenticate with the MinIO server.
275+
func (o *Orchestrator) GetMinioAccessKey() string {
276+
return o.minioAccessKey
277+
}
278+
279+
// GetMinioSecretKey returns the secret key used to authenticate with the MinIO server.
280+
// This should be handled securely and never exposed in logs or error messages.
281+
func (o *Orchestrator) GetMinioSecretKey() string {
282+
return o.minioSecretKey
283+
}
284+
255285
func (o *Orchestrator) setupHeartbeat() {
256286
heartbeat := time.NewTicker(60 * time.Second)
257287

ctriface/orch_options.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ func WithTestModeOn(testModeOn bool) OrchestratorOption {
4040
}
4141
}
4242

43-
// WithSnapshots Sets the snapshot mode on or off
44-
func WithSnapshots(snapshotsEnabled bool) OrchestratorOption {
43+
// WithSnapshotMode Sets the snapshot mode
44+
func WithSnapshotMode(snapshotMode string) OrchestratorOption {
4545
return func(o *Orchestrator) {
46-
o.snapshotsEnabled = snapshotsEnabled
46+
o.snapshotMode = snapshotMode
4747
}
4848
}
4949

@@ -111,3 +111,26 @@ func WithDockerCredentials(dockerCredentials string) OrchestratorOption {
111111
o.dockerCredentials = creds
112112
}
113113
}
114+
115+
// WithMinioAddr Sets the MinIO server address (endpoint)
116+
func WithMinioAddr(minioAddr string) OrchestratorOption {
117+
return func(o *Orchestrator) {
118+
o.minioAddr = minioAddr
119+
}
120+
}
121+
122+
// WithMinioAccessKey Sets the MinIO access key
123+
// Used in conjunction with the secret key for authentication with the MinIO server
124+
func WithMinioAccessKey(minioAccessKey string) OrchestratorOption {
125+
return func(o *Orchestrator) {
126+
o.minioAccessKey = minioAccessKey
127+
}
128+
}
129+
130+
// WithMinioSecretKey Sets the MinIO secret key
131+
// Used in conjunction with the access key for authentication with the MinIO server
132+
func WithMinioSecretKey(minioSecretKey string) OrchestratorOption {
133+
return func(o *Orchestrator) {
134+
o.minioSecretKey = minioSecretKey
135+
}
136+
}

docs/quickstart_guide.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,11 +172,11 @@ Another option is to install using official instructions: [https://golang.org/do
172172
# EITHER
173173
sudo screen -dmS vhive bash -c "./vhive > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
174174
# OR
175-
sudo screen -dmS vhive bash -c "./vhive -snapshots > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
175+
sudo screen -dmS vhive bash -c "./vhive -snapshots 'local' > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
176176
```
177177
> **Note:**
178178
>
179-
> By default, the microVMs are booted, `-snapshots` enables snapshots after the 2nd invocation of each function.
179+
> By default, the microVMs are booted; passing `-snapshots <local|remote>` enables snapshots after the 2nd invocation of each function.
180180
>
181181
> If `-snapshots` and `-upf` are specified, the snapshots are accelerated with the Record-and-Prefetch (REAP)
182182
technique that we described in our ASPLOS'21
@@ -186,6 +186,8 @@ Another option is to install using official instructions: [https://golang.org/do
186186
>
187187
> If you are using `stargz` with `firecracker`, you also need to set the `-dockerCredentials` flag to be able to [pull the images
188188
from inside the microVMs](https://github.com/firecracker-microvm/firecracker-containerd/blob/main/docker-credential-mmds/README.md#docker-credential-helper-mmds).
189+
>
190+
> Remote snapshots are only supported in the `firecracker` mode using `stargz`. Check the [snapshot guide](../docs/snapshots.md) for more details on how to set up remote snapshots.
189191
190192
### 3. Configure Master Node
191193
**On the master node**, execute the following instructions below **as a non-root user with sudo rights** using **bash**:
@@ -297,7 +299,7 @@ Execute the following below **as a non-root user with sudo rights** using **bash
297299
298300
> **Note:**
299301
>
300-
> By default, the microVMs are booted, `-snapshots` enables snapshots after the 2nd invocation of each function.
302+
> By default, the microVMs are booted; passing `-snapshots <local|remote>` enables snapshots after the 2nd invocation of each function.
301303
>
302304
> If `-snapshots` and `-upf` are specified, the snapshots are accelerated with the Record-and-Prefetch (REAP)
303305
technique that we described in our ASPLOS'21
@@ -307,6 +309,8 @@ Execute the following below **as a non-root user with sudo rights** using **bash
307309
>
308310
> If you are using `stargz` with `firecracker`, you also need to set the `-dockerCredentials` flag to be able to [pull the images
309311
from inside the microVMs](https://github.com/firecracker-microvm/firecracker-containerd/blob/main/docker-credential-mmds/README.md#docker-credential-helper-mmds).
312+
>
313+
> Remote snapshots are only supported in the `firecracker` mode using `stargz`. Check the [snapshot guide](../docs/snapshots.md) for more details on how to set up remote snapshots.
310314
311315
6. Run the single node cluster setup script:
312316
```bash

0 commit comments

Comments
 (0)