19 changes: 17 additions & 2 deletions espresso/devnet-tests/batcher_restart_test.go
@@ -6,23 +6,38 @@ import (

"github.com/ethereum/go-ethereum"
"github.com/stretchr/testify/require"

env "github.com/ethereum-optimism/optimism/espresso/environment"
)

func TestBatcherRestart(t *testing.T) {
testRestart(t, false)
}
Comment on lines +14 to +15

Collaborator: I just realized that to restart op-batcher-tee we not only need the profile to be tee, but we also need to restart the op-batcher-tee service specifically.

Collaborator (Author): Oh, that's a big catch for sure, thanks. No idea how it passed for me in the first place.

func TestEnclaveRestart(t *testing.T) {
env.RunOnlyWithEnclave(t)
testRestart(t, true)
}

func testRestart(t *testing.T, tee bool) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

d := NewDevnet(ctx, t)
d.tee = tee

require.NoError(t, d.Up())
defer func() {
require.NoError(t, d.Down())
}()

require.NoError(t, d.WaitForL2Operational())

// Send a transaction just to check that everything has started up ok.
require.NoError(t, d.RunSimpleL2Burn())

// Shut down the batcher and have another transaction submitted while it is down.
require.NoError(t, d.ServiceDown("op-batcher"))
require.NoError(t, d.ServiceDown(ServiceBatcher))
d.SleepOutageDuration()

receipt, err := d.SubmitSimpleL2Burn()
@@ -35,7 +50,7 @@ func TestBatcherRestart(t *testing.T) {

// Bring the batcher back up and check that it processes the transaction which was submitted
// while it was down.
require.NoError(t, d.ServiceUp("op-batcher"))
require.NoError(t, d.ServiceUp(ServiceBatcher))
require.NoError(t, d.VerifySimpleL2Burn(receipt))

// Submit another transaction at the end just to check that things stay working.
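For context on the TestEnclaveRestart gate above: env.RunOnlyWithEnclave comes from espresso/environment, and the justfile change at the end of this diff runs these tests with ESPRESSO_RUN_ENCLAVE_TESTS=true, so the gate is presumably keyed off that variable. A minimal sketch of such a gate, assuming that variable is the only switch (the real helper may differ):

    // Hypothetical sketch of the enclave-test gate; the real helper lives in
    // the espresso/environment package. Requires "os" and "testing".
    func RunOnlyWithEnclave(t *testing.T) {
    	t.Helper()
    	if os.Getenv("ESPRESSO_RUN_ENCLAVE_TESTS") != "true" {
    		t.Skip("set ESPRESSO_RUN_ENCLAVE_TESTS=true to run enclave tests")
    	}
    }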
153 changes: 135 additions & 18 deletions espresso/devnet-tests/devnet_tools.go
@@ -16,6 +16,7 @@ import (
"time"

"github.com/ethereum-optimism/optimism/op-e2e/bindings"
"github.com/ethereum-optimism/optimism/op-e2e/e2eutils/geth"
"github.com/ethereum-optimism/optimism/op-e2e/e2eutils/wait"
"github.com/ethereum-optimism/optimism/op-e2e/system/helpers"
"github.com/ethereum-optimism/optimism/op-node/rollup"
@@ -32,8 +33,85 @@ import (
"github.com/ethereum-optimism/optimism/op-e2e/config/secrets"
)

type Service uint64

const (
ServiceBatcher Service = iota
ServiceCaffNode
ServiceCaffNodeGeth
ServiceChallenger
ServiceDevNode
ServiceHTTPProxy
ServiceL1Beacon
ServiceL1DataInit
ServiceL1Genesis
ServiceL1Geth
ServiceL1Validator
ServiceL2Genesis
ServiceL2Rollup
ServiceL2Seq
ServiceL2SeqGeth
ServiceL2Verif
ServiceL2VerifGeth
ServiceProposer
)

const (
ProfileDefault = "default"
ProfileTee = "tee"
)

type Profile struct {
BatcherService string
ProposerService string
}

// modifyDefaultProfile creates a new profile based on the default profile with specified modifications
func modifyDefaultProfile(modifications map[Service]string) map[Service]string {
result := make(map[Service]string)
// Copy all services from default profile
for service, container := range defaultProfile {
result[service] = container
}
// Apply modifications
for service, container := range modifications {
result[service] = container
}
return result
}

var defaultProfile = map[Service]string{
ServiceBatcher: "op-batcher",
ServiceCaffNode: "caff-node",
ServiceCaffNodeGeth: "op-geth-caff-node",
ServiceChallenger: "op-challenger",
ServiceDevNode: "espresso-dev-node",
ServiceHTTPProxy: "http-proxy",
ServiceL1Beacon: "l1-beacon",
ServiceL1DataInit: "l1-data-init",
ServiceL1Genesis: "l1-genesis",
ServiceL1Geth: "l1-geth",
ServiceL1Validator: "l1-validator",
ServiceL2Genesis: "l2-genesis",
ServiceL2Rollup: "l2-rollup",
ServiceL2Seq: "op-node-sequencer",
ServiceL2SeqGeth: "op-geth-sequencer",
ServiceL2Verif: "op-node-verifier",
ServiceL2VerifGeth: "op-geth-verifier",
ServiceProposer: "op-proposer",
}

var Profiles = map[string]map[Service]string{
ProfileDefault: defaultProfile,
ProfileTee: modifyDefaultProfile(map[Service]string{
ServiceBatcher: "op-batcher-tee",
ServiceProposer: "op-proposer-tee",
}),
}

type Devnet struct {
ctx context.Context
tee bool
secrets secrets.Secrets
outageTime time.Duration
successTime time.Duration
@@ -85,7 +163,7 @@ func NewDevnet(ctx context.Context, t *testing.T) *Devnet {
func (d *Devnet) isRunning() bool {
cmd := exec.CommandContext(
d.ctx,
"docker", "compose", "ps", "-q",
"docker", "compose", "--profile", d.getProfile(), "ps", "-q",
)
buf := new(bytes.Buffer)
cmd.Stdout = buf
@@ -97,6 +175,27 @@ func (d *Devnet) isRunning() bool {
return len(out) > 0
}

// getProfile returns the current profile name based on devnet configuration
func (d *Devnet) getProfile() string {
if d.tee {
return ProfileTee
}
return ProfileDefault
}

// getServiceName returns the container name for a given service in the current profile
func (d *Devnet) getServiceName(service Service) string {
profile := d.getProfile()
if container, ok := Profiles[profile][service]; ok {
return container
}
// Fall back to default profile if service not found
if container, ok := Profiles[ProfileDefault][service]; ok {
return container
}
return ""
}

func (d *Devnet) Up() (err error) {
if d.isRunning() {
if err := d.Down(); err != nil {
@@ -107,9 +206,11 @@ func (d *Devnet) Up() (err error) {
return fmt.Errorf("devnet is already running, this should be a clean state; please shut it down first")
}

profile := d.getProfile()

cmd := exec.CommandContext(
d.ctx,
"docker", "compose", "up", "-d",
"docker", "compose", "--profile", profile, "up", "-d",
)
cmd.Env = append(
cmd.Env,
@@ -144,7 +245,7 @@ func (d *Devnet) Up() (err error) {
// Stream logs to stdout while the test runs. This goroutine will automatically exit when
// the context is cancelled.
go func() {
cmd = exec.CommandContext(d.ctx, "docker", "compose", "logs", "-f")
cmd = exec.CommandContext(d.ctx, "docker", "compose", "--profile", profile, "logs", "-f")
cmd.Stdout = os.Stdout
// We don't care about the error return of this command, since it's always going to be
// killed by the context cancellation.
@@ -153,50 +254,65 @@ func (d *Devnet) Up() (err error) {
}

// Open RPC clients for the different nodes.
d.L2Seq, err = d.serviceClient("op-geth-sequencer", 8546)
d.L2Seq, err = d.serviceClient(d.getServiceName(ServiceL2SeqGeth), 8546)
if err != nil {
return err
}
d.L2SeqRollup, err = d.rollupClient("op-node-sequencer", 9545)
d.L2SeqRollup, err = d.rollupClient(d.getServiceName(ServiceL2Seq), 9545)
if err != nil {
return err
}
d.L2Verif, err = d.serviceClient("op-geth-verifier", 8546)
d.L2Verif, err = d.serviceClient(d.getServiceName(ServiceL2VerifGeth), 8546)
if err != nil {
return err
}
d.L2VerifRollup, err = d.rollupClient("op-node-verifier", 9546)
d.L2VerifRollup, err = d.rollupClient(d.getServiceName(ServiceL2Verif), 9546)
if err != nil {
return err
}

d.L1, err = d.serviceClient("l1-geth", 8545)
d.L1, err = d.serviceClient(d.getServiceName(ServiceL1Geth), 8545)
if err != nil {
return err
}

return nil
}

func (d *Devnet) ServiceUp(service string) error {
log.Info("bringing up service", "service", service)
func (d *Devnet) WaitForL2Operational() error {

timeout := time.Minute * 5

// The batcher needs more time to start up in TEE mode.
if d.getProfile() == ProfileTee {
timeout = time.Minute * 10
}

_, err := geth.WaitForBlockToBeSafe(big.NewInt(1), d.L2Verif, timeout)
return err
}

func (d *Devnet) ServiceUp(service Service) error {
serviceName := d.getServiceName(service)
log.Info("bringing up service", "service", serviceName)
cmd := exec.CommandContext(
d.ctx,
"docker", "compose", "up", "-d", service,
"docker", "compose", "--profile", d.getProfile(), "up", "-d", serviceName,
)
return cmd.Run()
}

func (d *Devnet) ServiceDown(service string) error {
log.Info("shutting down service", "service", service)
func (d *Devnet) ServiceDown(service Service) error {
serviceName := d.getServiceName(service)
log.Info("shutting down service", "service", serviceName)
cmd := exec.CommandContext(
d.ctx,
"docker", "compose", "down", service,
"docker", "compose", "--profile", d.getProfile(), "down", serviceName,
)
return cmd.Run()
}
Comment on lines +305 to +313

Collaborator: This reminds me of one thing: the op-batcher-tee container spawns an enclave container (batcher-enclaver-xxx) that doesn't stop automatically when op-batcher-tee stops. In Docker Compose I've been cleaning it up manually via espresso/scripts/shutdown.sh, but that's probably not the best approach. I'm thinking we may see the same issue here: even if op-batcher-tee exits, batcher-enclaver-xxx may keep running. One possible way is to add a cleanup hook when shutting down op-batcher-tee (not sure whether that's supported), or to also add a manual shutdown here. WDYT?

Collaborator (Author): Oh interesting. I'll see what we can do here.

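One shape the manual-shutdown fallback discussed in this thread could take in the test harness (a sketch only, not part of this PR; the batcher-enclaver name filter comes from the comment above, and the PR instead adds an EXIT trap in run-enclave.sh below):

    // Sketch: force-remove any leftover enclave containers after stopping
    // op-batcher-tee. Assumes "strings" is imported alongside the existing
    // "os/exec" usage in devnet_tools.go.
    func (d *Devnet) removeEnclaveContainers() error {
    	list := exec.CommandContext(d.ctx,
    		"docker", "ps", "-aq", "--filter", "name=batcher-enclaver")
    	out, err := list.Output()
    	if err != nil {
    		return err
    	}
    	ids := strings.Fields(string(out))
    	if len(ids) == 0 {
    		return nil
    	}
    	args := append([]string{"rm", "-f"}, ids...)
    	return exec.CommandContext(d.ctx, "docker", args...).Run()
    }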

func (d *Devnet) ServiceRestart(service string) error {
func (d *Devnet) ServiceRestart(service Service) error {
if err := d.ServiceDown(service); err != nil {
return err
}
@@ -422,7 +538,7 @@ func (d *Devnet) Down() error {
// Use timeout flag for faster Docker shutdown
cmd := exec.CommandContext(
d.ctx,
"docker", "compose", "down", "-v", "--remove-orphans", "--timeout", "10",
"docker", "compose", "--profile", d.getProfile(), "down", "-v", "--remove-orphans", "--timeout", "10",
)
return cmd.Run()
}
@@ -568,7 +684,8 @@ func (d *Devnet) OpChallengerOutput(opts ...string) (string, error) {
}

func (d *Devnet) opChallengerCmd(opts ...string) *exec.Cmd {
opts = append([]string{"compose", "exec", "op-challenger", "entrypoint.sh", "op-challenger"}, opts...)
serviceName := d.getServiceName(ServiceChallenger)
opts = append([]string{"compose", "--profile", d.getProfile(), "exec", serviceName, "entrypoint.sh", "op-challenger"}, opts...)
cmd := exec.CommandContext(
d.ctx,
"docker",
@@ -588,7 +705,7 @@ func (d *Devnet) hostPort(service string, privatePort uint16) (uint16, error) {
errBuf := new(bytes.Buffer)
cmd := exec.CommandContext(
d.ctx,
"docker", "compose", "port", service, fmt.Sprint(privatePort),
"docker", "compose", "--profile", d.getProfile(), "port", service, fmt.Sprint(privatePort),
)
cmd.Stdout = buf
cmd.Stderr = errBuf
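To make the new lookup behavior concrete: under the default profile ServiceBatcher resolves to op-batcher, under the tee profile to op-batcher-tee, and services the tee profile does not override fall through to their default names. A sketch of a test exercising this, reusing the identifiers from the diff above (the test itself is illustrative, not part of this PR):

    // Sketch: exercising the profile-aware service-name lookup.
    func TestGetServiceNameSketch(t *testing.T) {
    	d := &Devnet{tee: true}
    	if got := d.getServiceName(ServiceBatcher); got != "op-batcher-tee" {
    		t.Fatalf("tee batcher = %q, want op-batcher-tee", got)
    	}
    	// The tee profile only overrides the batcher and the proposer.
    	if got := d.getServiceName(ServiceL1Geth); got != "l1-geth" {
    		t.Fatalf("tee l1-geth = %q, want l1-geth", got)
    	}
    	d.tee = false
    	if got := d.getServiceName(ServiceBatcher); got != "op-batcher" {
    		t.Fatalf("default batcher = %q, want op-batcher", got)
    	}
    }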
4 changes: 2 additions & 2 deletions espresso/devnet-tests/key_rotation_test.go
@@ -28,7 +28,7 @@ func TestRotateBatcherKey(t *testing.T) {
require.NoError(t, d.RunSimpleL2Burn())

// Shut down the batcher
require.NoError(t, d.ServiceDown("op-batcher"))
require.NoError(t, d.ServiceDown(ServiceBatcher))
d.SleepOutageDuration()

// Change the batch sender key to Bob
@@ -44,7 +44,7 @@ func TestRotateBatcherKey(t *testing.T) {
d.secrets.Batcher = d.secrets.Bob

// Restart the batcher
require.NoError(t, d.ServiceUp("op-batcher"))
require.NoError(t, d.ServiceUp(ServiceBatcher))
d.SleepOutageDuration()

// Send a transaction to check the L2 still runs
3 changes: 3 additions & 0 deletions espresso/docker/op-batcher-tee/run-enclave.sh
@@ -197,6 +197,9 @@ CONTAINER_ID=$(docker ps --filter "name=$CONTAINER_NAME" --format "{{.ID}}" | he
CONTAINER_IMAGE=$(docker inspect "$CONTAINER_NAME" --format '{{.Config.Image}}' 2>/dev/null)
STARTED_AT=$(docker inspect "$CONTAINER_NAME" --format '{{.State.StartedAt}}' 2>/dev/null)

# Shut down the container when we exit for any reason
trap "docker rm -f $CONTAINER_ID" EXIT

echo "Container Details:"
echo " ID: $CONTAINER_ID"
echo " Image: $CONTAINER_IMAGE"
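A note on the trap added above: the EXIT trap fires whenever the script exits, including on error, so the enclave container is removed even if a later step fails. Because the trap command is double-quoted, $CONTAINER_ID is expanded at the moment the trap is installed, pinning the cleanup to the container resolved just above. This appears to be the fix for the lingering batcher-enclaver container raised in the review thread earlier in this diff.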
7 changes: 7 additions & 0 deletions espresso/scripts/run-tests-github-actions.sh
@@ -15,6 +15,13 @@ git submodule update --init --recursive
# Populate the cachix cache
nix flake archive --json | jq -r '.path,(.inputs|to_entries[].value.path)' | cachix push espresso-systems-private

echo "[*] Downloading Docker Compose..."
DOCKER_PLUGINS=/usr/local/lib/docker/cli-plugins/
sudo mkdir -p $DOCKER_PLUGINS
sudo curl -SL https://github.com/docker/compose/releases/download/v2.40.0/docker-compose-linux-x86_64 \
-o $DOCKER_PLUGINS/docker-compose
sudo chmod +x $DOCKER_PLUGINS/docker-compose

echo "[*] Starting Docker..."
sudo systemctl enable --now docker
sudo usermod -a -G docker ec2-user
3 changes: 3 additions & 0 deletions justfile
@@ -43,6 +43,9 @@ espresso-tests timeout=espresso_tests_timeout: compile-contracts
espresso-enclave-tests:
ESPRESSO_RUN_ENCLAVE_TESTS=true go test -timeout={{espresso_tests_timeout}} -p=1 -count=1 ./espresso/enclave-tests/...

devnet-enclave-tests: build-devnet
ESPRESSO_RUN_ENCLAVE_TESTS=true U_ID={{uid}} GID={{gid}} go test -timeout 30m -p 1 -count 1 -v -run 'TestEnclaveRestart' ./espresso/devnet-tests/...


IMAGE_NAME := "ghcr.io/espressosystems/espresso-sequencer/espresso-dev-node:release-fix-cors"
remove-espresso-containers:
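For reference, the new recipe is invoked with:

    just devnet-enclave-tests

Per the recipe body, this first runs the build-devnet dependency, then runs only TestEnclaveRestart under espresso/devnet-tests with ESPRESSO_RUN_ENCLAVE_TESTS=true and a 30-minute timeout.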