Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ func Run(ctx context.Context, conf Config) (err error) {
life := new(lifecycle.Manager)

if conf.PrivKeyLocking {
lockSvc, err := privkeylock.New(conf.PrivKeyFile+".lock", "charon run")
lockSvc, err := privkeylock.New(conf.PrivKeyFile, conf.LockFile, "charon run")
if err != nil {
return err
}
Expand Down
97 changes: 76 additions & 21 deletions app/privkeylock/privkeylock.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package privkeylock

import (
"encoding/json"
"fmt"
"os"
"time"

Expand All @@ -17,50 +18,81 @@ var (

// updatePeriod is the duration after which the private key lock file is updated.
updatePeriod = 1 * time.Second

// gracePeriod is the duration after which a new cluster (new lock hash) can be run after an edit command.
// (we can't use chain spec at this time, so we use fixed duration of 768 seconds)
gracePeriod = 2 * 32 * 12 * time.Second
)

// New returns new private key locking service. It errors if a recently-updated private key lock file exists.
func New(path, command string) (Service, error) {
content, err := os.ReadFile(path)
func New(privKeyFilePath, clusterLockFilePath, command string) (Service, error) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: can we rename that to privKeyFileLockPath? Reading it as privKeyFilePath I've assumed it's the actual privKey

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used to follow "filepath" convention as a common widely adopted term. For instance, the standard Golang's package "path/filepath" is an example.

clusterLockHash, err := readClusterLockHash(clusterLockFilePath)
if err != nil {
return Service{}, err
}

privKeyFilePath += ".lock"

content, err := os.ReadFile(privKeyFilePath)
if errors.Is(err, os.ErrNotExist) { //nolint:revive // Empty block is fine.
// No file, we will create it in run
} else if err != nil {
return Service{}, errors.Wrap(err, "read private key lock file", z.Str("path", path))
return Service{}, errors.Wrap(err, "read private key lock file", z.Str("path", privKeyFilePath))
} else {
var meta metadata
if err := json.Unmarshal(content, &meta); err != nil {
return Service{}, errors.Wrap(err, "decode private key lock file", z.Str("path", path))
return Service{}, errors.Wrap(err, "decode private key lock file", z.Str("path", privKeyFilePath))
}

if time.Since(meta.Timestamp) <= staleDuration {
return Service{}, errors.New(
"existing private key lock file found, another charon instance may be running on your machine",
z.Str("path", path),
z.Str("path", privKeyFilePath),
z.Str("command", meta.Command),
z.Str("cluster_lock_hash", meta.ClusterLockHash),
)
}

if meta.ClusterLockHash != "" && clusterLockHash != meta.ClusterLockHash {
elapsedPeriod := time.Since(meta.Timestamp)
if elapsedPeriod < gracePeriod {
waitTime := gracePeriod - elapsedPeriod
errText := fmt.Sprintf("existing private key lock file found with different cluster lock hash, you must wait for %v before starting charon with the new cluster hash", waitTime)

return Service{}, errors.New(
errText,
z.Str("path", privKeyFilePath),
z.Str("command", meta.Command),
z.Str("existing_cluster_lock_hash", meta.ClusterLockHash),
z.Str("current_cluster_lock_hash", clusterLockHash),
z.Str("grace_period", gracePeriod.String()),
)
}
}
}

if err := writeFile(path, command, time.Now()); err != nil {
if err := writeFile(privKeyFilePath, clusterLockHash, command, time.Now()); err != nil {
return Service{}, err
}

return Service{
command: command,
path: path,
updatePeriod: updatePeriod,
quit: make(chan struct{}),
done: make(chan struct{}),
clusterLockHash: clusterLockHash,
command: command,
path: privKeyFilePath,
updatePeriod: updatePeriod,
quit: make(chan struct{}),
done: make(chan struct{}),
}, nil
}

// Service is a private key locking service.
type Service struct {
command string
path string
updatePeriod time.Duration
quit chan struct{} // Quit exits the Run goroutine if closed.
done chan struct{} // Done is closed when Run exits, which exits the Close goroutine.
clusterLockHash string
command string
path string
updatePeriod time.Duration
quit chan struct{} // Quit exits the Run goroutine if closed.
done chan struct{} // Done is closed when Run exits, which exits the Close goroutine.
}

// Run runs the service, updating the lock file every second and deleting it on context cancellation.
Expand All @@ -80,7 +112,7 @@ func (s Service) Run() error {
return nil
case <-tick.C:
// Overwrite lockfile with new metadata
if err := writeFile(s.path, s.command, time.Now()); err != nil {
if err := writeFile(s.path, s.clusterLockHash, s.command, time.Now()); err != nil {
return err
}
}
Expand All @@ -96,13 +128,14 @@ func (s Service) Close() {

// metadata is the metadata stored in the lock file.
type metadata struct {
Command string `json:"command"`
Timestamp time.Time `json:"timestamp"`
Command string `json:"command"`
Timestamp time.Time `json:"timestamp"`
ClusterLockHash string `json:"cluster_lock_hash,omitempty"`
}

// writeFile creates or updates the file with the latest metadata.
func writeFile(path, command string, now time.Time) error {
b, err := json.Marshal(metadata{Command: command, Timestamp: now})
func writeFile(path, clusterLockHash, command string, now time.Time) error {
b, err := json.Marshal(metadata{Command: command, Timestamp: now, ClusterLockHash: clusterLockHash})
if err != nil {
return errors.Wrap(err, "marshal private key lock file")
}
Expand All @@ -114,3 +147,25 @@ func writeFile(path, command string, now time.Time) error {

return nil
}

type clusterLockHash struct {
LockHash string `json:"lock_hash"`
}

// readClusterLockHash reads only lock_hash field from the cluster lock file.
// Returns empty string if file doesn't exist (e.g., during DKG before cluster-lock.json is created).
func readClusterLockHash(clusterLockFilePath string) (string, error) {
content, err := os.ReadFile(clusterLockFilePath)
if errors.Is(err, os.ErrNotExist) {
return "", nil // File doesn't exist yet, return empty hash
} else if err != nil {
return "", errors.Wrap(err, "read cluster lock file", z.Str("path", clusterLockFilePath))
}

var hash clusterLockHash
if err := json.Unmarshal(content, &hash); err != nil {
return "", errors.Wrap(err, "decode cluster lock hash file", z.Str("path", clusterLockFilePath))
}

return hash.LockHash, nil
}
168 changes: 160 additions & 8 deletions app/privkeylock/privkeylock_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package privkeylock

import (
"encoding/json"
"os"
"path/filepath"
"testing"
Expand All @@ -14,32 +15,38 @@ import (
)

func TestService(t *testing.T) {
path := filepath.Join(t.TempDir(), "privkeylocktest")
tmpDir := t.TempDir()
privKeyPath := filepath.Join(tmpDir, "privkey")
lockFilePath := filepath.Join(tmpDir, "cluster-lock.json")
lockPath := privKeyPath + ".lock"

// Create cluster lock file.
writeClusterLockFile(t, lockFilePath, "hash123")

// Create a stale file that is ignored.
err := writeFile(path, "test", time.Now().Add(-staleDuration))
err := writeFile(lockPath, "hash123", "test", time.Now().Add(-staleDuration))
require.NoError(t, err)

// Create a new service.
svc, err := New(path, "test")
svc, err := New(privKeyPath, lockFilePath, "test")
require.NoError(t, err)
// Increase the update period to make the test faster.
svc.updatePeriod = time.Millisecond

assertFileExists(t, path)
assertFileExists(t, lockPath)

// Assert a new service can't be created.
_, err = New(path, "test")
_, err = New(privKeyPath, lockFilePath, "test")
require.ErrorContains(t, err, "existing private key lock file found")

// Delete the file so Run will create it again.
require.NoError(t, os.Remove(path))
require.NoError(t, os.Remove(lockPath))

var eg errgroup.Group
eg.Go(svc.Run) // Run will create the file.

eg.Go(func() error {
assertFileExists(t, path)
assertFileExists(t, lockPath)
svc.Close()

return nil
Expand All @@ -48,10 +55,113 @@ func TestService(t *testing.T) {
require.NoError(t, eg.Wait())

// Assert the file is deleted.
_, openErr := os.Open(path)
_, openErr := os.Open(lockPath)
require.ErrorIs(t, openErr, os.ErrNotExist)
}

func TestClusterHashMismatchWithinGracePeriod(t *testing.T) {
tmpDir := t.TempDir()
privKeyPath := filepath.Join(tmpDir, "privkey")
lockFilePath := filepath.Join(tmpDir, "cluster-lock.json")
lockPath := privKeyPath + ".lock"

// Create cluster lock file with hash1.
writeClusterLockFile(t, lockFilePath, "hash1")

// Create a stale but within grace period lock file with hash1.
err := writeFile(lockPath, "hash1", "test", time.Now().Add(-staleDuration-time.Second))
require.NoError(t, err)

// Update cluster lock file to hash2.
writeClusterLockFile(t, lockFilePath, "hash2")

// Try to create service with new hash within grace period - should fail.
_, err = New(privKeyPath, lockFilePath, "test")
require.Error(t, err)
require.ErrorContains(t, err, "different cluster lock hash")
require.ErrorContains(t, err, "you must wait")
}

func TestClusterHashMismatchAfterGracePeriod(t *testing.T) {
tmpDir := t.TempDir()
privKeyPath := filepath.Join(tmpDir, "privkey")
lockFilePath := filepath.Join(tmpDir, "cluster-lock.json")
lockPath := privKeyPath + ".lock"

// Create cluster lock file with hash1.
writeClusterLockFile(t, lockFilePath, "hash1")

// Create an old lock file with hash1 (beyond grace period).
err := writeFile(lockPath, "hash1", "test", time.Now().Add(-gracePeriod-time.Second))
require.NoError(t, err)

// Update cluster lock file to hash2.
writeClusterLockFile(t, lockFilePath, "hash2")

// Try to create service with new hash after grace period - should succeed.
_, err = New(privKeyPath, lockFilePath, "test")
require.NoError(t, err)

// Verify the new hash is written.
content, err := os.ReadFile(lockPath)
require.NoError(t, err)

var meta metadata

err = json.Unmarshal(content, &meta)
require.NoError(t, err)
require.Equal(t, "hash2", meta.ClusterLockHash)
}

func TestClusterHashMatchWithinGracePeriod(t *testing.T) {
tmpDir := t.TempDir()
privKeyPath := filepath.Join(tmpDir, "privkey")
lockFilePath := filepath.Join(tmpDir, "cluster-lock.json")
lockPath := privKeyPath + ".lock"

// Create cluster lock file with hash1.
writeClusterLockFile(t, lockFilePath, "hash1")

// Create a recent lock file with hash1 (within stale duration).
err := writeFile(lockPath, "hash1", "test", time.Now().Add(-time.Second))
require.NoError(t, err)

// Try to create service with same hash - should fail due to staleness check.
_, err = New(privKeyPath, lockFilePath, "test")
require.Error(t, err)
require.ErrorContains(t, err, "another charon instance may be running")

// Now create a stale lock file with same hash (beyond stale duration but within grace period).
err = writeFile(lockPath, "hash1", "test", time.Now().Add(-staleDuration-time.Second))
require.NoError(t, err)

// Should succeed since hash matches and file is stale.
_, err = New(privKeyPath, lockFilePath, "test")
require.NoError(t, err)
}

func TestClusterLockFileNotFound(t *testing.T) {
tmpDir := t.TempDir()
privKeyPath := filepath.Join(tmpDir, "privkey")
lockFilePath := filepath.Join(tmpDir, "nonexistent.json")

// Should succeed when cluster lock file doesn't exist (e.g., during DKG).
// The cluster lock hash will be empty.
_, err := New(privKeyPath, lockFilePath, "test")
require.NoError(t, err)

// Verify empty cluster hash is written.
lockPath := privKeyPath + ".lock"
content, err := os.ReadFile(lockPath)
require.NoError(t, err)

var meta metadata

err = json.Unmarshal(content, &meta)
require.NoError(t, err)
require.Empty(t, meta.ClusterLockHash)
}

func assertFileExists(t *testing.T, path string) {
t.Helper()

Expand All @@ -60,3 +170,45 @@ func assertFileExists(t *testing.T, path string) {
return openErr == nil
}, time.Second, time.Millisecond)
}

// writeClusterLockFile creates a cluster lock file with the given hash.
func writeClusterLockFile(t *testing.T, path, lockHash string) {
t.Helper()

content := map[string]any{
"lock_hash": lockHash,
"name": "test-cluster",
}
b, err := json.Marshal(content)
require.NoError(t, err)
err = os.WriteFile(path, b, 0o644)
require.NoError(t, err)
}

func TestEmptyHashToHashMigration(t *testing.T) {
tmpDir := t.TempDir()
privKeyPath := filepath.Join(tmpDir, "privkey")
lockFilePath := filepath.Join(tmpDir, "cluster-lock.json")
lockPath := privKeyPath + ".lock"

// Create cluster lock file.
writeClusterLockFile(t, lockFilePath, "newhash")

// Create a stale lock file with empty cluster hash (migration scenario).
err := writeFile(lockPath, "", "test", time.Now().Add(-staleDuration*2))
require.NoError(t, err)

// Should succeed - empty hash shouldn't trigger grace period.
_, err = New(privKeyPath, lockFilePath, "test")
require.NoError(t, err)

// Verify the new hash is written.
content, err := os.ReadFile(lockPath)
require.NoError(t, err)

var meta metadata

err = json.Unmarshal(content, &meta)
require.NoError(t, err)
require.Equal(t, "newhash", meta.ClusterLockHash)
}
2 changes: 2 additions & 0 deletions app/sse/listener.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ func (p *listener) storeBlockGossipTime(slot uint64, addr string, timestamp time
if p.blockGossipTimes[slot] == nil {
p.blockGossipTimes[slot] = make(map[string]time.Time)
}

p.blockGossipTimes[slot][addr] = timestamp
}

Expand Down Expand Up @@ -318,6 +319,7 @@ func (p *listener) recordBlockProcessingTime(slot uint64, addr string, headTimes

// Clean up this entry as it's no longer needed
delete(addrMap, addr)

if len(addrMap) == 0 {
delete(p.blockGossipTimes, slot)
}
Expand Down
Loading
Loading