Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion pkg/roachprod/blobfixture/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ type FixtureMetadata struct {

// FingerprintTime is the aost used by the fingerprint command.
FingerprintTime string `json:"fingerprint_time,omitempty"`

// LastFailureAt indicates that some test using this fixture has failed and we
// want to prevent GC of this fixture for investigation purposes. If this
// field is set, the fixture will not be GC'd until a week after this time.
LastFailureAt *time.Time `json:"last_failure_at,omitempty"`
}

func (f *FixtureMetadata) MarshalJson() ([]byte, error) {
Expand Down Expand Up @@ -113,6 +118,10 @@ func fixturesToGc(gcAt time.Time, allFixtures []FixtureMetadata) []fixtureToDele
// made ready more than 24 hours ago.
obsoleteThreshold := gcAt.Add(-24 * time.Hour)

// A fixture that has had a test failure within the past week is not eligible
// for GC.
failureGCThreshold := gcAt.Add(-7 * 24 * time.Hour)

toDelete := []fixtureToDelete{}

byKind := make(map[string][]FixtureMetadata)
Expand Down Expand Up @@ -144,10 +153,15 @@ func fixturesToGc(gcAt time.Time, allFixtures []FixtureMetadata) []fixtureToDele
// NOTE: starting at 1 because index 0 is the most recent fixture and is
// not eligible for garbage collection.
for i := 1; i < len(fixtures); i++ {
fixture := fixtures[i]
successor := fixtures[i-1]
if successor.ReadyAt.Before(obsoleteThreshold) {
if fixture.LastFailureAt != nil && !fixture.LastFailureAt.Before(failureGCThreshold) {
// Fixture has had a test failure within the past week, skip gc.
continue
}
toDelete = append(toDelete, fixtureToDelete{
metadata: fixtures[i],
metadata: fixture,
reason: fmt.Sprintf("fixture '%s' is was mode obsolete by '%s' at '%s'", fixtures[i].DataPath, successor.DataPath, successor.ReadyAt),
})
}
Expand Down
55 changes: 55 additions & 0 deletions pkg/roachprod/blobfixture/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,22 @@ func (r *Registry) GC(ctx context.Context, l *logger.Logger) error {
return nil
}

// MarkFailure stamps the fixture metadata stored at metadataPath with the
// registry clock's current time as its LastFailureAt, recording that a test
// using the fixture failed. On success the path and timestamp are logged to l.
//
// Any error from the underlying metadata update is returned unchanged, and
// nothing is logged in that case.
//
// NOTE(review): ctx is accepted but is not forwarded to updateMetadata, which
// takes no context parameter.
func (r *Registry) MarkFailure(ctx context.Context, l *logger.Logger, metadataPath string) error {
	// Capture the time once so the persisted value and the logged value agree.
	failedAt := r.clock()

	stampFailure := func(m *FixtureMetadata) error {
		m.LastFailureAt = &failedAt
		return nil
	}
	if err := r.updateMetadata(metadataPath, stampFailure); err != nil {
		return err
	}

	l.Printf("fixture '%s' marked last failure at '%s'", metadataPath, failedAt)
	return nil
}

// Close closes the registry's underlying storage.
func (r *Registry) Close() {
	// Close has no error return, so a failure to close the storage is
	// deliberately discarded here.
	_ = r.storage.Close()
}
Expand Down Expand Up @@ -263,6 +279,45 @@ func (r *Registry) upsertMetadata(metadata FixtureMetadata) error {
return writer.Close()
}

// updateMetadata performs a read-modify-write of the fixture metadata stored
// at metadataPath: it reads and decodes the existing JSON, applies update to
// the decoded value, re-encodes it, and writes the result back to
// metadataPath.
//
// It returns an error if the metadata cannot be read, does not exist, cannot
// be decoded or re-encoded, if update itself fails, or if the write fails.
// When update returns an error nothing is written.
//
// NOTE(review): no locking or conditional write is visible in this function,
// so concurrent updaters of the same path may overwrite each other — confirm
// whether callers need stronger guarantees.
func (r *Registry) updateMetadata(
	metadataPath string, update func(metadata *FixtureMetadata) error,
) error {
	ctx := context.Background()

	raw, err := r.maybeReadFile(ctx, metadataPath)
	if err != nil {
		return errors.Wrap(err, "failed to read metadata for update")
	}
	if raw == nil {
		return errors.New("metadata does not exist for update")
	}

	metadata := &FixtureMetadata{}
	if err := metadata.UnmarshalJson(raw); err != nil {
		return errors.Wrap(err, "failed to unmarshal metadata for update")
	}

	if err := update(metadata); err != nil {
		return errors.Wrap(err, "failed to update metadata")
	}

	updated, err := metadata.MarshalJson()
	if err != nil {
		return errors.Wrap(err, "failed to marshal updated metadata")
	}

	// Write back to the same path that was read rather than to the path
	// recorded inside the decoded metadata: if that field were empty, stale, or
	// altered by the callback, the update would land on a different object and
	// the one that was read would silently keep its old contents.
	writer, err := r.storage.Writer(ctx, metadataPath)
	if err != nil {
		return errors.Wrap(err, "failed to create writer for updated metadata")
	}

	if _, err := writer.Write(updated); err != nil {
		// The write error is the one worth reporting; the close error on this
		// already-failed path is intentionally dropped.
		_ = writer.Close()
		return errors.Wrap(err, "failed to write updated metadata")
	}

	// The close error is returned because the write may only be finalized on
	// close.
	return writer.Close()
}

// deleteMetadata removes the object at metadata.MetadataPath from the
// registry's storage. Any error reported by the storage layer is wrapped with
// a "failed to delete metadata" message.
func (r *Registry) deleteMetadata(metadata FixtureMetadata) error {
	deleteErr := r.storage.Delete(context.Background(), metadata.MetadataPath)
	return errors.Wrap(deleteErr, "failed to delete metadata")
}
Expand Down
35 changes: 35 additions & 0 deletions pkg/roachprod/blobfixture/registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func TestFixtureRegistry(t *testing.T) {
kind string
createdAt time.Time
readyAt time.Time
lastFailureAt time.Time
isLatestOfKind bool
survivesGC bool
}
Expand Down Expand Up @@ -127,6 +128,35 @@ func TestFixtureRegistry(t *testing.T) {
survivesGC: true,
isLatestOfKind: false,
},
{
// This fixture was marked with a failure long ago, so it will be deleted
// despite the flag.
kind: "kind-marked-failure",
createdAt: makeTime(-10),
readyAt: makeTime(-9),
lastFailureAt: makeTime(-8),
survivesGC: false,
isLatestOfKind: false,
},
{
// This fixture was marked with a failure recently, so despite the fact
// that it has been obsolete for more than a day, it will not be
// deleted.
kind: "kind-marked-failure",
createdAt: makeTime(-6),
readyAt: makeTime(-5),
lastFailureAt: makeTime(-4),
survivesGC: true,
isLatestOfKind: false,
},
{
// This is the most recent fixture of its kind, so it will not be deleted.
kind: "kind-marked-failure",
createdAt: makeTime(-1),
readyAt: makeTime(-0.5),
survivesGC: true,
isLatestOfKind: true,
},
}

type fixturesCreated struct {
Expand Down Expand Up @@ -169,6 +199,11 @@ func TestFixtureRegistry(t *testing.T) {
require.NoError(t, handle.SetReadyAt(ctx))
}

if !f.lastFailureAt.IsZero() {
now = f.lastFailureAt
require.NoError(t, registry.MarkFailure(ctx, l, metadata.MetadataPath))
}

created = append(created, fixturesCreated{
fixture: f,
metadata: metadata,
Expand Down