Skip to content

Commit add1f52

Browse files
committed
roachprod: gc ibm clusters with no tags
Starting with #148008, some partially created clusters (without tags) get listed for garbage collection but are never actually deleted, because `DeleteCluster()` lists the VMs to destroy via tags. This PR updates `DeleteCluster()` to also list VMs by instance name, so that these leftover clusters are properly destroyed. Epic: none. Release note: None.
1 parent 631d99e commit add1f52

File tree

3 files changed

+18
-3
lines changed

3 files changed

+18
-3
lines changed

pkg/roachprod/vm/ibm/ibm_extended_types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,11 @@ func (i *instance) toVM() vm.VM {
588588
if err != nil {
589589
vmErrors = append(vmErrors, errors.Wrap(err, "unable to compute lifetime"))
590590
}
591+
} else {
592+
// Missing lifetime tag, use the default lifetime.
593+
// This is not an error, but a fallback to ensure the VM has a lifetime
594+
// even if the tag is not set, to avoid GCing it right away.
595+
lifetime = vm.DefaultLifetime
591596
}
592597

593598
privateIP := i.getPrivateIPAddress()

pkg/roachprod/vm/ibm/provider.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -545,8 +545,16 @@ func (p *Provider) DeleteCluster(l *logger.Logger, name string) error {
545545

546546
svc := p.getGlobalSearchService()
547547

548-
// Get the resources with the cluster name tag.
549-
query := fmt.Sprintf(`tags:"%s:true" AND "%s:%s"`, vm.TagRoachprod, vm.TagCluster, name)
548+
// The IBM API sometimes silently fails on the tagging request and resources
549+
// end up being created without any tags.
550+
// The query below will look for resources that are properly tagged with the
551+
// roachprod and cluster tags, but will also fall back to searching via the
552+
// instance name to ensure all instances are properly deleted.
553+
query := fmt.Sprintf(
554+
`(tags:"%s:true AND %s:%s") OR (NOT (tags:"%s:true") AND name:%s-*)`,
555+
vm.TagRoachprod, vm.TagCluster, name,
556+
vm.TagRoachprod, name,
557+
)
550558
searchOptions := svc.NewSearchOptions().SetLimit(defaultPaginationLimit).SetQuery(query)
551559

552560
instances := make([]*instance, 0)

pkg/roachprod/vm/vm.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ const (
4343
ArchFIPS = CPUArch("fips")
4444
ArchS390x = CPUArch("s390x")
4545
ArchUnknown = CPUArch("unknown")
46+
47+
DefaultLifetime = 12 * time.Hour
4648
)
4749

4850
// UnimplementedError is returned when a method is not implemented by a
@@ -308,7 +310,7 @@ type CreateOpts struct {
308310
func DefaultCreateOpts() CreateOpts {
309311
defaultCreateOpts := CreateOpts{
310312
ClusterName: "",
311-
Lifetime: 12 * time.Hour,
313+
Lifetime: DefaultLifetime,
312314
GeoDistributed: false,
313315
VMProviders: []string{},
314316
OsVolumeSize: 10,

0 commit comments

Comments
 (0)