Skip to content

Commit 3d99ae1

Browse files
craig[bot]golgeekkyle-a-wong
committed
150873: roachprod: gc ibm clusters with no tags r=srosenberg a=golgeek Starting with #148008, some partially created clusters (without tags) get listed for garbage collection, but are never actually deleted because `DeleteCluster()` lists VMs to destroy via tags. This PR updates the `DeleteCluster()` function to also list VMs by instance names and properly destroy some leftover clusters. Epic: none Release note: None 150951: auditloggingccl: fix TestReducedAuditConfig r=kyle-a-wong a=kyle-a-wong Test started flaking due to pebble logs containing text matching the regex used to assert that certain audit logs existed. Adds a more specific regex to assert that the audit config logs were found. Fixes: #150140 Epic: None Release note: None Co-authored-by: Ludovic Leroux <[email protected]> Co-authored-by: Kyle Wong <[email protected]>
3 parents 21f5b4f + add1f52 + 15ec2ad commit 3d99ae1

File tree

4 files changed

+19
-4
lines changed

4 files changed

+19
-4
lines changed

pkg/ccl/auditloggingccl/audit_logging_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ func TestReducedAuditConfig(t *testing.T) {
389389
0,
390390
math.MaxInt64,
391391
10000,
392-
regexp.MustCompile(stmt),
392+
regexp.MustCompile(`"Statement":"`+stmt),
393393
log.WithMarkedSensitiveData,
394394
)
395395

pkg/roachprod/vm/ibm/ibm_extended_types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,11 @@ func (i *instance) toVM() vm.VM {
588588
if err != nil {
589589
vmErrors = append(vmErrors, errors.Wrap(err, "unable to compute lifetime"))
590590
}
591+
} else {
592+
// Missing lifetime tag, use the default lifetime.
593+
// This is not an error, but a fallback to ensure the VM has a lifetime
594+
// even if the tag is not set to avoid GCing it right away.
595+
lifetime = vm.DefaultLifetime
591596
}
592597

593598
privateIP := i.getPrivateIPAddress()

pkg/roachprod/vm/ibm/provider.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -545,8 +545,16 @@ func (p *Provider) DeleteCluster(l *logger.Logger, name string) error {
545545

546546
svc := p.getGlobalSearchService()
547547

548-
// Get the resources with the cluster name tag.
549-
query := fmt.Sprintf(`tags:"%s:true" AND "%s:%s"`, vm.TagRoachprod, vm.TagCluster, name)
548+
// The IBM API sometimes silently fails on the tagging request and resources
549+
// end up being created without any tags.
550+
// The query below will look for resources that are properly tagged with the
551+
// roachprod and cluster tags, but will also fall back to searching via the
552+
// instance name to ensure all instances are properly deleted.
553+
query := fmt.Sprintf(
554+
`(tags:"%s:true AND %s:%s") OR (NOT (tags:"%s:true") AND name:%s-*)`,
555+
vm.TagRoachprod, vm.TagCluster, name,
556+
vm.TagRoachprod, name,
557+
)
550558
searchOptions := svc.NewSearchOptions().SetLimit(defaultPaginationLimit).SetQuery(query)
551559

552560
instances := make([]*instance, 0)

pkg/roachprod/vm/vm.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ const (
4343
ArchFIPS = CPUArch("fips")
4444
ArchS390x = CPUArch("s390x")
4545
ArchUnknown = CPUArch("unknown")
46+
47+
DefaultLifetime = 12 * time.Hour
4648
)
4749

4850
// UnimplementedError is returned when a method is not implemented by a
@@ -308,7 +310,7 @@ type CreateOpts struct {
308310
func DefaultCreateOpts() CreateOpts {
309311
defaultCreateOpts := CreateOpts{
310312
ClusterName: "",
311-
Lifetime: 12 * time.Hour,
313+
Lifetime: DefaultLifetime,
312314
GeoDistributed: false,
313315
VMProviders: []string{},
314316
OsVolumeSize: 10,

0 commit comments

Comments
 (0)