Skip to content

Commit 4b27506

Browse files
authored
feat(cluster): add support for availability zones (#241)
1 parent ca3ceaa commit 4b27506

File tree

5 files changed

+419
-84
lines changed

5 files changed

+419
-84
lines changed

internal/provider/cluster/cluster.go

Lines changed: 119 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package cluster
33
import (
44
"context"
55
"fmt"
6+
"slices"
67
"strconv"
78
"strings"
89
"time"
@@ -39,7 +40,7 @@ func ResourceCluster() *schema.Resource {
3940
Delete: schema.DefaultTimeout(clusterDeleteTimeout),
4041
},
4142

42-
SchemaVersion: 2,
43+
SchemaVersion: 3,
4344

4445
StateUpgraders: []schema.StateUpgrader{
4546
{
@@ -52,6 +53,11 @@ func ResourceCluster() *schema.Resource {
5253
Type: resourceClusterV1().CoreConfigSchema().ImpliedType(),
5354
Upgrade: resourceClusterUpgradeV1,
5455
},
56+
{
57+
Version: 2,
58+
Type: resourceClusterV2().CoreConfigSchema().ImpliedType(),
59+
Upgrade: resourceClusterUpgradeV2,
60+
},
5561
},
5662

5763
Schema: map[string]*schema.Schema{
@@ -189,6 +195,20 @@ func ResourceCluster() *schema.Resource {
189195
Computed: true,
190196
Type: schema.TypeInt,
191197
},
198+
"availability_zone_ids": {
199+
Description: "List of Availability Zone IDs for the cluster nodes (e.g., " +
200+
"'use1-az1', 'use1-az2', 'use1-az4' for AWS or 'us-central1-a', 'us-central1-b', " +
201+
"'us-central1-c' for GCP). It is recommended to specify exactly 3 AZ IDs to " +
202+
"ensure optimal distribution of nodes across availability zones. AZ IDs are " +
203+
"consistent identifiers that map to the same physical availability zone across " +
204+
"all accounts, unlike AZ names which may differ between accounts. If not " +
205+
"specified, the server will automatically select availability zones.",
206+
Optional: true,
207+
Computed: true,
208+
ForceNew: true,
209+
Type: schema.TypeSet,
210+
Elem: &schema.Schema{Type: schema.TypeString},
211+
},
192212
},
193213
}
194214
}
@@ -269,6 +289,37 @@ func resourceClusterCreate(ctx context.Context, d *schema.ResourceData, meta int
269289

270290
clusterCreateRequest.InstanceID = mi.ID
271291

292+
// Handle availability zone IDs
293+
if azIDs, ok := d.GetOk("availability_zone_ids"); ok {
294+
// Figure out the cloud account ID; it's either BYOA or Scylla Account.
295+
// There is a clear mapping from cloudProviderID to cloudAccountID.
296+
cloudAccountID := clusterCreateRequest.AccountCredentialID
297+
if cloudAccountID == 0 {
298+
switch cloudProvider.CloudProvider.ID {
299+
case 1: // AWS
300+
cloudAccountID = 1
301+
case 2: // GCP
302+
cloudAccountID = 200
303+
default:
304+
return diag.Errorf("unknown cloud provider ID %d", cloudProvider.CloudProvider.ID)
305+
}
306+
}
307+
308+
azIDsSet := azIDs.(*schema.Set)
309+
310+
var azIDList []string
311+
for _, v := range azIDsSet.List() {
312+
azIDList = append(azIDList, v.(string))
313+
}
314+
slices.Sort(azIDList)
315+
316+
if err := validateAvailabilityZoneIDs(ctx, scyllaClient, cloudAccountID, mr.ID, azIDList); err != nil {
317+
return diag.FromErr(err)
318+
}
319+
320+
clusterCreateRequest.AvailabilityZoneIDs = azIDList
321+
}
322+
272323
if !versionOK {
273324
clusterCreateRequest.ScyllaVersionID = scyllaClient.Meta.ScyllaVersions.DefaultScyllaVersionID
274325
} else if mv := scyllaClient.Meta.VersionByName(version.(string)); mv != nil {
@@ -291,6 +342,10 @@ func resourceClusterCreate(ctx context.Context, d *schema.ResourceData, meta int
291342
return diag.Errorf("failed to read cluster %d: %s", cr.ClusterID, err)
292343
}
293344

345+
if n := len(cluster.Datacenters); n != 1 {
346+
return diag.Errorf("clusters without datacenter or multi-datacenter clusters are not currently supported (found %d datacenters)", n)
347+
}
348+
294349
i := cloudProvider.InstanceByIDFromInstances(cluster.Datacenter.InstanceID, instances)
295350
if i == nil {
296351
return diag.Errorf("unexpected instance ID for cluster %d: %d", cluster.ID, cluster.Datacenter.InstanceID)
@@ -351,8 +406,8 @@ func resourceClusterRead(ctx context.Context, d *schema.ResourceData, meta inter
351406
return diag.Errorf("unexpected cloud provider %d for cluster %d", cluster.CloudProviderID, cluster.ID)
352407
}
353408

354-
if n := len(cluster.Datacenters); n > 1 {
355-
return diag.Errorf("multi-datacenter clusters are not currently supported (found %d datacenters)", n)
409+
if n := len(cluster.Datacenters); n != 1 {
410+
return diag.Errorf("clusters without datacenter or multi-datacenter clusters are not currently supported (found %d datacenters)", n)
356411
}
357412

358413
var instanceExternalID string
@@ -368,6 +423,7 @@ func resourceClusterRead(ctx context.Context, d *schema.ResourceData, meta inter
368423
}
369424
instanceExternalID = i.ExternalID
370425
}
426+
371427
err = setClusterKVs(d, cluster, p.CloudProvider.Name, instanceExternalID)
372428
if err != nil {
373429
return diag.Errorf("failed to set cluster values for cluster %d: %s", cluster.ID, err)
@@ -424,6 +480,13 @@ func setClusterKVs(d *schema.ResourceData, cluster *model.Cluster, providerName,
424480
_ = d.Set("node_disk_size", cluster.Instance.TotalStorage)
425481
}
426482

483+
azIDs := cluster.Datacenter.AvailabilityZoneIDs()
484+
if azIDs == nil {
485+
// Prevent stale data in case the new value is empty or missing.
486+
azIDs = []string{}
487+
}
488+
_ = d.Set("availability_zone_ids", azIDs)
489+
427490
return nil
428491
}
429492

@@ -479,8 +542,8 @@ func resourceClusterUpdate(ctx context.Context, d *schema.ResourceData, meta int
479542
return diag.Errorf("failed to get the cluster with ID %d: %s", clusterID, err)
480543
}
481544

482-
if n := len(cluster.Datacenters); n > 1 {
483-
return diag.Errorf("multi-datacenter clusters are not currently supported (found %d datacenters for cluster %d)", n, clusterID)
545+
if n := len(cluster.Datacenters); n != 1 {
546+
return diag.Errorf("clusters without datacenter or multi-datacenter clusters are not currently supported (found %d datacenters)", n)
484547
}
485548

486549
// Resize will fail if there is any ongoing cluster request.
@@ -653,3 +716,54 @@ func parseClusterID(d *schema.ResourceData) (int64, diag.Diagnostics) {
653716
}
654717
return clusterID, nil
655718
}
719+
720+
// validateAvailabilityZoneIDs validates that the provided AZ IDs are valid for the given region.
721+
// TODO: When placement groups are supported through the API, revisit the minimum AZ requirement
722+
// as single-AZ deployments may become valid with placement group configuration.
723+
func validateAvailabilityZoneIDs(ctx context.Context, c *scylla.Client, cloudAccountID, regionID int64, azIDs []string) error {
724+
if l := len(azIDs); l < 2 || l > 3 {
725+
return fmt.Errorf("at least 2 and at most 3 availability zone IDs are required, got %d", l)
726+
}
727+
728+
// Check for duplicate AZ IDs.
729+
seen := make(map[string]struct{}, len(azIDs))
730+
var duplicates []string
731+
for _, azID := range azIDs {
732+
if _, ok := seen[azID]; ok {
733+
duplicates = append(duplicates, azID)
734+
} else {
735+
seen[azID] = struct{}{}
736+
}
737+
}
738+
if len(duplicates) > 0 {
739+
return fmt.Errorf("duplicate availability zone IDs are not allowed: %v", duplicates)
740+
}
741+
742+
// Validate available AZ IDs.
743+
availableAZs, err := c.ListAvailabilityZoneIDs(ctx, cloudAccountID, regionID)
744+
if err != nil {
745+
return fmt.Errorf("failed to list availability zones for region: %w", err)
746+
}
747+
748+
availableSet := make(map[string]struct{}, len(availableAZs))
749+
for _, az := range availableAZs {
750+
availableSet[az] = struct{}{}
751+
}
752+
753+
var invalidAZs []string
754+
for _, azID := range azIDs {
755+
if _, ok := availableSet[azID]; !ok {
756+
invalidAZs = append(invalidAZs, azID)
757+
}
758+
}
759+
760+
if len(invalidAZs) > 0 {
761+
return fmt.Errorf(
762+
"invalid availability zone IDs %v; available AZ IDs for this region are: %v",
763+
invalidAZs,
764+
availableAZs,
765+
)
766+
}
767+
768+
return nil
769+
}
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
package cluster
2+
3+
import (
4+
"context"
5+
6+
"github.com/hashicorp/go-cty/cty"
7+
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
8+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
9+
)
10+
11+
// resourceClusterV2 returns the schema for cluster resource version 2.
12+
// This version does not include the availability_zone_ids field.
13+
func resourceClusterV2() *schema.Resource {
14+
return &schema.Resource{
15+
Schema: map[string]*schema.Schema{
16+
"cluster_id": {
17+
Computed: true,
18+
Type: schema.TypeInt,
19+
},
20+
"cloud": {
21+
Optional: true,
22+
ForceNew: true,
23+
Default: "AWS",
24+
Type: schema.TypeString,
25+
},
26+
"name": {
27+
Required: true,
28+
ForceNew: true,
29+
Type: schema.TypeString,
30+
},
31+
"region": {
32+
Required: true,
33+
ForceNew: true,
34+
Type: schema.TypeString,
35+
},
36+
"node_count": {
37+
Computed: true,
38+
Type: schema.TypeInt,
39+
},
40+
"min_nodes": {
41+
Required: true,
42+
Type: schema.TypeInt,
43+
ValidateDiagFunc: func(v interface{}, path cty.Path) diag.Diagnostics {
44+
value := v.(int)
45+
if value < 3 {
46+
return diag.Errorf("min_nodes must be at least 3, got %d", value)
47+
}
48+
if value%3 != 0 {
49+
return diag.Errorf("min_nodes must be divisible by 3, got %d", value)
50+
}
51+
return nil
52+
},
53+
},
54+
"byoa_id": {
55+
Optional: true,
56+
ForceNew: true,
57+
Type: schema.TypeInt,
58+
},
59+
"user_api_interface": {
60+
Optional: true,
61+
ForceNew: true,
62+
Type: schema.TypeString,
63+
Default: "CQL",
64+
},
65+
"alternator_write_isolation": {
66+
Optional: true,
67+
ForceNew: true,
68+
Type: schema.TypeString,
69+
Default: "only_rmw_uses_lwt",
70+
},
71+
"node_type": {
72+
Required: true,
73+
ForceNew: true,
74+
Type: schema.TypeString,
75+
},
76+
"node_dns_names": {
77+
Computed: true,
78+
Type: schema.TypeSet,
79+
Elem: schema.TypeString,
80+
Set: schema.HashString,
81+
},
82+
"node_private_ips": {
83+
Computed: true,
84+
Type: schema.TypeSet,
85+
Elem: schema.TypeString,
86+
Set: schema.HashString,
87+
},
88+
"cidr_block": {
89+
Optional: true,
90+
Computed: true,
91+
ForceNew: true,
92+
Type: schema.TypeString,
93+
},
94+
"scylla_version": {
95+
Optional: true,
96+
Computed: true,
97+
ForceNew: true,
98+
Type: schema.TypeString,
99+
},
100+
"enable_vpc_peering": {
101+
Optional: true,
102+
ForceNew: true,
103+
Type: schema.TypeBool,
104+
Default: true,
105+
},
106+
"enable_dns": {
107+
Optional: true,
108+
ForceNew: true,
109+
Type: schema.TypeBool,
110+
Default: true,
111+
},
112+
"request_id": {
113+
Computed: true,
114+
Type: schema.TypeInt,
115+
},
116+
"datacenter": {
117+
Computed: true,
118+
Type: schema.TypeString,
119+
},
120+
"status": {
121+
Computed: true,
122+
Type: schema.TypeString,
123+
},
124+
"node_disk_size": {
125+
ForceNew: true,
126+
Optional: true,
127+
Computed: true,
128+
Type: schema.TypeInt,
129+
},
130+
},
131+
}
132+
}
133+
134+
// resourceClusterUpgradeV2 is the state upgrader from schema version 2 to
// version 3.
//
// Version 3 adds the optional+computed availability_zone_ids attribute. The
// stored state needs no rewriting for it, so the incoming map is handed back
// as-is (the very same map, not a copy) together with a nil error. The context
// and provider meta arguments are not used.
func resourceClusterUpgradeV2(_ context.Context, rawState map[string]any, _ any) (map[string]any, error) {
	// Nothing to transform: availability_zone_ids is expected to be filled in
	// by the next read from the server.
	upgraded := rawState
	return upgraded, nil
}

0 commit comments

Comments
 (0)