Skip to content

Commit 939ec42

Browse files
amacaskillsunnylovestiramisu
authored andcommitted
fix bug where volume cloning topology requirements are ignored when chosing the location of the volume
1 parent fe62f32 commit 939ec42

File tree

3 files changed

+1023
-52
lines changed

3 files changed

+1023
-52
lines changed

pkg/gce-pd-csi-driver/controller.go

Lines changed: 127 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"math/rand"
2222
"regexp"
2323
"sort"
24+
"strings"
2425
"time"
2526

2627
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
@@ -33,6 +34,7 @@ import (
3334
"k8s.io/apimachinery/pkg/util/uuid"
3435
"k8s.io/client-go/util/flowcontrol"
3536
"k8s.io/klog/v2"
37+
"k8s.io/utils/strings/slices"
3638

3739
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
3840
gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
@@ -101,6 +103,14 @@ type workItem struct {
101103
unpublishReq *csi.ControllerUnpublishVolumeRequest
102104
}
103105

106+
// locationRequirements are additional location topology requirements that must be respected when creating a volume.
107+
type locationRequirements struct {
108+
srcVolRegion string
109+
srcVolZone string
110+
srcReplicationType string
111+
cloneReplicationType string
112+
}
113+
104114
var _ csi.ControllerServer = &GCEControllerServer{}
105115

106116
const (
@@ -145,6 +155,44 @@ func isDiskReady(disk *gce.CloudDisk) (bool, error) {
145155
return false, nil
146156
}
147157

158+
// cloningLocationRequirements returns additional location requirements to be applied to the given create volume requests topology.
159+
// If the CreateVolumeRequest will use volume cloning, location requirements in compliance with the volume cloning limitations
160+
// will be returned: https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/volume-cloning#limitations.
161+
func cloningLocationRequirements(req *csi.CreateVolumeRequest, cloneReplicationType string) (*locationRequirements, error) {
162+
if !useVolumeCloning(req) {
163+
return nil, nil
164+
}
165+
// If we are using volume cloning, this will be set.
166+
volSrc := req.VolumeContentSource.GetVolume()
167+
volSrcVolID := volSrc.GetVolumeId()
168+
169+
_, sourceVolKey, err := common.VolumeIDToKey(volSrcVolID)
170+
if err != nil {
171+
return nil, fmt.Errorf("volume ID is invalid: %w", err)
172+
}
173+
174+
isZonalSrcVol := sourceVolKey.Type() == meta.Zonal
175+
if isZonalSrcVol {
176+
region, err := common.GetRegionFromZones([]string{sourceVolKey.Zone})
177+
if err != nil {
178+
return nil, fmt.Errorf("failed to get region from zones: %w", err)
179+
}
180+
sourceVolKey.Region = region
181+
}
182+
183+
srcReplicationType := replicationTypeNone
184+
if !isZonalSrcVol {
185+
srcReplicationType = replicationTypeRegionalPD
186+
}
187+
188+
return &locationRequirements{srcVolZone: sourceVolKey.Zone, srcVolRegion: sourceVolKey.Region, srcReplicationType: srcReplicationType, cloneReplicationType: cloneReplicationType}, nil
189+
}
190+
191+
// useVolumeCloning returns true if the create volume request should be created with volume cloning.
192+
func useVolumeCloning(req *csi.CreateVolumeRequest) bool {
193+
return req.VolumeContentSource != nil && req.VolumeContentSource.GetVolume() != nil
194+
}
195+
148196
func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) {
149197
var err error
150198
diskTypeForMetric := ""
@@ -187,12 +235,21 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
187235
if multiWriter {
188236
gceAPIVersion = gce.GCEAPIVersionBeta
189237
}
238+
239+
var locationTopReq *locationRequirements
240+
if useVolumeCloning(req) {
241+
locationTopReq, err = cloningLocationRequirements(req, params.ReplicationType)
242+
if err != nil {
243+
return nil, status.Errorf(codes.InvalidArgument, "failed to get location requirements: %v", err.Error())
244+
}
245+
}
246+
190247
// Determine the zone or zones+region of the disk
191248
var zones []string
192249
var volKey *meta.Key
193250
switch params.ReplicationType {
194251
case replicationTypeNone:
195-
zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 1)
252+
zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 1, locationTopReq)
196253
if err != nil {
197254
return nil, status.Errorf(codes.InvalidArgument, "CreateVolume failed to pick zones for disk: %v", err.Error())
198255
}
@@ -202,7 +259,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
202259
volKey = meta.ZonalKey(name, zones[0])
203260

204261
case replicationTypeRegionalPD:
205-
zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 2)
262+
zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 2, locationTopReq)
206263
if err != nil {
207264
return nil, status.Errorf(codes.InvalidArgument, "CreateVolume failed to pick zones for disk: %v", err.Error())
208265
}
@@ -1436,7 +1493,29 @@ func diskIsAttachedAndCompatible(deviceName string, instance *compute.Instance,
14361493
return false, nil
14371494
}
14381495

1439-
func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int) ([]string, error) {
1496+
// pickZonesInRegion will remove any zones that are not in the given region.
1497+
func pickZonesInRegion(region string, zones []string) []string {
1498+
refinedZones := []string{}
1499+
for _, zone := range zones {
1500+
if strings.Contains(zone, region) {
1501+
refinedZones = append(refinedZones, zone)
1502+
}
1503+
}
1504+
return refinedZones
1505+
}
1506+
1507+
func prependZone(zone string, zones []string) []string {
1508+
newZones := []string{zone}
1509+
for i := 0; i < len(zones); i++ {
1510+
// Do not add a zone if it is equal to the zone that is already prepended to newZones.
1511+
if zones[i] != zone {
1512+
newZones = append(newZones, zones[i])
1513+
}
1514+
}
1515+
return newZones
1516+
}
1517+
1518+
func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int, locationTopReq *locationRequirements) ([]string, error) {
14401519
reqZones, err := getZonesFromTopology(top.GetRequisite())
14411520
if err != nil {
14421521
return nil, fmt.Errorf("could not get zones from requisite topology: %w", err)
@@ -1446,6 +1525,39 @@ func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int) ([]string
14461525
return nil, fmt.Errorf("could not get zones from preferred topology: %w", err)
14471526
}
14481527

1528+
if locationTopReq != nil {
1529+
srcVolZone := locationTopReq.srcVolZone
1530+
switch locationTopReq.cloneReplicationType {
1531+
// For zonal -> zonal cloning, the source disk zone must match the destination disk zone.
1532+
case replicationTypeNone:
1533+
// If the source volume zone is not in the topology requirement, we return an error.
1534+
if !slices.Contains(prefZones, srcVolZone) && !slices.Contains(reqZones, srcVolZone) {
1535+
volumeCloningReq := fmt.Sprintf("clone zone must match source disk zone: %s", srcVolZone)
1536+
return nil, fmt.Errorf("failed to find zone from topology %v: %s", top, volumeCloningReq)
1537+
}
1538+
return []string{srcVolZone}, nil
1539+
// For zonal or regional -> regional disk cloning, the source disk region must match the destination disk region.
1540+
case replicationTypeRegionalPD:
1541+
srcVolRegion := locationTopReq.srcVolRegion
1542+
prefZones = pickZonesInRegion(srcVolRegion, prefZones)
1543+
reqZones = pickZonesInRegion(srcVolRegion, reqZones)
1544+
1545+
if len(prefZones) == 0 && len(reqZones) == 0 {
1546+
volumeCloningReq := fmt.Sprintf("clone zone must reside in source disk region %s", srcVolRegion)
1547+
return nil, fmt.Errorf("failed to find zone from topology %v: %s", top, volumeCloningReq)
1548+
}
1549+
1550+
// For zonal -> regional disk cloning, one of the replicated zones must match the source zone.
1551+
if locationTopReq.srcReplicationType == replicationTypeNone {
1552+
if !slices.Contains(prefZones, srcVolZone) && !slices.Contains(reqZones, srcVolZone) {
1553+
volumeCloningReq := fmt.Sprintf("one of the replica zones of the clone must match the source disk zone: %s", srcVolZone)
1554+
return nil, fmt.Errorf("failed to find zone from topology %v: %s", top, volumeCloningReq)
1555+
}
1556+
prefZones = prependZone(srcVolZone, prefZones)
1557+
}
1558+
}
1559+
}
1560+
14491561
if numZones <= len(prefZones) {
14501562
return prefZones[0:numZones], nil
14511563
} else {
@@ -1504,16 +1616,25 @@ func getZoneFromSegment(seg map[string]string) (string, error) {
15041616
return zone, nil
15051617
}
15061618

1507-
func pickZones(ctx context.Context, gceCS *GCEControllerServer, top *csi.TopologyRequirement, numZones int) ([]string, error) {
1619+
func pickZones(ctx context.Context, gceCS *GCEControllerServer, top *csi.TopologyRequirement, numZones int, locationTopReq *locationRequirements) ([]string, error) {
15081620
var zones []string
15091621
var err error
15101622
if top != nil {
1511-
zones, err = pickZonesFromTopology(top, numZones)
1623+
zones, err = pickZonesFromTopology(top, numZones, locationTopReq)
15121624
if err != nil {
15131625
return nil, fmt.Errorf("failed to pick zones from topology: %w", err)
15141626
}
15151627
} else {
1516-
zones, err = getDefaultZonesInRegion(ctx, gceCS, []string{gceCS.CloudProvider.GetDefaultZone()}, numZones)
1628+
existingZones := []string{gceCS.CloudProvider.GetDefaultZone()}
1629+
// We set existingZones to the source volume zone so that for zonal -> zonal cloning, the clone is provisioned
1630+
// in the same zone as the source volume, and for zonal -> regional, one of the replicated zones will always
1631+
// be the zone of the source volume. For regional -> regional cloning, the srcVolZone will not be set, so we
1632+
// just use the default zone.
1633+
if locationTopReq != nil && locationTopReq.srcReplicationType == replicationTypeNone {
1634+
existingZones = []string{locationTopReq.srcVolZone}
1635+
}
1636+
// If topology is nil, then the Immediate binding mode was used without setting allowedTopologies in the storageclass.
1637+
zones, err = getDefaultZonesInRegion(ctx, gceCS, existingZones, numZones)
15171638
if err != nil {
15181639
return nil, fmt.Errorf("failed to get default %v zones in region: %w", numZones, err)
15191640
}

0 commit comments

Comments
 (0)