Skip to content

Commit 86fee16

Browse files
authored
controller: fix handling for invalid node name (#20)
If the model PVC is already bound but the node hosting it has been taken offline, the previous delete-volume process trusted the node IP recorded on the PV. Because that node no longer exists, the gRPC request sent to that node IP would fail, preventing the PV from being deleted. This fix checks the node status on every delete-volume operation; if the node no longer exists, we return success directly, allowing the orphaned PV to be cleaned up properly. Signed-off-by: imeoer <yansong.ys@antgroup.com>
1 parent ec4c64c commit 86fee16

File tree

1 file changed

+19
-16
lines changed

1 file changed

+19
-16
lines changed

pkg/service/controller_remote.go

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"google.golang.org/grpc/keepalive"
1616
"google.golang.org/grpc/metadata"
1717
"google.golang.org/grpc/status"
18+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1819

1920
"github.com/container-storage-interface/spec/lib/go/csi"
2021
"github.com/modelpack/model-csi-driver/pkg/logger"
@@ -139,24 +140,26 @@ func (s *Service) remoteDeleteVolume(
139140
parameters = map[string]string{}
140141
}
141142

142-
nodeIP := parameters[s.cfg.Get().ParameterVolumeContextNodeIP()]
143-
if nodeIP == "" {
144-
nodeName := parameters[annotationSelectedNode]
145-
if nodeName == "" {
146-
return nil, status.Errorf(codes.InvalidArgument, "empty annotation %s in PVC", annotationSelectedNode)
147-
}
148-
_, span := tracing.Tracer.Start(ctx, "GetNodeInfoByName")
149-
span.SetAttributes(attribute.String("node_name", nodeName))
150-
nodeInfo, err := s.getNodeInfoByName(ctx, nodeName)
151-
if err != nil {
152-
span.SetStatus(otelCodes.Error, "failed to get node info")
153-
span.RecordError(err)
154-
span.End()
155-
return nil, errors.Wrapf(err, "get node IP by name: %s", nodeName)
156-
}
143+
nodeName := parameters[annotationSelectedNode]
144+
if nodeName == "" {
145+
return nil, status.Errorf(codes.InvalidArgument, "empty annotation %s in PVC", annotationSelectedNode)
146+
}
147+
_, span := tracing.Tracer.Start(ctx, "GetNodeInfoByName")
148+
span.SetAttributes(attribute.String("node_name", nodeName))
149+
nodeInfo, err := s.getNodeInfoByName(ctx, nodeName)
150+
if err != nil {
151+
span.SetStatus(otelCodes.Error, "failed to get node info")
152+
span.RecordError(err)
157153
span.End()
158-
nodeIP = nodeInfo.ip
154+
// If node not found, we just return success to avoid orphaned volume.
155+
if apierrors.IsNotFound(err) {
156+
logger.WithContext(ctx).WithError(err).Warnf("node %s not found, return success for deleting volume", nodeName)
157+
return &csi.DeleteVolumeResponse{}, nil
158+
}
159+
return nil, errors.Wrapf(err, "get node IP by name: %s", nodeName)
159160
}
161+
span.End()
162+
nodeIP := nodeInfo.ip
160163

161164
volumeID := req.GetVolumeId()
162165
if volumeID == "" {

0 commit comments

Comments
 (0)