Skip to content

Commit 062f111

Browse files
committed
feat: populate SizeBytes in snapshots and add CSI maturity features
Populate SizeBytes in all CreateSnapshot and ListSnapshots responses using the source volume's capacity_bytes property, so VolumeSnapshot objects report restoreSize correctly.

Add CSI capabilities and infrastructure that mature drivers implement:
- Advertise the VolumeExpansion ONLINE plugin capability
- Advertise the VOLUME_CONDITION controller capability for health monitoring
- Advertise SINGLE_NODE_MULTI_WRITER on controller and node
- Enable podInfoOnMount and storageCapacity in the CSIDriver manifest
- Add a livenessprobe sidecar to the controller and node pods
- Enable --enable-capacity on csi-provisioner for capacity tracking
- Map capacity-related TrueNAS errors to the ResourceExhausted gRPC code
1 parent 3803b41 commit 062f111

16 files changed

+255
-17
lines changed

charts/tns-csi-driver/templates/controller.yaml

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,23 @@ spec:
7171
- name: DEBUG_CSI
7272
value: "true"
7373
{{- end }}
74-
{{- if .Values.controller.metrics.enabled }}
7574
ports:
75+
{{- if .Values.controller.metrics.enabled }}
7676
- name: metrics
7777
containerPort: {{ .Values.controller.metrics.port }}
7878
protocol: TCP
79-
{{- end }}
79+
{{- end }}
80+
- name: healthz
81+
containerPort: 9808
82+
protocol: TCP
83+
livenessProbe:
84+
httpGet:
85+
path: /healthz
86+
port: healthz
87+
initialDelaySeconds: 10
88+
timeoutSeconds: 3
89+
periodSeconds: 10
90+
failureThreshold: 5
8091
volumeMounts:
8192
- name: socket-dir
8293
mountPath: /var/lib/csi/sockets/pluginproxy/
@@ -97,9 +108,19 @@ spec:
97108
- "--leader-election"
98109
- "--default-fstype=nfs"
99110
- "--extra-create-metadata"
111+
- "--enable-capacity"
112+
- "--capacity-ownerref-level=2"
100113
env:
101114
- name: ADDRESS
102115
value: /var/lib/csi/sockets/pluginproxy/csi.sock
116+
- name: NAMESPACE
117+
valueFrom:
118+
fieldRef:
119+
fieldPath: metadata.namespace
120+
- name: POD_NAME
121+
valueFrom:
122+
fieldRef:
123+
fieldPath: metadata.name
103124
volumeMounts:
104125
- name: socket-dir
105126
mountPath: /var/lib/csi/sockets/pluginproxy/
@@ -171,6 +192,22 @@ spec:
171192
{{- toYaml .Values.sidecars.snapshotter.resources | nindent 12 }}
172193
{{- end }}
173194

195+
# CSI Liveness Probe sidecar
196+
- name: liveness-probe
197+
image: "{{ .Values.sidecars.livenessprobe.image.repository }}:{{ .Values.sidecars.livenessprobe.image.tag }}"
198+
imagePullPolicy: {{ .Values.sidecars.livenessprobe.image.pullPolicy }}
199+
args:
200+
- "--csi-address=$(ADDRESS)"
201+
- "--health-port=9808"
202+
env:
203+
- name: ADDRESS
204+
value: /var/lib/csi/sockets/pluginproxy/csi.sock
205+
volumeMounts:
206+
- name: socket-dir
207+
mountPath: /var/lib/csi/sockets/pluginproxy/
208+
resources:
209+
{{- toYaml .Values.sidecars.livenessprobe.resources | nindent 12 }}
210+
174211
volumes:
175212
- name: socket-dir
176213
emptyDir: {}

charts/tns-csi-driver/templates/csidriver.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ metadata:
1010
{{- end }}
1111
spec:
1212
attachRequired: false
13-
podInfoOnMount: false
13+
podInfoOnMount: true
14+
storageCapacity: true
1415
fsGroupPolicy: File
1516
volumeLifecycleModes:
1617
- Persistent

charts/tns-csi-driver/templates/node.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,18 @@ spec:
7474
- name: DEBUG_CSI
7575
value: "true"
7676
{{- end }}
77+
ports:
78+
- name: healthz
79+
containerPort: 9808
80+
protocol: TCP
81+
livenessProbe:
82+
httpGet:
83+
path: /healthz
84+
port: healthz
85+
initialDelaySeconds: 10
86+
timeoutSeconds: 3
87+
periodSeconds: 10
88+
failureThreshold: 5
7789
securityContext:
7890
privileged: true
7991
capabilities:
@@ -123,6 +135,22 @@ spec:
123135
resources:
124136
{{- toYaml .Values.sidecars.nodeDriverRegistrar.resources | nindent 12 }}
125137

138+
# CSI Liveness Probe sidecar
139+
- name: liveness-probe
140+
image: "{{ .Values.sidecars.livenessprobe.image.repository }}:{{ .Values.sidecars.livenessprobe.image.tag }}"
141+
imagePullPolicy: {{ .Values.sidecars.livenessprobe.image.pullPolicy }}
142+
args:
143+
- "--csi-address=$(ADDRESS)"
144+
- "--health-port=9808"
145+
env:
146+
- name: ADDRESS
147+
value: /csi/csi.sock
148+
volumeMounts:
149+
- name: plugin-dir
150+
mountPath: /csi
151+
resources:
152+
{{- toYaml .Values.sidecars.livenessprobe.resources | nindent 12 }}
153+
126154
volumes:
127155
- name: plugin-dir
128156
hostPath:

charts/tns-csi-driver/values.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,19 @@ sidecars:
217217
cpu: 200m
218218
memory: 200Mi
219219

220+
livenessprobe:
221+
image:
222+
repository: registry.k8s.io/sig-storage/livenessprobe
223+
tag: v2.14.0
224+
pullPolicy: IfNotPresent
225+
resources:
226+
requests:
227+
cpu: 10m
228+
memory: 20Mi
229+
limits:
230+
cpu: 100m
231+
memory: 100Mi
232+
220233
# Storage class configuration
221234
# Each entry creates a Kubernetes StorageClass. You can have multiple classes
222235
# per protocol (e.g., two NFS classes with different reclaim policies).

pkg/driver/controller.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,41 @@ var (
5959
ErrDatasetNotFound = errors.New("dataset not found for share")
6060
)
6161

62+
// capacityErrorSubstrings are error message patterns that indicate insufficient pool capacity.
// TrueNAS returns these when a pool or dataset doesn't have enough free space.
//
// Every entry MUST be lowercase: isCapacityError lowercases the error text
// before matching, so an entry containing uppercase letters can never match.
var capacityErrorSubstrings = []string{
	"insufficient space",
	"out of space",
	"not enough space",
	"no space left",
	"enospc",
	"quota exceeded",
}

// isCapacityError reports whether err indicates a storage capacity issue.
//
// The match is a case-insensitive substring search of err.Error() against
// capacityErrorSubstrings. It returns false for a nil error and for any error
// whose message contains none of the known patterns.
func isCapacityError(err error) bool {
	if err == nil {
		return false
	}
	// Normalize once so the comparison is case-insensitive.
	errStr := strings.ToLower(err.Error())
	for _, substr := range capacityErrorSubstrings {
		if strings.Contains(errStr, substr) {
			return true
		}
	}
	return false
}
87+
88+
// createVolumeError returns an appropriate gRPC status error for volume creation failures.
89+
// Maps capacity-related errors to ResourceExhausted per CSI spec.
90+
func createVolumeError(msg string, err error) error {
91+
if isCapacityError(err) {
92+
return status.Errorf(codes.ResourceExhausted, "%s: %v", msg, err)
93+
}
94+
return status.Errorf(codes.Internal, "%s: %v", msg, err)
95+
}
96+
6297
// mountpointToDatasetID converts a ZFS mountpoint to a dataset ID.
6398
// ZFS datasets are mounted at /mnt/<dataset_name>, so we strip the /mnt/ prefix.
6499
// Example: /mnt/tank/csi/pvc-xxx -> tank/csi/pvc-xxx.
@@ -1509,6 +1544,20 @@ func (s *ControllerService) ControllerGetCapabilities(_ context.Context, _ *csi.
15091544
},
15101545
},
15111546
},
1547+
{
1548+
Type: &csi.ControllerServiceCapability_Rpc{
1549+
Rpc: &csi.ControllerServiceCapability_RPC{
1550+
Type: csi.ControllerServiceCapability_RPC_VOLUME_CONDITION,
1551+
},
1552+
},
1553+
},
1554+
{
1555+
Type: &csi.ControllerServiceCapability_Rpc{
1556+
Rpc: &csi.ControllerServiceCapability_RPC{
1557+
Type: csi.ControllerServiceCapability_RPC_SINGLE_NODE_MULTI_WRITER,
1558+
},
1559+
},
1560+
},
15121561
},
15131562
}, nil
15141563
}

pkg/driver/controller_iscsi.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ func (s *ControllerService) getOrCreateZVOLForISCSI(ctx context.Context, params
424424
zvol, err := s.apiClient.CreateZvol(ctx, createParams)
425425
if err != nil {
426426
timer.ObserveError()
427-
return nil, status.Errorf(codes.Internal, "Failed to create ZVOL %s: %v", params.zvolName, err)
427+
return nil, createVolumeError("Failed to create ZVOL "+params.zvolName, err)
428428
}
429429

430430
klog.V(4).Infof("Created ZVOL: %s (ID: %s)", params.zvolName, zvol.ID)

pkg/driver/controller_iscsi_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ func TestCreateISCSIVolume(t *testing.T) {
389389
}
390390
},
391391
wantErr: true,
392-
wantCode: codes.Internal,
392+
wantCode: codes.ResourceExhausted,
393393
},
394394
}
395395

pkg/driver/controller_nfs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ func (s *ControllerService) getOrCreateDataset(ctx context.Context, params *nfsV
423423
dataset, err := s.apiClient.CreateDataset(ctx, createParams)
424424
if err != nil {
425425
timer.ObserveError()
426-
return nil, status.Errorf(codes.Internal, "Failed to create dataset: %v", err)
426+
return nil, createVolumeError("Failed to create dataset", err)
427427
}
428428

429429
klog.V(4).Infof("Created dataset: %s with mountpoint: %s", dataset.Name, dataset.Mountpoint)

pkg/driver/controller_nvmeof.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ func (s *ControllerService) getOrCreateZVOL(ctx context.Context, params *nvmeofV
620620
zvol, err := s.apiClient.CreateZvol(ctx, createParams)
621621
if err != nil {
622622
timer.ObserveError()
623-
return nil, status.Errorf(codes.Internal, "Failed to create ZVOL: %v", err)
623+
return nil, createVolumeError("Failed to create ZVOL", err)
624624
}
625625

626626
klog.V(4).Infof("Created ZVOL: %s (ID: %s)", zvol.Name, zvol.ID)

pkg/driver/controller_nvmeof_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ func TestCreateNVMeOFVolume(t *testing.T) {
313313
}
314314
},
315315
wantErr: true,
316-
wantCode: codes.Internal,
316+
wantCode: codes.ResourceExhausted,
317317
},
318318
{
319319
name: "subsystem creation failure with cleanup",

0 commit comments

Comments
 (0)