Skip to content

Commit 9ca3f24

Browse files
Gather Kubernetes data about pod volumes and add K8s context to filesystem metrics (#613)
* Gather Kubernetes data about pod volumes and add K8s context to filesystem metrics
* Take into account block volumes as well
* Filter pod volume metrics to only include PVC-backed volumes and exclude kube-system
* Add RBAC permissions for PVCs and PVs in controller ClusterRole
* Deduplicate filesystem metrics
* Simplify FilesystemMetric and add in-tree volume source support
* Add k8s_pod_volume_metrics to E2E test
1 parent aed57ba commit 9ca3f24

21 files changed

+1180
-94
lines changed

api/v1/kube/kube_api.pb.go

Lines changed: 324 additions & 28 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1/kube/kube_api.proto

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ service KubeAPI {
1212
rpc GetPod(GetPodRequest) returns (GetPodResponse);
1313
rpc GetNode(GetNodeRequest) returns (GetNodeResponse);
1414
rpc GetNodeStatsSummary(GetNodeStatsSummaryRequest) returns (GetNodeStatsSummaryResponse);
15+
rpc GetPodVolumes(GetPodVolumesRequest) returns (GetPodVolumesResponse);
1516
}
1617

1718
message GetClusterInfoRequest {}
@@ -152,3 +153,31 @@ message RuntimeStats {
152153
FsStats image_fs = 2;
153154
FsStats container_fs = 3;
154155
}
156+
157+
message GetPodVolumesRequest {
158+
string node_name = 1;
159+
}
160+
161+
message GetPodVolumesResponse {
162+
repeated PodVolumeInfo volumes = 1;
163+
}
164+
165+
message PodVolumeInfo {
166+
string namespace = 1;
167+
string pod_name = 2;
168+
string pod_uid = 3;
169+
string controller_kind = 4;
170+
string controller_name = 5;
171+
string container_name = 6;
172+
string volume_name = 7;
173+
string mount_path = 8;
174+
string pvc_name = 9;
175+
string pvc_uid = 10;
176+
int64 requested_size_bytes = 11;
177+
string pv_name = 12;
178+
string storage_class = 13;
179+
string csi_driver = 14;
180+
string csi_volume_handle = 15;
181+
string volume_mode = 16; // "Filesystem" or "Block"
182+
string device_path = 17; // For block volumes: container's volumeDevices[].devicePath
183+
}

api/v1/kube/kube_api_grpc.pb.go

Lines changed: 48 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1/runtime/common.pb.go

Lines changed: 5 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1/runtime/runtime_agent_api.pb.go

Lines changed: 6 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1/runtime/runtime_agent_api_grpc.pb.go

Lines changed: 12 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

charts/kvisor/templates/controller.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,8 @@ rules:
257257
- namespaces
258258
- services
259259
- endpoints
260+
- persistentvolumeclaims
261+
- persistentvolumes
260262
verbs:
261263
- get
262264
- list

cmd/agent/daemon/app/app.go

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ func (a *App) Run(ctx context.Context) error {
255255
var blockDeviceMetricsWriter pipeline.BlockDeviceMetricsWriter
256256
var filesystemMetricsWriter pipeline.FilesystemMetricsWriter
257257
var nodeStatsSummaryWriter pipeline.NodeStatsSummaryWriter
258+
var podVolumeMetricsWriter pipeline.K8sPodVolumeMetricsWriter
258259
var storageInfoProvider pipeline.StorageInfoProvider
259260
if cfg.Stats.StorageEnabled {
260261
metricsClient, err := createMetricsClient(cfg)
@@ -263,12 +264,12 @@ func (a *App) Run(ctx context.Context) error {
263264
}
264265

265266
go func() {
266-
if err = metricsClient.Start(ctx); err != nil {
267-
log.Warnf("metric client failed with:%v", err)
267+
if err := metricsClient.Start(ctx); err != nil {
268+
log.Warnf("metrics client failed: %v", err)
268269
}
269270
}()
270271

271-
blockDeviceMetricsWriter, filesystemMetricsWriter, nodeStatsSummaryWriter, err = setupStorageMetrics(metricsClient)
272+
blockDeviceMetricsWriter, filesystemMetricsWriter, nodeStatsSummaryWriter, podVolumeMetricsWriter, err = setupStorageMetrics(metricsClient)
272273
if err != nil {
273274
return fmt.Errorf("failed to setup storage metrics: %w", err)
274275
}
@@ -301,6 +302,7 @@ func (a *App) Run(ctx context.Context) error {
301302
filesystemMetricsWriter,
302303
storageInfoProvider,
303304
nodeStatsSummaryWriter,
305+
podVolumeMetricsWriter,
304306
)
305307

306308
for _, namespace := range cfg.MutedNamespaces {
@@ -569,23 +571,28 @@ func waitWithTimeout(errg *errgroup.Group, timeout time.Duration) error {
569571
}
570572
}
571573

572-
func setupStorageMetrics(metricsClient custommetrics.MetricClient) (pipeline.BlockDeviceMetricsWriter, pipeline.FilesystemMetricsWriter, pipeline.NodeStatsSummaryWriter, error) {
574+
func setupStorageMetrics(metricsClient custommetrics.MetricClient) (pipeline.BlockDeviceMetricsWriter, pipeline.FilesystemMetricsWriter, pipeline.NodeStatsSummaryWriter, pipeline.K8sPodVolumeMetricsWriter, error) {
573575
blockDeviceMetrics, err := pipeline.NewBlockDeviceMetricsWriter(metricsClient)
574576
if err != nil {
575-
return nil, nil, nil, fmt.Errorf("failed to create block device metrics writer: %w", err)
577+
return nil, nil, nil, nil, fmt.Errorf("failed to create block device metrics writer: %w", err)
576578
}
577579

578580
filesystemMetrics, err := pipeline.NewFilesystemMetricsWriter(metricsClient)
579581
if err != nil {
580-
return nil, nil, nil, fmt.Errorf("failed to create filesystem metrics writer: %w", err)
582+
return nil, nil, nil, nil, fmt.Errorf("failed to create filesystem metrics writer: %w", err)
581583
}
582584

583585
nodeStatsSummaryWriter, err := pipeline.NewNodeStatsSummaryWriter(metricsClient)
584586
if err != nil {
585-
return nil, nil, nil, fmt.Errorf("failed to create node storage stats summary writer: %w", err)
587+
return nil, nil, nil, nil, fmt.Errorf("failed to create node storage stats summary writer: %w", err)
586588
}
587589

588-
return blockDeviceMetrics, filesystemMetrics, nodeStatsSummaryWriter, nil
590+
podVolumeMetricsWriter, err := pipeline.NewK8sPodVolumeMetricsWriter(metricsClient)
591+
if err != nil {
592+
return nil, nil, nil, nil, fmt.Errorf("failed to create pod volume metrics writer: %w", err)
593+
}
594+
595+
return blockDeviceMetrics, filesystemMetrics, nodeStatsSummaryWriter, podVolumeMetricsWriter, nil
589596
}
590597

591598
// resolveMetricsAddr transforms kvisor.* addresses to telemetry.* addresses

0 commit comments

Comments
 (0)