Skip to content

Commit d620e43

Browse files
authored
Merge pull request #1864 from CatherineF-dev/sharding-per-node
Sharding metrics per node via fieldSelector
2 parents 0f48343 + 43c6073 commit d620e43

File tree

11 files changed

+244
-21
lines changed

11 files changed

+244
-21
lines changed

README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ are deleted they are no longer visible on the `/metrics` endpoint.
4545
- [Resource recommendation](#resource-recommendation)
4646
- [Horizontal sharding](#horizontal-sharding)
4747
- [Automated sharding](#automated-sharding)
48+
- [Daemonset sharding for pod metrics](#daemonset-sharding-pod-metrics)
4849
- [Setup](#setup)
4950
- [Building the Docker container](#building-the-docker-container)
5051
- [Usage](#usage)
@@ -235,6 +236,36 @@ This way of deploying shards is useful when you want to manage KSM shards throug
235236

236237
The downside of using an auto-sharded setup comes from the rollout strategy supported by `StatefulSet`s. When managed by a `StatefulSet`, pods are replaced one at a time with each pod first getting terminated and then recreated. Besides such rollouts being slower, they will also lead to short downtime for each shard. If a Prometheus scrape happens during a rollout, it can miss some of the metrics exported by kube-state-metrics.
237238

239+
### Daemonset sharding for pod metrics
240+
241+
For pod metrics, they can be sharded per node with the following flag:
242+
* `--node`
243+
244+
Each kube-state-metrics pod uses FieldSelector (spec.nodeName) to watch/list pod metrics only on the same node.
245+
246+
A daemonset kube-state-metrics example:
247+
```
248+
apiVersion: apps/v1
249+
kind: DaemonSet
250+
spec:
251+
template:
252+
spec:
253+
containers:
254+
- image: registry.k8s.io/kube-state-metrics/kube-state-metrics:IMAGE_TAG
255+
name: kube-state-metrics
256+
args:
257+
- --resource=pods
258+
- --node=$(NODE_NAME)
259+
env:
260+
- name: NODE_NAME
261+
valueFrom:
262+
fieldRef:
263+
apiVersion: v1
264+
fieldPath: spec.nodeName
265+
```
266+
267+
Other metrics can be sharded via [Horizontal sharding](#horizontal-sharding).
268+
238269
### Setup
239270

240271
Install this project to your `$GOPATH` using `go get`:

docs/cli-arguments.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ Usage of ./kube-state-metrics:
4646
--metric-opt-in-list string Comma-separated list of metrics which are opt-in and not enabled by default. This is in addition to the metric allow- and denylists
4747
--namespaces string Comma-separated list of namespaces to be enabled. Defaults to ""
4848
--namespaces-denylist string Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, only namespaces that are excluded in namespaces-denylist will be used.
49+
--node string Name of the node that contains the kube-state-metrics pod. Most likely it should be passed via the downward API. This is used for daemonset sharding. Only available for resources (pod metrics) that support spec.nodeName fieldSelector. This is experimental.
4950
--one_output If true, only write logs to their native severity level (vs also writing to each lower severity level; no effect when -logtostderr=true)
5051
--pod string Name of the pod that contains the kube-state-metrics container. When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice.
5152
--pod-namespace string Name of the namespace of the pod specified by --pod. When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice.

internal/store/builder.go

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,12 @@ var _ ksmtypes.BuilderInterface = &Builder{}
5858
// Builder helps to build store. It follows the builder pattern
5959
// (https://en.wikipedia.org/wiki/Builder_pattern).
6060
type Builder struct {
61-
kubeClient clientset.Interface
62-
customResourceClients map[string]interface{}
63-
vpaClient vpaclientset.Interface
64-
namespaces options.NamespaceList
65-
namespaceFilter string
61+
kubeClient clientset.Interface
62+
customResourceClients map[string]interface{}
63+
vpaClient vpaclientset.Interface
64+
namespaces options.NamespaceList
65+
// namespaceFilter is inside fieldSelectorFilter
66+
fieldSelectorFilter string
6667
ctx context.Context
6768
enabledResources []string
6869
familyGeneratorFilter generator.FamilyGeneratorFilter
@@ -106,10 +107,19 @@ func (b *Builder) WithEnabledResources(r []string) error {
106107
return nil
107108
}
108109

110+
// WithFieldSelectorFilter sets the fieldSelector property of a Builder.
111+
func (b *Builder) WithFieldSelectorFilter(fieldSelectorFilter string) {
112+
b.fieldSelectorFilter = fieldSelectorFilter
113+
}
114+
109115
// WithNamespaces sets the namespaces property of a Builder.
110-
func (b *Builder) WithNamespaces(n options.NamespaceList, nsFilter string) {
116+
func (b *Builder) WithNamespaces(n options.NamespaceList) {
111117
b.namespaces = n
112-
b.namespaceFilter = nsFilter
118+
}
119+
120+
// MergeFieldSelectors merges multiple fieldSelectors using AND operator.
121+
func (b *Builder) MergeFieldSelectors(selectors []string) (string, error) {
122+
return options.MergeFieldSelectors(selectors)
113123
}
114124

115125
// WithSharding sets the shard and totalShards property of a Builder.
@@ -467,7 +477,10 @@ func (b *Builder) buildStores(
467477
familyHeaders,
468478
composedMetricGenFuncs,
469479
)
470-
listWatcher := listWatchFunc(b.kubeClient, v1.NamespaceAll, b.namespaceFilter)
480+
if b.fieldSelectorFilter != "" {
481+
klog.Infof("FieldSelector is used %s", b.fieldSelectorFilter)
482+
}
483+
listWatcher := listWatchFunc(b.kubeClient, v1.NamespaceAll, b.fieldSelectorFilter)
471484
b.startReflector(expectedType, store, listWatcher, useAPIServerCache)
472485
return []cache.Store{store}
473486
}
@@ -478,7 +491,10 @@ func (b *Builder) buildStores(
478491
familyHeaders,
479492
composedMetricGenFuncs,
480493
)
481-
listWatcher := listWatchFunc(b.kubeClient, ns, b.namespaceFilter)
494+
if b.fieldSelectorFilter != "" {
495+
klog.Infof("FieldSelector is used %s", b.fieldSelectorFilter)
496+
}
497+
listWatcher := listWatchFunc(b.kubeClient, ns, b.fieldSelectorFilter)
482498
b.startReflector(expectedType, store, listWatcher, useAPIServerCache)
483499
stores = append(stores, store)
484500
}
@@ -508,7 +524,10 @@ func (b *Builder) buildCustomResourceStores(resourceName string,
508524
familyHeaders,
509525
composedMetricGenFuncs,
510526
)
511-
listWatcher := listWatchFunc(customResourceClient, v1.NamespaceAll, b.namespaceFilter)
527+
if b.fieldSelectorFilter != "" {
528+
klog.Infof("FieldSelector is used %s", b.fieldSelectorFilter)
529+
}
530+
listWatcher := listWatchFunc(customResourceClient, v1.NamespaceAll, b.fieldSelectorFilter)
512531
b.startReflector(expectedType, store, listWatcher, useAPIServerCache)
513532
return []cache.Store{store}
514533
}
@@ -519,7 +538,8 @@ func (b *Builder) buildCustomResourceStores(resourceName string,
519538
familyHeaders,
520539
composedMetricGenFuncs,
521540
)
522-
listWatcher := listWatchFunc(customResourceClient, ns, b.namespaceFilter)
541+
klog.Infof("FieldSelector is used %s", b.fieldSelectorFilter)
542+
listWatcher := listWatchFunc(customResourceClient, ns, b.fieldSelectorFilter)
523543
b.startReflector(expectedType, store, listWatcher, useAPIServerCache)
524544
stores = append(stores, store)
525545
}

main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ func main() {
5353
os.Exit(0)
5454
}
5555

56+
if err := opts.Validate(); err != nil {
57+
klog.ErrorS(err, "Validating options error")
58+
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
59+
}
60+
5661
var factories []customresource.RegistryFactory
5762
if config, set := resolveCustomResourceConfig(opts); set {
5863
crf, err := customresourcestate.FromConfig(config)

pkg/app/server.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,13 @@ func RunKubeStateMetrics(ctx context.Context, opts *options.Options, factories .
106106

107107
namespaces := opts.Namespaces.GetNamespaces()
108108
nsFieldSelector := namespaces.GetExcludeNSFieldSelector(opts.NamespacesDenylist)
109-
storeBuilder.WithNamespaces(namespaces, nsFieldSelector)
109+
nodeFieldSelector := opts.Node.GetNodeFieldSelector()
110+
merged, err := storeBuilder.MergeFieldSelectors([]string{nsFieldSelector, nodeFieldSelector})
111+
if err != nil {
112+
return err
113+
}
114+
storeBuilder.WithNamespaces(namespaces)
115+
storeBuilder.WithFieldSelectorFilter(merged)
110116

111117
allowDenyList, err := allowdenylist.New(opts.MetricAllowlist, opts.MetricDenylist)
112118
if err != nil {

pkg/app/server_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func BenchmarkKubeStateMetrics(b *testing.B) {
8080
builder.WithKubeClient(kubeClient)
8181
builder.WithSharding(0, 1)
8282
builder.WithContext(ctx)
83-
builder.WithNamespaces(options.DefaultNamespaces, "")
83+
builder.WithNamespaces(options.DefaultNamespaces)
8484
builder.WithGenerateStoresFunc(builder.DefaultGenerateStoresFunc())
8585

8686
allowDenyListFilter, err := allowdenylist.New(map[string]struct{}{}, map[string]struct{}{})
@@ -155,7 +155,7 @@ func TestFullScrapeCycle(t *testing.T) {
155155
t.Fatal(err)
156156
}
157157
builder.WithKubeClient(kubeClient)
158-
builder.WithNamespaces(options.DefaultNamespaces, "")
158+
builder.WithNamespaces(options.DefaultNamespaces)
159159
builder.WithGenerateStoresFunc(builder.DefaultGenerateStoresFunc())
160160

161161
l, err := allowdenylist.New(map[string]struct{}{}, map[string]struct{}{})
@@ -442,7 +442,7 @@ func TestShardingEquivalenceScrapeCycle(t *testing.T) {
442442
t.Fatal(err)
443443
}
444444
unshardedBuilder.WithKubeClient(kubeClient)
445-
unshardedBuilder.WithNamespaces(options.DefaultNamespaces, "")
445+
unshardedBuilder.WithNamespaces(options.DefaultNamespaces)
446446
unshardedBuilder.WithFamilyGeneratorFilter(l)
447447
unshardedBuilder.WithAllowLabels(map[string][]string{})
448448
unshardedBuilder.WithGenerateStoresFunc(unshardedBuilder.DefaultGenerateStoresFunc())
@@ -458,7 +458,7 @@ func TestShardingEquivalenceScrapeCycle(t *testing.T) {
458458
t.Fatal(err)
459459
}
460460
shardedBuilder1.WithKubeClient(kubeClient)
461-
shardedBuilder1.WithNamespaces(options.DefaultNamespaces, "")
461+
shardedBuilder1.WithNamespaces(options.DefaultNamespaces)
462462
shardedBuilder1.WithFamilyGeneratorFilter(l)
463463
shardedBuilder1.WithAllowLabels(map[string][]string{})
464464
shardedBuilder1.WithGenerateStoresFunc(shardedBuilder1.DefaultGenerateStoresFunc())
@@ -474,7 +474,7 @@ func TestShardingEquivalenceScrapeCycle(t *testing.T) {
474474
t.Fatal(err)
475475
}
476476
shardedBuilder2.WithKubeClient(kubeClient)
477-
shardedBuilder2.WithNamespaces(options.DefaultNamespaces, "")
477+
shardedBuilder2.WithNamespaces(options.DefaultNamespaces)
478478
shardedBuilder2.WithFamilyGeneratorFilter(l)
479479
shardedBuilder2.WithAllowLabels(map[string][]string{})
480480
shardedBuilder2.WithGenerateStoresFunc(shardedBuilder2.DefaultGenerateStoresFunc())
@@ -616,7 +616,7 @@ func TestCustomResourceExtension(t *testing.T) {
616616

617617
builder.WithKubeClient(kubeClient)
618618
builder.WithCustomResourceClients(customResourceClients)
619-
builder.WithNamespaces(options.DefaultNamespaces, "")
619+
builder.WithNamespaces(options.DefaultNamespaces)
620620
builder.WithGenerateStoresFunc(builder.DefaultGenerateStoresFunc())
621621
builder.WithGenerateCustomResourceStoresFunc(builder.DefaultGenerateCustomResourceStoresFunc())
622622

pkg/builder/builder.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ func (b *Builder) WithEnabledResources(c []string) error {
5858
}
5959

6060
// WithNamespaces sets the namespaces property of a Builder.
61-
func (b *Builder) WithNamespaces(n options.NamespaceList, nsFilter string) {
62-
b.internal.WithNamespaces(n, nsFilter)
61+
func (b *Builder) WithNamespaces(n options.NamespaceList) {
62+
b.internal.WithNamespaces(n)
6363
}
6464

6565
// WithSharding sets the shard and totalShards property of a Builder.

pkg/builder/types/interfaces.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ import (
3535
type BuilderInterface interface {
3636
WithMetrics(r prometheus.Registerer)
3737
WithEnabledResources(c []string) error
38-
WithNamespaces(n options.NamespaceList, nsFilter string)
38+
WithNamespaces(n options.NamespaceList)
39+
WithFieldSelectorFilter(fieldSelectors string)
3940
WithSharding(shard int32, totalShards int)
4041
WithContext(ctx context.Context)
4142
WithKubeClient(c clientset.Interface)

pkg/options/options.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ type Options struct {
3939
Resources ResourceSet
4040
Namespaces NamespaceList
4141
NamespacesDenylist NamespaceList
42+
Node NodeType
4243
Shard int32
4344
TotalShards int
4445
Pod string
@@ -111,6 +112,8 @@ func (o *Options) AddFlags() {
111112
o.flags.Int32Var(&o.Shard, "shard", int32(0), "The instances shard nominal (zero indexed) within the total number of shards. (default 0)")
112113
o.flags.IntVar(&o.TotalShards, "total-shards", 1, "The total number of shards. Sharding is disabled when total shards is set to 1.")
113114

115+
o.flags.StringVar((*string)(&o.Node), "node", "", "Name of the node that contains the kube-state-metrics pod. Most likely it should be passed via the downward API. This is used for daemonset sharding. Only available for resources (pod metrics) that support spec.nodeName fieldSelector. This is experimental.")
116+
114117
autoshardingNotice := "When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice."
115118

116119
o.flags.StringVar(&o.Pod, "pod", "", "Name of the pod that contains the kube-state-metrics container. "+autoshardingNotice)
@@ -132,3 +135,17 @@ func (o *Options) Parse() error {
132135
func (o *Options) Usage() {
133136
o.flags.Usage()
134137
}
138+
139+
// Validate validates arguments
140+
func (o *Options) Validate() error {
141+
shardableResource := "pods"
142+
if o.Node == "" {
143+
return nil
144+
}
145+
for _, x := range o.Resources.AsSlice() {
146+
if x != shardableResource {
147+
return fmt.Errorf("Resource %s can't be sharded by field selector spec.nodeName", x)
148+
}
149+
}
150+
return nil
151+
}

pkg/options/types.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,54 @@ func (r *ResourceSet) Type() string {
104104
return "string"
105105
}
106106

107+
// NodeType represents a nodeName to query from.
108+
type NodeType string
109+
110+
// GetNodeFieldSelector returns a nodename field selector.
111+
func (n *NodeType) GetNodeFieldSelector() string {
112+
if string(*n) != "" {
113+
return fields.OneTermEqualSelector("spec.nodeName", string(*n)).String()
114+
}
115+
return EmptyFieldSelector()
116+
}
117+
118+
// EmptyFieldSelector returns an empty field selector.
119+
func EmptyFieldSelector() string {
120+
return fields.Nothing().String()
121+
}
122+
123+
// MergeFieldSelectors returns AND of a list of field selectors.
124+
func MergeFieldSelectors(selectors []string) (string, error) {
125+
var err error
126+
merged := EmptyFieldSelector()
127+
for _, s := range selectors {
128+
merged, err = MergeTwoFieldSelectors(merged, s)
129+
if err != nil {
130+
return "", err
131+
}
132+
}
133+
return merged, nil
134+
}
135+
136+
// MergeTwoFieldSelectors returns AND of two field selectors.
137+
func MergeTwoFieldSelectors(s1 string, s2 string) (string, error) {
138+
selector1, err := fields.ParseSelector(s1)
139+
if err != nil {
140+
return EmptyFieldSelector(), err
141+
}
142+
selector2, err := fields.ParseSelector(s2)
143+
if err != nil {
144+
return EmptyFieldSelector(), err
145+
}
146+
if selector1.Empty() {
147+
return selector2.String(), nil
148+
}
149+
if selector2.Empty() {
150+
return selector1.String(), nil
151+
}
152+
return fields.AndSelectors(selector1, selector2).String(), nil
153+
}
154+
107155
// NamespaceList represents a list of namespaces to query from.
108156
type NamespaceList []string
109157

0 commit comments

Comments
 (0)