Skip to content

Commit b13fa8f

Browse files
alvaroalemankubermatic-bot
authored andcommitted
Expose an instance count metric (#444)
* Extend the cloud provider interface with GetMetricsForMachines * Basic plumbing for SetInstanceNumberForMachines * Implement SetInstanceNumberForMachines for AWS * Fix linting * Use prometheus.Default{Registry,Gatherer} to allow providers to register individual metrics * Change interface * Properly implement SetMetricsForMachines * Move polling of SetMetricsForMachines into metrics.go * Call SetMetricsForMachines for all providers individually * Make aws metrics for instance be able to cope with multiple credentials * Rebase fixups * Do not throw an error when SetMetricsForMachines has no implementation
1 parent c617a2a commit b13fa8f

File tree

13 files changed

+205
-21
lines changed

13 files changed

+205
-21
lines changed

cmd/controller/main.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ func main() {
225225
glog.Fatalf("error building kubernetes clientset for leaderElectionClient: %v", err)
226226
}
227227

228-
prometheusRegistry := prometheus.NewRegistry()
228+
prometheusRegistry := prometheus.DefaultRegisterer
229229

230230
// before we acquire a lock we actually warm up caches mirroring the state of the API server
231231
clusterInformerFactory := clusterinformers.NewFilteredSharedInformerFactory(machineClient, time.Minute*15, metav1.NamespaceAll, labelSelector(name))
@@ -281,14 +281,12 @@ func main() {
281281
ctx, ctxDone := context.WithCancel(context.Background())
282282
var g run.Group
283283
{
284-
prometheusRegistry.MustRegister(prometheus.NewProcessCollector(os.Getpid(), ""))
285-
prometheusRegistry.MustRegister(prometheus.NewGoCollector())
286284
prometheusRegistry.MustRegister(machinecontroller.NewMachineCollector(
287285
clusterInformerFactory.Cluster().V1alpha1().Machines().Lister(),
288286
kubeClient,
289287
))
290288

291-
s := createUtilHTTPServer(kubeClient, kubeconfigProvider, prometheusRegistry)
289+
s := createUtilHTTPServer(kubeClient, kubeconfigProvider, prometheus.DefaultGatherer)
292290
g.Add(func() error {
293291
return s.ListenAndServe()
294292
}, func(err error) {

pkg/cloudprovider/cloud/provider.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,16 @@ type Provider interface {
4545
// about created machines, e.g. instance type, instance size, region
4646
// or whatever the provider deems interesting. Should always return
4747
// a "size" label.
48+
// This should not make any API calls to the cloud provider.
4849
MachineMetricsLabels(machine *clusterv1alpha1.Machine) (map[string]string, error)
4950

5051
// MigrateUID is called when the controller migrates types and the UID of the machine object changes
5152
// All cloud providers that use Machine.UID to uniquely identify resources must implement this
5253
MigrateUID(machine *clusterv1alpha1.Machine, new types.UID) error
54+
55+
// SetMetricsForMachines allows providers to provide provider-specific metrics. This may be implemented
56+
// as no-op
57+
SetMetricsForMachines(machines clusterv1alpha1.MachineList) error
5358
}
5459

5560
// MachineUpdater defines a function to persist an update to a machine

pkg/cloudprovider/provider/aws/provider.go

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ import (
88
"sync"
99
"time"
1010

11+
"github.com/aws/aws-sdk-go/aws"
12+
"github.com/aws/aws-sdk-go/aws/awserr"
13+
"github.com/aws/aws-sdk-go/aws/credentials"
14+
"github.com/aws/aws-sdk-go/aws/session"
15+
"github.com/aws/aws-sdk-go/service/ec2"
16+
"github.com/aws/aws-sdk-go/service/iam"
17+
"github.com/golang/glog"
18+
gocache "github.com/patrickmn/go-cache"
19+
"github.com/prometheus/client_golang/prometheus"
20+
1121
"github.com/kubermatic/machine-controller/pkg/cloudprovider/cloud"
1222
cloudprovidererrors "github.com/kubermatic/machine-controller/pkg/cloudprovider/errors"
1323
"github.com/kubermatic/machine-controller/pkg/cloudprovider/instance"
@@ -18,19 +28,23 @@ import (
1828
"k8s.io/apimachinery/pkg/types"
1929
"k8s.io/apimachinery/pkg/util/sets"
2030

21-
"github.com/aws/aws-sdk-go/aws"
22-
"github.com/aws/aws-sdk-go/aws/awserr"
23-
"github.com/aws/aws-sdk-go/aws/credentials"
24-
"github.com/aws/aws-sdk-go/aws/session"
25-
"github.com/aws/aws-sdk-go/service/ec2"
26-
"github.com/aws/aws-sdk-go/service/iam"
27-
"github.com/golang/glog"
28-
gocache "github.com/patrickmn/go-cache"
29-
3031
"sigs.k8s.io/cluster-api/pkg/apis/cluster/common"
3132
"sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1"
3233
)
3334

35+
// Package-level Prometheus state used by the AWS provider.
var (
36+
// prometheusRegisterer guards the metric registration performed in init so
// that it runs at most once.
prometheusRegisterer = &sync.Once{}
37+
// metricInstancesForMachines is a gauge vector with a single "machine"
// label; SetMetricsForMachines sets one series per machine, using
// "<namespace>/<name>" as the label value.
metricInstancesForMachines = prometheus.NewGaugeVec(prometheus.GaugeOpts{
38+
Name: "machine_controller_aws_instances_for_machine",
39+
Help: "The number of instances at aws for a given machine"}, []string{"machine"})
40+
)
41+
42+
// init registers metricInstancesForMachines via prometheus.MustRegister
// (the package-level helper, which panics if registration fails).
// NOTE(review): init already runs only once per package, so the sync.Once
// wrapper looks redundant — confirm nothing else calls prometheusRegisterer.
func init() {
43+
prometheusRegisterer.Do(func() {
44+
prometheus.MustRegister(metricInstancesForMachines)
45+
})
46+
}
47+
3448
type provider struct {
3549
configVarResolver *providerconfig.ConfigVarResolver
3650
}
@@ -766,3 +780,84 @@ func setProviderSpec(rawConfig RawConfig, s v1alpha1.ProviderSpec) (*runtime.Raw
766780

767781
return &runtime.RawExtension{Raw: rawPconfig}, nil
768782
}
783+
784+
func (p *provider) SetMetricsForMachines(machines v1alpha1.MachineList) error {
785+
if len(machines.Items) < 1 {
786+
return nil
787+
}
788+
789+
type ec2Credentials struct {
790+
acccessKeyID string
791+
secretAccessKey string
792+
region string
793+
}
794+
795+
var errors []error
796+
credentials := map[string]ec2Credentials{}
797+
for _, machine := range machines.Items {
798+
config, _, _, err := p.getConfig(machines.Items[0].Spec.ProviderSpec)
799+
if err != nil {
800+
errors = append(errors, fmt.Errorf("failed to parse MachineSpec of machine %s/%s, due to %v", machine.Namespace, machine.Name, err))
801+
continue
802+
}
803+
804+
// Very simple and very stupid
805+
credentials[fmt.Sprintf("%s/%s/%s", config.AccessKeyID, config.SecretAccessKey, config.Region)] = ec2Credentials{
806+
acccessKeyID: config.AccessKeyID,
807+
secretAccessKey: config.SecretAccessKey,
808+
region: config.Region,
809+
}
810+
811+
}
812+
813+
allReservations := []*ec2.Reservation{}
814+
for _, cred := range credentials {
815+
ec2Client, err := getEC2client(cred.acccessKeyID, cred.secretAccessKey, cred.region)
816+
if err != nil {
817+
errors = append(errors, fmt.Errorf("failed to get EC2 client: %v", err))
818+
continue
819+
}
820+
inOut, err := ec2Client.DescribeInstances(&ec2.DescribeInstancesInput{})
821+
if err != nil {
822+
errors = append(errors, fmt.Errorf("failed to get EC2 instances: %v", err))
823+
continue
824+
}
825+
allReservations = append(allReservations, inOut.Reservations...)
826+
}
827+
828+
for _, machine := range machines.Items {
829+
metricInstancesForMachines.WithLabelValues(fmt.Sprintf("%s/%s", machine.Namespace, machine.Name)).Set(
830+
getIntanceCountForMachine(machine, allReservations))
831+
}
832+
833+
if len(errors) > 0 {
834+
return fmt.Errorf("errors: %v", errors)
835+
}
836+
837+
return nil
838+
}
839+
840+
func getIntanceCountForMachine(machine v1alpha1.Machine, reservations []*ec2.Reservation) float64 {
841+
var count float64
842+
for _, reservation := range reservations {
843+
for _, i := range reservation.Instances {
844+
if i.State == nil ||
845+
i.State.Name == nil ||
846+
*i.State.Name == ec2.InstanceStateNameTerminated {
847+
continue
848+
}
849+
850+
for _, tag := range i.Tags {
851+
if *tag.Key != machineUIDTag {
852+
continue
853+
}
854+
855+
if *tag.Value == string(machine.UID) {
856+
count = count + 1
857+
}
858+
break
859+
}
860+
}
861+
}
862+
return count
863+
}

pkg/cloudprovider/provider/azure/provider.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,3 +819,7 @@ func (p *provider) MachineMetricsLabels(machine *v1alpha1.Machine) (map[string]s
819819

820820
return labels, err
821821
}
822+
823+
// SetMetricsForMachines is a no-op for the Azure provider: it ignores the
// given machines, publishes no provider-specific metrics and always returns nil.
func (p *provider) SetMetricsForMachines(machines v1alpha1.MachineList) error {
824+
return nil
825+
}

pkg/cloudprovider/provider/digitalocean/provider.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,3 +495,7 @@ func doStatusAndErrToTerminalError(status int, err error) error {
495495
return err
496496
}
497497
}
498+
499+
// SetMetricsForMachines is a no-op for the DigitalOcean provider: it ignores
// the given machines, publishes no provider-specific metrics and always
// returns nil.
func (p *provider) SetMetricsForMachines(machines v1alpha1.MachineList) error {
500+
return nil
501+
}

pkg/cloudprovider/provider/fake/provider.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,7 @@ func (p *provider) MigrateUID(machine *v1alpha1.Machine, new types.UID) error {
9191
func (p *provider) MachineMetricsLabels(machine *v1alpha1.Machine) (map[string]string, error) {
9292
return map[string]string{}, nil
9393
}
94+
95+
// SetMetricsForMachines is a no-op for the fake provider: the machine list is
// discarded and nil is always returned.
func (p *provider) SetMetricsForMachines(_ v1alpha1.MachineList) error {
96+
return nil
97+
}

pkg/cloudprovider/provider/hetzner/provider.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,3 +401,7 @@ func hzErrorToTerminalError(err error, msg string) error {
401401

402402
return err
403403
}
404+
405+
// SetMetricsForMachines is a no-op for the Hetzner provider: it ignores the
// given machines, publishes no provider-specific metrics and always returns nil.
func (p *provider) SetMetricsForMachines(machines v1alpha1.MachineList) error {
406+
return nil
407+
}

pkg/cloudprovider/provider/kubevirt/provider.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ import (
1515
kerrors "k8s.io/apimachinery/pkg/api/errors"
1616
"k8s.io/apimachinery/pkg/api/resource"
1717
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
"k8s.io/apimachinery/pkg/types"
1819
"k8s.io/client-go/rest"
1920
"k8s.io/client-go/tools/clientcmd"
2021

21-
"k8s.io/apimachinery/pkg/types"
2222
kubevirtv1 "kubevirt.io/kubevirt/pkg/api/v1"
2323
"kubevirt.io/kubevirt/pkg/kubecli"
2424

@@ -353,3 +353,7 @@ func parseResources(cpus, memory string) (*corev1.ResourceList, error) {
353353
corev1.ResourceCPU: cpuResource,
354354
}, nil
355355
}
356+
357+
// SetMetricsForMachines is a no-op for the KubeVirt provider: it ignores the
// given machines, publishes no provider-specific metrics and always returns nil.
func (p *provider) SetMetricsForMachines(machines v1alpha1.MachineList) error {
358+
return nil
359+
}

pkg/cloudprovider/provider/openstack/provider.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,3 +861,7 @@ func assignFloatingIPToInstance(machineUpdater cloud.MachineUpdater, machine *v1
861861
glog.V(2).Infof("Successfully assigned the FloatingIP %s to instance %s. Took %f seconds(without the recheck wait period %f seconds). ", ip.FloatingIP, instanceID, secondsTook, floatingReassignIPCheckPeriod.Seconds())
862862
return nil
863863
}
864+
865+
// SetMetricsForMachines is a no-op for the OpenStack provider: it ignores the
// given machines, publishes no provider-specific metrics and always returns nil.
func (p *provider) SetMetricsForMachines(machines v1alpha1.MachineList) error {
866+
return nil
867+
}

pkg/cloudprovider/provider/vsphere/provider.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,3 +636,7 @@ func (p *provider) MachineMetricsLabels(machine *v1alpha1.Machine) (map[string]s
636636

637637
return labels, err
638638
}
639+
640+
// SetMetricsForMachines is a no-op for the vSphere provider: it ignores the
// given machines, publishes no provider-specific metrics and always returns nil.
func (p *provider) SetMetricsForMachines(machines v1alpha1.MachineList) error {
641+
return nil
642+
}

0 commit comments

Comments
 (0)